Ejemplo n.º 1
0
    def test_order1(self):
        # Build one top-level scheduler that chains two plain jobs and
        # four nested schedulers in a single Sequence, fill the nested
        # schedulers afterwards, and hand the result to produce_png.

        async def aprint(x):
            print(x)

        def job(n):
            return Job(aprint(n), label=n)

        nested = [Scheduler() for _ in range(4)]
        sub1, sub2, sub3, sub4 = nested

        top_job = job('top')
        middle_job = job('middle')
        sched = Scheduler(
            Sequence(top_job, sub1, middle_job, sub2, sub3, sub4))

        # labels 1-3 end up in sub1, 4-6 in sub2, 7-9 in sub3 and
        # 10-12 in sub4; jobs are created round-robin across the four
        for rank in range(1, 4):
            for shift, sub in zip((0, 3, 6, 9), nested):
                sub.add(job(rank + shift))

        # the last nested scheduler receives one extra job
        sub4.add(job(13))

        produce_png(sched, "test_png_order1")
Ejemplo n.º 2
0
    def test_shutdown_nested_timeout(self):
        # A top CounterScheduler holds `cardinal` nested schedulers,
        # chained one after the other; each nested scheduler holds a
        # sequence of `cardinal` CounterJobs.
        #
        # with cardinal = 4 we create 16 jobs for which the shutdown
        # durations will be
        # 0.0 0.1 0.2 0.3 - 1.0 1.1 1.2 1.3
        # 2.0 2.1 2.2 2.3 - 3.0 3.1 3.2 3.3
        # so with shutdown_timeout = 0.9s, shutdown() is expected to
        # report a failure and we should still find counter == 12

        cardinal = 4

        top = CounterScheduler(label="TOP", shutdown_timeout=0.9)

        def make_sub(i):
            # one nested scheduler with its own sequence of counter jobs
            sub = Scheduler(label=f"SUB {i}")
            counter_jobs = [
                CounterJob(top, 10 * i + j, aprint('ok'), label=10 * i + j)
                for j in range(cardinal)
            ]
            sub.add(Sequence(*counter_jobs))
            return sub

        subs = [make_sub(i) for i in range(cardinal)]
        top.add(Sequence(*subs))

        # nothing counted before the run
        self.assertEqual(top.counter, 0)
        # every job is counted once the run is over
        self.assertTrue(top.run())
        self.assertEqual(top.counter, cardinal * cardinal)
        # shutdown is cut short by the timeout
        self.assertFalse(top.shutdown())
        self.assertEqual(top.counter, cardinal * (cardinal - 1))
Ejemplo n.º 3
0
 def test_sequence6(self):
     "adding a sequence"
     sched = Scheduler()
     # three short jobs labelled 1, 2 and 3, chained in that order
     chained = [J(sl(0.1), label=rank) for rank in (1, 2, 3)]
     sched.add(Seq(*chained))
     # orchestrate() is expected to return a true value
     self.assertTrue(sched.orchestrate())
Ejemplo n.º 4
0
    def test_display(self):
        # Call Scheduler.list() on jobs covering every combination of
        # state / boom / critical / forever. The jobs never actually
        # run: their state is faked by attaching a hand-made task object.

        class FakeTask:
            # minimal stand-in for a task; _state is set by the helper below

            def __init__(self):
                self._result = 0
                self._exception = None

        def annotate_job_with_fake_task(job, state, boom):
            # Returns the job once decorated, or None when the
            # (state, boom) combination is not supported.
            if state in ("done", "running", "scheduled"):
                task = FakeTask()
                if state == "done":
                    task._state = asyncio.futures._FINISHED
                else:
                    task._state = "NONE"
                job._task = task
                # only a 'scheduled' job is not flagged as running
                job._running = state != "scheduled"

            if not boom:
                return job

            # here we assume that a job that has raised an exception is
            # necessarily done
            if state in ("idle", "scheduled", "running"):
                print("incompatible combination boom x idle - ignored")
                return None
            job._task._exception = True
            return job

        class J(AbstractJob):
            pass

        # same iteration order as 4 nested loops, state being outermost
        combinations = [
            (state, boom, critical, forever)
            for state in ("idle", "scheduled", "running", "done")
            for boom in (True, False)
            for critical in (True, False)
            for forever in (True, False)
        ]

        sched = Scheduler()
        # each accepted job requires the previously accepted one
        previous = None
        for state, boom, critical, forever in combinations:
            label = ("forever={} crit.={} status={} boom={}"
                     .format(forever, critical, state, boom))
            j = J(critical=critical,
                  forever=forever,
                  label=label,
                  required=previous)
            if not annotate_job_with_fake_task(j, state, boom):
                continue
            sched.add(j)
            previous = j
        sched.list()
Ejemplo n.º 5
0
 def check_expansion(self, *deferred_expected_s):
     """
     Run one SshJob per deferred command and compare each captured
     output with the expected text.

     Parameters:
       deferred_expected_s: any number of (deferred, expected) pairs;
         each deferred is passed to Run() and is also used as a
         dictionary key to retrieve its formatter
     """
     scheduler = Scheduler()
     # one capturing formatter per deferred command
     captures = {}
     for deferred, _ in deferred_expected_s:
         formatter = CaptureFormatter()
         captures[deferred] = formatter
         formatter.start_capture()
         node = SshNode(localhostname(), username=localuser(),
                        formatter=formatter)
         scheduler.add(SshJob(node=node, commands=Run(deferred)))
     # all jobs go in a single run, outputs are checked afterwards
     scheduler.run()
     for deferred, expected in deferred_expected_s:
         self.assertEqual(captures[deferred].get_capture(), expected)
Ejemplo n.º 6
0
    def test_capture(self):
        # Two echo commands run within a single SshJob; the text gathered
        # by the CaptureFormatter is expected to hold both lines, in order.

        scheduler = Scheduler()
        formatter = CaptureFormatter()
        node = SshNode(localhostname(), username=localuser(),
                       formatter=formatter)
        echo_commands = [Run("echo LINE1"), Run("echo LINE2")]
        scheduler.add(SshJob(node=node, commands=echo_commands))

        # capture is turned on right before the run
        formatter.start_capture()
        scheduler.run()

        self.assertEqual(formatter.get_capture(), "LINE1\nLINE2\n")
Ejemplo n.º 7
0
    def test_nested_cycles(self):
        # check_cycles() on the toplevel scheduler is expected to detect
        # cycles whether they sit at the toplevel or in a nested scheduler

        watch = Watch()

        def job(i):
            return Job(co_print_sleep(watch, .2, f"job {i}"),
                       label=f"job{i}")

        def assert_cycle_free(scheduler):
            self.assertTrue(scheduler.check_cycles())

        def assert_cyclic(scheduler):
            self.assertFalse(scheduler.check_cycles())

        # s2 is nested in s1, between j1 and j3
        js1, js2, js3 = (job(i) for i in (11, 12, 13))
        s2 = Scheduler(Sequence(js1, js2, js3))

        j1 = job(1)
        j3 = job(3)
        s1 = Scheduler(Sequence(j1, s2, j3))
        assert_cycle_free(s1)

        # make the head of a sequence require its tail: this creates a
        # cycle; removing that requirement restores a valid graph.
        # done first in the subgraph, then at the toplevel
        for head, tail in ((js1, js3), (j1, j3)):
            head.requires(tail)
            assert_cyclic(s1)
            head.requires(tail, remove=True)
            assert_cycle_free(s1)

        # add one level down
        s3 = Scheduler()
        jss1, jss2, jss3 = (job(i) for i in (111, 112, 113))
        Sequence(jss1, jss2, jss3, scheduler=s3)

        # surgery in s2: s3 takes the place of js2; no cycles
        s2.remove(js2)
        s2.sanitize()
        s2.add(s3)
        s3.requires(js1)
        js3.requires(s3)
        assert_cycle_free(s1)

        # close the loop js1 -> js3 -> s3 -> js1
        js1.requires(js3)
        assert_cyclic(s1)
Ejemplo n.º 8
0
    def test_shutdown_nested(self):
        # A top CounterScheduler holds `cardinal` nested schedulers,
        # chained one after the other; each nested scheduler holds a
        # sequence of `cardinal` CounterJobs, all created with 0 as
        # their second argument. No shutdown timeout is set here, and
        # shutdown() is expected to succeed and bring the counter
        # back to 0.

        cardinal = 4

        top = CounterScheduler(label="TOP")

        def make_sub(i):
            # one nested scheduler with its own sequence of counter jobs
            sub = Scheduler(label=f"SUB {i}")
            counter_jobs = [
                CounterJob(top, 0, aprint('ok'), label=10 * i + j)
                for j in range(cardinal)
            ]
            sub.add(Sequence(*counter_jobs))
            return sub

        subs = [make_sub(i) for i in range(cardinal)]
        top.add(Sequence(*subs))

        # nothing counted before the run
        self.assertEqual(top.counter, 0)
        # every job is counted once the run is over
        self.assertTrue(top.run())
        self.assertEqual(top.counter, cardinal * cardinal)
        # a complete shutdown leaves nothing counted
        self.assertTrue(top.shutdown())
        self.assertEqual(top.counter, 0)
Ejemplo n.º 9
0
    def populate_sched(self, scheduler, jobs, nested=0, pack_job=1):
        """
        Add jobs to a scheduler, optionally burying them in nested schedulers.

        Parameters:
          scheduler: the scheduler to populate; it is modified in place
          jobs: an iterable of the jobs to insert
          nested: how many scheduler levels wrap each pack of jobs;
            0 means that jobs are added to ``scheduler`` directly
          pack_job: how many consecutive jobs share the same innermost
            scheduler; only used when ``nested`` is not 0

        Returns:
          the ``scheduler`` object received as input
        """
        if nested == 0:
            for job in jobs:
                scheduler.add(job)
            return scheduler

        for cpt_job, job in enumerate(jobs):
            starts_pack = cpt_job % pack_job == 0
            if starts_pack:
                # innermost scheduler, shared by all the jobs in the pack
                core_sched = Scheduler()
                # the chain of (nested - 1) wrappers is built only when
                # a pack starts, so that no unused wrapper scheduler
                # gets created for the other jobs of the pack
                top_sched = core_sched
                for _ in range(nested - 1):
                    wrapper = Scheduler()
                    wrapper.add(top_sched)
                    top_sched = wrapper
            core_sched.add(job)
            if starts_pack:
                # only the outermost level is known to the caller's scheduler
                scheduler.add(top_sched)

        return scheduler
Ejemplo n.º 10
0
def one_run(tx_power, phy_rate, antenna_mask, channel, *,
            run_name=default_run_name, slicename=default_slicename,
            load_images=False, node_ids=None,
            parallel=None,
            verbose_ssh=False, verbose_jobs=False, dry_run=False):
    """
    Performs data acquisition on all nodes with the following settings

    Arguments:
        tx_power: in dBm, a string or an int like 5, 10 or 14
        phy_rate: a string among 1, 54
        antenna_mask: a string among 1, 3, 7
        channel: a string like e.g. 1 or 40
        run_name: the name for a subdirectory where all data will be kept
                  successive runs should use the same name for further visualization
        slicename: the Unix login name (slice name) to enter the gateway
        load_images: a boolean specifying whether nodes should be re-imaged first
        node_ids: a list of node ids to run the scenario on; strings or ints are OK;
                  defaults to default_node_ids
        parallel: a number of simultaneous jobs to run
                  None (the default) means the pings are chained in a Sequence,
                  i.e. data acquisition is sequential;
                  any other value is used as the scheduler's jobs_window,
                  0 meaning maximum parallel
        verbose_ssh: passed as `verbose` to all the SshNode objects
        verbose_jobs: passed as `verbose` to the scheduler and to the SshJob objects
        dry_run: if set, just display a one-liner with the parameters

    Returns:
        True in dry-run mode; otherwise the value returned by
        the scheduler's orchestrate()
    """

    #
    # dry-run mode
    # just display a one-liner with parameters
    #
    if dry_run:
        load_msg = "" if not load_images else " LOAD"
        # node_ids defaults to None, fall back on the default nodes
        # instead of iterating over None
        shown_ids = node_ids if node_ids is not None else default_node_ids
        nodes = " ".join(str(n) for n in shown_ids)
        print("dry-run: {run_name}{load_msg} -"
              " t{tx_power} r{phy_rate} a{antenna_mask} ch{channel} -"
              "nodes {nodes}"
              .format(run_name=run_name, load_msg=load_msg,
                      tx_power=tx_power, phy_rate=phy_rate,
                      antenna_mask=antenna_mask, channel=channel,
                      nodes=nodes))
        # in dry-run mode we are done
        return True

    # set default for the nodes parameter
    if node_ids is not None:
        node_ids = [int(node_id) for node_id in node_ids]
    else:
        node_ids = default_node_ids

    ###
    # create the logs directory based on input parameters
    run_root = naming_scheme(run_name, tx_power, phy_rate,
                             antenna_mask, channel, autocreate=True)

    # the nodes involved
    faraday = SshNode(hostname=default_gateway, username=slicename,
                      formatter=TimeColonFormatter(), verbose=verbose_ssh)

    # this is a python dictionary that allows to retrieve a node object
    # from an id
    node_index = {
        node_id: SshNode(gateway=faraday, hostname=fitname(node_id),
                         username="******",
                         formatter=TimeColonFormatter(), verbose=verbose_ssh)
        for node_id in node_ids
    }

    # the global scheduler
    scheduler = Scheduler(verbose=verbose_jobs)

    ##########
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        verbose=verbose_jobs,
        critical=True,
        command=Run("rhubarbe leases --check"),
    )

    # load images if requested

    green_light = check_lease

    if load_images:
        # the nodes that we **do not** use should be turned off
        # so if we have selected e.g. nodes 10 12 and 15, we will do
        # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15
        negated_node_ids = ["~{}".format(node_id) for node_id in node_ids]
        # replace green_light in this case
        green_light = SshJob(
            node=faraday,
            required=check_lease,
            critical=True,
            scheduler=scheduler,
            verbose=verbose_jobs,
            commands=[
                Run("rhubarbe", "off", "-a", *negated_node_ids),
                Run("rhubarbe", "load", "-i", "u16-ath-noreg", *node_ids),
                Run("rhubarbe", "wait", *node_ids)
            ]
        )

    ##########
    # setting up the wireless interface on all nodes
    #
    # we create as many SshJob instances as we have
    # (id, SshNode) couples in node_index
    # and gather them all in init_wireless_jobs
    # they all depend on green_light
    #
    # provide node-utilities with the ranges/units it expects
    frequency = channel_frequency[int(channel)]
    # the driver expects mBm, not dBm; tx_power may come in as a string,
    # so convert it first: multiplying a str would repeat it 100 times
    tx_power_driver = int(tx_power) * 100

    def init_wireless_command():
        # a fresh command object each time, all with the same arguments
        return RunScript("node-utilities.sh", "init-ad-hoc-network",
                         wireless_driver, "foobar", frequency, phy_rate,
                         antenna_mask, tx_power_driver)

    if load_images:
        # The first init_wireless_jobs always has troubles...
        # Do it twice the first time (nasty hack)
        init_wireless_jobs = [
            SshJob(
                scheduler=scheduler,
                required=green_light,
                node=node,
                verbose=verbose_jobs,
                label="init {}".format(node_id),
                commands=[
                    init_wireless_command(),
                    init_wireless_command(),
                ]
            )
            for node_id, node in node_index.items()]
    else:
        init_wireless_jobs = [
            SshJob(
                scheduler=scheduler,
                required=green_light,
                node=node,
                verbose=verbose_jobs,
                label="init {}".format(node_id),
                command=init_wireless_command(),
            )
            for node_id, node in node_index.items()]

    # then install and run olsr on fit nodes
    run_olsr = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=init_wireless_jobs,
            label="init and run olsr on fit nodes",
            verbose=verbose_jobs,
            command=RunScript("node-utilities.sh", "run-olsr")
        )
        for node in node_index.values()]

    # after that, run tcpdump on fit nodes, this job never ends...
    # these jobs are added to the scheduler as they get created,
    # the list itself is not used any further
    run_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=run_olsr,
            label="run tcpdump on fit nodes",
            verbose=verbose_jobs,
            commands=[
                Run("echo run tcpdump on fit{:02d}".format(i)),
                Run("tcpdump -U -i moni-{} -y ieee802_11_radio -w /tmp/fit{}.pcap".format(wireless_driver, i))
            ]
        )
        for i, node in node_index.items()]

    # let the wireless network settle
    settle_wireless_job = PrintJob(
        "Let the wireless network settle",
        sleep=settle_delay,
        scheduler=scheduler,
        required=run_olsr,
        label="settling")

    ##########
    # create all the ping jobs
    # see the 2 for instructions at the bottom of the comprehension
    #
    # notice that these SshJob instances are not yet added
    # to the scheduler, we will add them later on
    # depending on the sequential/parallel strategy

    pings = [
        SshJob(
            node=nodei,
            required=settle_wireless_job,
            label="ping {} -> {}".format(i, j),
            verbose=verbose_jobs,
            commands=[
                Run("echo {} '->' {}".format(i, j)),
                RunScript("node-utilities.sh", "my-ping",
                          "10.0.0.{}".format(j), ping_timeout, ping_interval,
                          ping_size, ping_number,
                          ">", "PING-{:02d}-{:02d}".format(i, j)),
                Pull(remotepaths="PING-{:02d}-{:02d}".format(i, j),
                     localpath=str(run_root)),
            ]
        )
        # looping on the source, now only fit01 is source
        for i, nodei in node_index.items()
        # and on the destination
        for j in node_index
        # and keep only half of the couples
        if (j > i) and (i == 1)
    ]

    # retrieve all pcap files from fit nodes
    # again these jobs are added to the scheduler as they get created
    retrieve_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=nodei,
            required=pings,
            label="retrieve pcap trace from fit{:02d}".format(i),
            verbose=verbose_jobs,
            commands=[
                RunScript("node-utilities.sh", "kill-olsr"),
                Run("sleep 1;pkill tcpdump; sleep 1"),
                RunScript("node-utilities.sh", "process-pcap", i),
                Run(
                    "echo retrieving pcap trace and result-{i}.txt from fit{i:02d}".format(i=i)),
                Pull(remotepaths=["/tmp/fit{}.pcap".format(i),
                                  "/tmp/result-{}.txt".format(i)],
                     localpath=str(run_root)),
            ]
        )
        for i, nodei in node_index.items()
    ]

    # xxx this is a little fishy
    # should we not just consider that the default is parallel=1 ?
    if parallel is None:
        # with the sequential strategy, we just need to
        # create a Sequence out of the list of pings
        # Sequence will add the required relationships
        scheduler.add(Sequence(*pings, scheduler=scheduler))
        # for running sequentially we impose no limit on the scheduler
        # that will be limited anyways by the very structure
        # of the required graph
        jobs_window = None
    else:
        # with the parallel strategy
        # we just need to insert all the ping jobs
        # as each already has its required OK
        scheduler.update(pings)
        # this time the value in parallel is the one
        # to use as the jobs_limit; if 0 then inch'allah
        jobs_window = parallel

    # we are not in dry-run mode, let's proceed to the actual experiment
    ok = scheduler.orchestrate(jobs_window=jobs_window)

    if not ok:
        # give details if it failed
        scheduler.debrief()
    else:
        # data acquisition is done, let's aggregate results
        # i.e. compute averages
        post_processor = Aggregator(run_root, node_ids, antenna_mask)
        post_processor.run()

    return ok
Ejemplo n.º 11
0
def prepare_testbed_scheduler(  # pylint: disable=r0913, r0914
        gateway: SshNode,
        load_flag: bool,
        experiment_scheduler: Scheduler,
        images_mapping,
        nodes_left_alone=None,
        sdrs_left_alone=None,
        phones_left_alone=None,
        verbose_jobs=False):
    """
    Wrap an experiment scheduler into a standard warm-up scheduler.

    Experimenters only need to write a scheduler that describes their
    core experiment; this function builds around it the steps that
    a) check for a valid lease, b) load images on nodes, and
    c) turn off unused devices.

    An experiment script typically exposes a `--load/-l` boolean flag,
    that is set the first time the experiment is launched during a given
    timeslot, and not set on subsequent calls. This is what ``load_flag``
    is for: when it is False only step a) is performed, otherwise the
    resulting scheduler goes through all three steps.

    Parameters:
      gateway: the ssh handle to the gateway
      load_flag(bool): if not set, only the lease is checked
      experiment_scheduler: core scheduler for the experiment
      images_mapping: a dictionary that specifies images to be loaded on nodes;
        see examples below
      nodes_left_alone: a list of node numbers that should be left intact,
        neither loaded nor turned off;
      sdrs_left_alone: a list of numbers excluded from the ``usrpoff``
        command;
      phones_left_alone: a list of phone numbers that should be left intact,
        i.e. not turned off;
      verbose_jobs: passed as ``verbose`` to the jobs created here.

    Return :
      The overall scheduler where the input ``experiment_scheduler`` is embedded.

    Examples:
      Specify a mapping like the following::

          images_mapping = { "ubuntu" : [1, 4, 5], "gnuradio": [16]}

      Note that the format for ``images_mapping``, is flexible;
      if only one node is to be loaded, the iterable level is optional;
      also each node can be specified as an ``int``, a ``bytes``, a ``str``,
      in which case non numeric characters are ignored. So this is a legitimate
      requirement as well::

        images_mapping = { 'openair-cn': 12 + 4,
                           'openair-enodeb': ('fit32',),
                           'ubuntu': {12, 'reboot1', '004',
                                      'you-get-the-picture-34'}
        }
    """

    # normalize the optional collections into (possibly empty) sets
    nodes_left_alone = set(nodes_left_alone or ())
    sdrs_left_alone = set(sdrs_left_alone or ())
    phones_left_alone = set(phones_left_alone or ())

    scheduler = Scheduler(label="Preparation")

    check_lease = SshJob(
        scheduler=scheduler,
        node=gateway,
        verbose=verbose_jobs,
        label="Check lease {}".format(gateway.username),
        command=Run("rhubarbe leases --check", label="rlease"),
    )

    # without image loading, the experiment only waits for the lease check
    if not load_flag:
        scheduler.add(experiment_scheduler)
        experiment_scheduler.requires(check_lease)
        return scheduler

    # otherwise, right after the lease check, we create
    # (*) as many image-loading jobs as we have entries in images_mapping
    # (*) one job to turn off phones, and one for nodes and usrps
    #     as parallelizing brings no speed up at all

    # todo ideally we could also probe the testbed to figure out which nodes
    # are currently unavailable, and let them alone as well; but well.

    # the jobs that the experiment needs to wait for
    octopus = []

    # all the nodes that get an image, across all entries
    loaded_nodes = set()

    for image, nodes in images_mapping.items():
        # be flexible on inputs: skip empty entries...
        if not nodes:
            continue
        # ... accept a single atomic value ...
        if isinstance(nodes, (int, str, bytes)):
            nodes = [nodes]
        # ... and normalize each item through r2lab_id
        node_ids = {r2lab_id(node) for node in nodes}

        already_loaded = loaded_nodes & node_ids
        if already_loaded:
            print("WARNING - nodes in {} have been assigned several images".
                  format(already_loaded))
        loaded_nodes |= node_ids

        # from there on we need strings
        node_args = " ".join(str(node_id) for node_id in node_ids)
        load_job = SshJob(
            gateway,
            scheduler=scheduler,
            required=check_lease,
            label=("loading {} on {}".format(image, node_args)),
            commands=[
                Run("rhubarbe load -i {} {}".format(image, node_args)),
                Run("rhubarbe wait {}".format(node_args)),
            ],
            verbose=verbose_jobs,
        )
        octopus.append(load_job)

    ### turn off stuff
    # phones - there's no equivalent of --all ~ notation with phones
    kept_phones = {r2lab_id(ph) for ph in phones_left_alone}
    off_phones = set(range(1, PHONES+1)) - kept_phones

    r2lab_includes = []
    for script in ("faraday.sh", "r2labutils.sh"):
        r2lab_includes.append(find_local_embedded_script(script))

    if off_phones:
        # a single non-critical job, with one command per phone
        phone_commands = [
            RunScript(find_local_embedded_script("faraday.sh"),
                      "macphone{}".format(phone),
                      "r2lab-embedded/shell/macphone.sh",
                      "phone-off",
                      label="turn off phone {}".format(phone),
                      includes=r2lab_includes)
            for phone in off_phones
        ]
        phones_job = SshJob(gateway,
                            scheduler=scheduler,
                            required=check_lease,
                            critical=False,
                            commands=phone_commands,
                            verbose=verbose_jobs)
        octopus.append(phones_job)

    # nodes: spare the ones left alone as well as the ones being loaded
    # usrps: do turn off the usrp device even on loaded nodes
    devices_job = SshJob(
        gateway,
        scheduler=scheduler,
        required=check_lease,
        label="Turn off unused devices",
        commands=[
            Run(_rhubarbe_command(verb="off",
                                  left_alone=nodes_left_alone | loaded_nodes)),
            Run(_rhubarbe_command(verb="usrpoff",
                                  left_alone=sdrs_left_alone)),
        ],
        verbose=verbose_jobs,
    )
    octopus.append(devices_job)

    # embed experiment scheduler, that waits for all the jobs above
    experiment_scheduler.requires(octopus)
    scheduler.add(experiment_scheduler)

    return scheduler
Ejemplo n.º 12
0
def main(argv):
    if len(argv) == 3:
        print("!! Unfinished routines !!")
    else:
        print("++ Using default settings ++")
        ###########################
        ## Local Variables
        # platform='multiGPU'
        platform='distributed'
        gateway_user='******'
		gateway_host='gw_host'
        node_username='******'

        #########################################################
        ## Distributed Requirements
        num_ps = 1
        num_workers = 2

        #########################################################
        gateway = SshNode(
                        gateway_host,
                        username=gateway_user
                        )

		##########################################################
        elif platform == 'distributed':
            
            ## Jetson-TX2 Cluster
            hosts = [cluster_ip_host]

            #########################################################
            ## Use the Server node for processing the first satge Data-mining
            server = ResourceManager._set_Node(master_host, master_user, gateway,)

            ############################
            # Push the launch file (run_splitpoint)
            # With the Parameters Connfiguration on the server
            # To execute the First Satege in this host
            job_launch_S1 = SshJob(
                        node = server,
                        commands = [
                                ## Run the script locate in the laptop
                                RunScript("run_dataspworkers_mlp.sh", platform, num_ps, num_workers),
                                Run("echo Split Data DONE"),
                                ],
                        )

            #############################
            ## A collection of the PS node
            ps = []
            [ps.append(ResourceManager._set_Node(hosts[i],
                                                node_username, gateway,))
                                                for i in range(num_ps)]

            #############################
            ## A collection of the workers node
            workers = []
            [workers.append(ResourceManager._set_Node(hosts[num_ps+i],
                                                    node_username, gateway,))
                                                    for i in range(num_workers)]

            #########################################################
            ## Setting Parameters for the First Stage
            FEATURES_NAME = "FULL-W1_x1_x2_x3_x4_x5_x7_x8_Y1"
            SANDBOX=str("/data_B/datasets/drg-PACA/healthData/sandbox-"+FEATURES_NAME)
            YEAR=str(2008)

            ## Stage 1
            # localdir = "/1_Mining-Stage/"
            # SP_Dir_X = str(SANDBOX+localdir+"BPPR-"+FEATURES_NAME+"-"YEAR)

            #############################
            ## Setting parameters for the Second Stage
            S_PLOINT = str(3072)    #1536)
            #SP_ARGV = str(S_PLOINT+"-"+platform)
            SP_ARGV = platform+"-"+str(num_workers)
            SP2=str(SANDBOX+"/2_Split-Point-"+SP_ARGV+"/")

            #############################
            ## BPPR Directories
            dir_train = "/data_training/"
            dir_valid = "/data_valid/"
            dir_test = "/data_test/"

            ############################
            ## Worker data management
            worker_healthData = "/opt/diagnosenet/healthData/"
            worker_sandbox = str(worker_healthData+"/sandbox-"+FEATURES_NAME)
            worker_splitpoint = str(worker_sandbox+"/2_Split-Point-"+SP_ARGV+"/")
            worker_train = str(worker_splitpoint+dir_train)
            worker_valid = str(worker_splitpoint+dir_valid)
            worker_test = str(worker_splitpoint+dir_test)

            ############################
            ## Worker commands
            mkd_worker_sandbox = str("mkdir"+" "+worker_sandbox)
            mkd_worker_splitpoint = str("mkdir"+" "+worker_splitpoint)
            mkd_worker_train = str("mkdir"+" "+worker_train)
            mkd_worker_valid = str("mkdir"+" "+worker_valid)
            mkd_worker_test = str("mkdir"+" "+worker_test)

            #############################
            ## Create a JOB to build the sandbox for each Worker
            job_build_sandbox = []

            [ job_build_sandbox.append(SshJob(
                            node = workers[i],
                            commands = [
                                RunString(mkd_worker_sandbox),
                                RunString(mkd_worker_splitpoint),
                                RunString(mkd_worker_train),
                                RunString(mkd_worker_valid),
                                RunString(mkd_worker_test),
                                Run("echo SANDBOX ON WORKER DONE"), ],
                                )) for i in range(len(workers)) ]


            #############################
            ## Create a command for transfer data
            scp = "scp"
            cmd_X_train_transfer = []
            cmd_y_train_transfer = []
            cmd_X_valid_transfer = []
            cmd_y_valid_transfer = []
            cmd_X_test_transfer = []
            cmd_y_test_transfer = []

            for i in range(num_workers):
                worker_host = str(node_user+"@"+ hosts[num_ps+i] +":")
                num_file = str(i+1)
                ## Commands to transfer Training dataset
                X_train_splitted = str(SP2+dir_train+"X_training-"+FEATURES_NAME+"-"+YEAR+"-"+num_file+".txt")
                cmd_X_train_transfer.append(str(scp+" "+X_train_splitted+" "+worker_host+worker_train))
                y_train_splitted = str(SP2+dir_train+"y_training-"+FEATURES_NAME+"-"+YEAR+"-"+num_file+".txt")
                cmd_y_train_transfer.append(str(scp+" "+y_train_splitted+" "+worker_host+worker_train))

                ## Commands to transfer Validation dataset
                X_valid_splitted = str(SP2+dir_valid+"X_valid-"+FEATURES_NAME+"-"+YEAR+"-"+num_file+".txt")
                cmd_X_valid_transfer.append(str(scp+" "+X_valid_splitted+" "+worker_host+worker_valid))
                y_valid_splitted = str(SP2+dir_valid+"y_valid-"+FEATURES_NAME+"-"+YEAR+"-"+num_file+".txt")
                cmd_y_valid_transfer.append(str(scp+" "+y_valid_splitted+" "+worker_host+worker_valid))

                ## Commands to transfer Test dataset
                X_test_splitted = str(SP2+dir_test+"X_test-"+FEATURES_NAME+"-"+YEAR+"-"+num_file+".txt")
                cmd_X_test_transfer.append(str(scp+" "+X_test_splitted+" "+worker_host+worker_test))
                y_test_splitted = str(SP2+dir_test+"y_test-"+FEATURES_NAME+"-"+YEAR+"-"+num_file+".txt")
                cmd_y_test_transfer.append(str(scp+" "+y_test_splitted+" "+worker_host+worker_test))


            ############################
            ## Build a JOB for transfering data to each worker sandbox
            job_data_transfer = []
            [job_data_transfer.append(SshJob(
                        node = server,
                        commands = [
                                    RunString(cmd_X_train_transfer[i]),
                                    RunString(cmd_y_train_transfer[i]),
                                    Run("echo SENDER TRAINING DATA DONE"),
                                    RunString(cmd_X_valid_transfer[i]),
                                    RunString(cmd_y_valid_transfer[i]),
                                    Run("echo SENDER VALID DATA DONE"),
                                    RunString(cmd_X_test_transfer[i]),
                                    RunString(cmd_y_test_transfer[i]),
                                    Run("echo SENDER TEST DATA DONE"),
                                    ],)
                                    ) for i in range(len(workers))]

            #########################################################
            ## Create a sequence orchestration scheduler instance upfront
            worker_seq = []

            ## Add the Stage-1 JOB into Scheduler
            worker_seq.append(Scheduler(Sequence(
                                job_launch_S1)))

            ## Add the worker JOBs into Scheduler
            [worker_seq.append(Scheduler(Sequence(
                                job_build_sandbox[i],
                                job_data_transfer[i], ))
                                ) for i in range(len(workers))]

            #############################
            ## Old method
            ## Add the JOB PS Replicas into Scheduler
            # worker_seq.append(Scheduler(Sequence(
            #                     job_PS_replicas)))
            #
            # ## Add the JOB WORKER Replicas into Scheduler
            # worker_seq.append(Scheduler(Sequence(
            #                     job_WORKER_replicas)))


            #############################
            ## Run the Sequence JOBS
            # [seq.orchestrate() for seq in worker_seq]


            #########################################################
            #########################################################
            ## Push the launch file (run_secondstage_distributed)
            ## With the Distributed Parameters for each worker replicas
            ## To distributed training of Unsupervised Embedding

            #############################
            ## Build a collection of TensorFlow Hosts for PS
            tf_ps = []
            [tf_ps.append(str(hosts[i]+":2222")) for i in range(num_ps)]
            # print("+++ tf_ps: {}".format(tf_ps))
            tf_ps=','.join(tf_ps)

            #############################
            ## Build a collection of TensorFlow Hosts for workers
            tf_workers = []
            [tf_workers.append(str(hosts[num_ps+i]+":2222")) for i in range(num_workers)]
            # print("+++ tf_workers: {}".format(tf_workers))
            tf_workers=','.join(tf_workers)

            job_PS_replicas = []
            [job_PS_replicas.append(SshJob(
                        node = ps[i],
                        commands = [
                                ## Launches local script to execute on cluster
                                # RunScript("run_secondstage_distributed.sh",
                                #             platform, tf_ps, tf_workers,
                                #             num_ps, num_workers, "ps", i),
                                RunScript("run_thirdstage_distributed_mlp.sh",
                                            platform, tf_ps, tf_workers,
                                            num_ps, num_workers, "ps", i),
                                Run("echo PS REPLICA DONE"),
                                ],)
                                ) for i in range(len(ps))]


            job_WORKER_replicas = []
            [job_WORKER_replicas.append(SshJob(
                        node = workers[i],
                        commands = [
                                ## Launches local script to execute on cluster
                                # RunScript("run_secondstage_distributed.sh",
                                #             platform, tf_ps, tf_workers,
                                #             num_ps, num_workers, "worker", i),
                                RunScript("run_thirdstage_distributed_mlp.sh",
                                            platform, tf_ps, tf_workers,
                                            num_ps, num_workers, "worker", i),
                                Run("echo WORKER REPLICA DONE"),
                                ], )
                                ) for i in range(len(workers))]

            #############################
            ### Simultaneous jobs
            s_distraining = Scheduler()
            [s_distraining.add(job_PS_replicas[i]) for i in range(len(ps))]
            [s_distraining.add(job_WORKER_replicas[i]) for i in range(len(workers))]

            s_distraining.run(jobs_window = int(num_ps+num_workers+1))
Ejemplo n.º 13
0
            Pull(remotepaths = "PING-{:02d}-{:02d}".format(i, j), localpath="."),
        ]
    )
    # looping on the source
    for i, nodei in node_index.items()
    # and on the destination
    for j, nodej in node_index.items()
    # and keep only half of the couples
    if j > i
]

if args.parallel is None:
    # with the sequential strategy, we just need to
    # create a Sequence out of the list of pings
    # Sequence will add the required relationships
    scheduler.add(Sequence(*pings, scheduler=scheduler))
    # for running sequentially we impose no limit on the scheduler
    # that will be limited anyway by the very structure
    # of the required graph
    jobs_window = None
else:
    # with the parallel strategy
    # we just need to insert all the ping jobs
    # as each already has its required OK
    scheduler.update(pings)
    # this time the value in args.parallel is the one
    # to use as the jobs_limit; if 0 then inch'allah
    jobs_window = args.parallel

# finally - i.e. when all pings are done
# we can list the current contents of our local directory
Ejemplo n.º 14
0
def one_run(*, protocol, interference,
            run_name=default_run_name, slicename=default_slicename,
            tx_power, phy_rate, antenna_mask, channel,
            load_images=False,
            node_ids=DEFAULT_NODE_IDS,
            src_ids=DEFAULT_SRC_IDS, dest_ids=DEFAULT_DEST_IDS,
            scrambler_id=DEFAULT_SCRAMBLER_ID,
            tshark=False, map=False, warmup=False,
            route_sampling=False, iperf=False,
            verbose_ssh=False, verbose_jobs=False, dry_run=False,
            run_number=None):
    """
    Performs data acquisition on all nodes with the following settings

    Arguments:
        tx_power: in dBm, a string like 5, 10 or 14.
          Corresponds to the transmission power.
        phy_rate: a string among 1, 54. Corresponds to the wifi rate.
        antenna_mask: a string among 1, 3, 7.
        channel: a string like e.g. 1 or 40. Corresponds to the channel.
        protocol: a string among batman, olsr. Corresponds to the protocol.
        interference: in amplitude percentage, a string like 15 or 20.
          Corresponds to the power of the noise generated in the spectrum.
          Can be either None or "None" to mean no interference.
        run_name: the name for a subdirectory where all data will be kept;
          successive runs should use the same name for further visualization
        slicename: the Unix login name (slice name) to enter the gateway
        load_images: a boolean specifying whether nodes should be
          re-imaged first
        node_ids: a list of node ids to run the scenario against;
          strings or ints are OK; sources and destinations are
          always added to that list
        src_ids: a list of nodes from which we will launch the pings;
          strings or ints are OK.
        dest_ids: a list of nodes that are the targets of the pings;
          strings or ints are OK.
        scrambler_id: the id of the node used to generate interference
        tshark: a boolean specifying whether we should run tcpdump
          on the nodes, and fetch/parse the resulting .pcap files.
        map: a boolean specifying whether we should fetch/parse
          the route tables of the nodes.
        warmup: a boolean specifying whether we should run a ping before
          the experiment to be certain of the stabilisation on the network.
        route_sampling: a boolean specifying whether the route-sampling
          service is started on the nodes, and its output retrieved.
        iperf: a boolean specifying whether iperf servers (on destinations)
          and clients (on sources) are run before the pings.
        verbose_ssh: verbose flag passed to the ssh nodes
        verbose_jobs: verbose flag passed to the jobs and schedulers
        dry_run: if set, only the summary of the run is written into
          the trace file and printed; nothing is actually run
        run_number: only used in the banner printed on the terminal

    Returns:
        False if the trace file cannot be written;
        True in dry-run mode;
        otherwise the value returned by the global scheduler's run().
    """
    # set default for the nodes parameter
    node_ids = ([int(id) for id in node_ids]
                if node_ids is not None else DEFAULT_NODE_IDS)
    src_ids = ([int(id) for id in src_ids]
               if src_ids is not None else DEFAULT_SRC_IDS)
    # fall back on the destinations default, not on the nodes default
    dest_ids = ([int(id) for id in dest_ids]
                if dest_ids is not None else DEFAULT_DEST_IDS)

    # all nodes - i.e. including sources and destinations -
    # need to run the protocol
    node_ids = list(set(node_ids).union(set(src_ids).union(set(dest_ids))))

    if interference == "None":
        interference = None

    # open result dir no matter what
    run_root = naming_scheme(
        run_name=run_name, protocol=protocol,
        interference=interference, autocreate=True)

    # fix me    trace = run_root / f"trace-{%m-%d-%H-%M}"
    ref_time = apssh_time()
    trace = run_root / f"trace-{ref_time}"

    try:
        with trace.open('w') as feed:
            def log_line(line):
                time_line(line, file=feed)
            load_msg = f"{'WITH' if load_images else 'NO'} image loading"
            interference_msg = (f"interference={interference} "
                                f"from scrambler={scrambler_id}")
            nodes = " ".join(str(n) for n in node_ids)
            srcs = " ".join(str(n) for n in src_ids)
            dests = " ".join(str(n) for n in dest_ids)
            ping_labels = [
                f"PING {s} ➡︎ {d}"
                for s in src_ids
                # and on the destination
                for d in dest_ids
                if d != s
            ]

            log_line(f"output in {run_root}")
            log_line(f"trace in {trace}")
            log_line(f"protocol={protocol}")
            log_line(f"{load_msg}")
            log_line(f"{interference_msg}")
            log_line("----")
            log_line(f"Selected nodes : {nodes}")
            log_line(f"Sources : {srcs}")
            log_line(f"Destinations : {dests}")
            for label in ping_labels:
                log_line(f"{label}")
            log_line("----")
            # the feature flags are looked up by name
            # among the function parameters
            for feature in ('warmup', 'tshark', 'map',
                            'route_sampling', 'iperf'):
                log_line(f"Feature {feature}: {locals()[feature]}")

    except Exception as exc:
        print(f"Cannot write into {trace} - aborting this run")
        print(f"Found exception {type(exc)} - {exc}")
        return False
    #
    # dry-run mode
    # just display a one-liner with parameters
    #
    prelude = "" if not dry_run else "dry_run:"
    with trace.open() as feed:
        print(f"**************** {ref_time} one_run #{run_number}:")
        for line in feed:
            print(prelude, line, sep='', end='')
    if dry_run:
        return True

    # the nodes involved
    faraday = SshNode(hostname=default_gateway, username=slicename,
                      formatter=TimeColonFormatter(), verbose=verbose_ssh)

    # this is a python dictionary that allows to retrieve a node object
    # from an id
    node_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id), username="******",
                    formatter=TimeColonFormatter(), verbose=verbose_ssh)
        for id in node_ids
    }
    # extracts for sources and destinations
    src_index = {id:node for (id, node) in node_index.items()
                 if id in src_ids}
    dest_index = {id:node for (id, node) in node_index.items()
                  if id in dest_ids}

    if interference:
        node_scrambler = SshNode(
            gateway=faraday, hostname=fitname(scrambler_id), username="******",
            formatter=TimeColonFormatter(), verbose=verbose_ssh)
    # the global scheduler
    scheduler = Scheduler(verbose=verbose_jobs)

    ##########
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        verbose=verbose_jobs,
        label="rhubarbe check lease",
        command=Run("rhubarbe leases --check", label="rlease"),
    )

    # load images if requested

    # green_light always denotes what the next stage must wait for
    green_light = check_lease

    # at some point we did not load the scrambler if interference was None
    # and that was a way to run faster loads with no interference
    # but now we always load the scrambler node with gnuradio
    # this is because when we do runs.py -i None 15 30 ...
    # then the first call to one_run is with interference being None
    # but it is still important to load the scrambler
    if load_images:
        # copy node_ids
        load_ids = node_ids[:]
        load_ids.append(scrambler_id)
        # the nodes that we **do not** use should be turned off
        # so if we have selected e.g. nodes 10 12 and 15, we will do
        # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15
        negated_node_ids = [f"~{id}" for id in load_ids]

        # we can do these three things in parallel
        ready_jobs = [
            SshJob(node=faraday, required=green_light,
                   scheduler=scheduler, verbose=verbose_jobs,
                   command=Run("rhubarbe", "off", "-a", *negated_node_ids,
                               label="turn off unused nodes")),
            SshJob(node=faraday, required=green_light,
                   scheduler=scheduler, verbose=verbose_jobs,
                   label="load batman image",
                   command=Run("rhubarbe", "load", "-i",
                               "batman-olsr",
                               *node_ids,
                               label=f"load ubuntu on {node_ids}")),
            SshJob(
                node=faraday, required=green_light,
                scheduler=scheduler, verbose=verbose_jobs,
                label="load gnuradio image",
                command=Run("rhubarbe", "load", "-i",
                            "batman-olsr-gnuradio",
                            scrambler_id,
                            label=f"load gnuradio on {scrambler_id}")),
        ]

        # replace green_light in this case
        green_light = SshJob(
            node=faraday, required=ready_jobs,
            scheduler=scheduler, verbose=verbose_jobs,
            label="wait for nodes to come up",
            command=Run("rhubarbe", "wait", *load_ids))

    ##########
    # setting up the wireless interface on all nodes
    #
    # provide node-utilities with the ranges/units it expects
    frequency = channel_frequency[int(channel)]
    # tx_power_in_mBm not in dBm
    # NOTE(review): the docstring describes tx_power as a string, in which
    # case this would repeat the string 100 times instead of scaling it;
    # confirm that callers actually pass a number
    tx_power_driver = tx_power * 100

    # just in case some services failed in the previous experiment
    reset_failed_services_job = [
        SshJob(
            node=node,
            verbose=verbose_jobs,
            label="reset failed services",
            command=Run("systemctl reset-failed",
                        label="reset-failed services"))
        for id, node in node_index.items()
    ]
    reset_failed_services = Scheduler(
        *reset_failed_services_job,
        scheduler=scheduler,
        required=green_light,
        verbose=verbose_jobs,
        label="Reset failed services")
    init_wireless_sshjobs = [
        SshJob(
            node=node,
            verbose=verbose_jobs,
            label=f"init {id}",
            command=RunScript(
                "node-utilities.sh",
                f"init-ad-hoc-network-{WIRELESS_DRIVER}",
                WIRELESS_DRIVER, "foobar", frequency, phy_rate,
                antenna_mask, tx_power_driver,
                label="init add-hoc network"),
        )
        for id, node in node_index.items()]
    init_wireless_jobs = Scheduler(
        *init_wireless_sshjobs,
        scheduler=scheduler,
        required=green_light,
        verbose=verbose_jobs,
        label="Initialisation of wireless chips")

    if interference:
        # Run uhd_siggen with the chosen power
        init_scrambler_job = SshJob(
            scheduler=scheduler,
            required=green_light,
            forever=True,
            node=node_scrambler,
            verbose=verbose_jobs,
            #TODO : If exit-signal patch is done add exit-signal=["TERM"]
            #       to this run object and call uhd_siggen directly
            commands=[RunScript("node-utilities.sh",
                                "init-scrambler",
                                label="init scrambler"),
                      Run(f"systemd-run --unit=uhd_siggen -t ",
                          f"uhd_siggen -a usrp -f {frequency}M",
                          f"--sine --amplitude 0.{interference}",
                          label="systemctl start uhd_siggen")
                      ]
        )

    green_light = [init_wireless_jobs, reset_failed_services]
    # then install and run batman on fit nodes
    run_protocol_job = [
        SshJob(
            # scheduler=scheduler,
            node=node,
            label=f"init and run {protocol} on fit node {id}",
            verbose=verbose_jobs,
            # CAREFUL : These ones use sytemd-run
            #            with the ----service-type=forking option!
            command=RunScript("node-utilities.sh",
                              f"run-{protocol}",
                              label=f"run {protocol}"),
        )
        for id, node in node_index.items()]

    run_protocol = Scheduler(
        *run_protocol_job,
        scheduler=scheduler,
        required=green_light,
        verbose=verbose_jobs,
        label="init and run routing protocols")

    green_light = run_protocol

    # after that, run tcpdump on fit nodes, this job never ends...
    if tshark:

        run_tcpdump_job = [
            SshJob(
                # scheduler=scheduler_monitoring,
                node=node,
                forever=True,
                label=f"run tcpdump on fit node {id}",
                verbose=verbose_jobs,
                command=[
                    Run("systemd-run -t  --unit=tcpdump",
                        f"tcpdump -U -i moni-{WIRELESS_DRIVER}",
                        f"-y ieee802_11_radio -w /tmp/fit{id}.pcap",
                        label=f"tcpdump {id}")
                    ]
            )
            for id, node in node_index.items()
        ]

        run_tcpdump = Scheduler(
            *run_tcpdump_job,
            scheduler=scheduler,
            required=green_light,
            forever=True,
            verbose=verbose_jobs,
            label="Monitoring - tcpdumps")

    # let the wireless network settle
    settle_scheduler = Scheduler(
        scheduler=scheduler,
        required=green_light,
    )

    if warmup:
        # warmup pings don't need to be sequential, so let's
        # do all the nodes at the same time
        # on a given node though, we'll ping the other ends sequentially
        # see the graph for more
        warmup_jobs = [
            SshJob(
                node=node_s,
                verbose=verbose_jobs,
                commands=[
                    RunScript("node-utilities.sh",
                              "my-ping", f"10.0.0.{d}",
                              warmup_ping_timeout,
                              warmup_ping_interval,
                              warmup_ping_size,
                              warmup_ping_messages,
                              f"warmup {s} ➡︎ {d}",
                              label=f"warmup {s} ➡︎ {d}")
                    for d in dest_index.keys()
                    if s != d
                ]
            )
            # for each selected experiment nodes
            for s, node_s in src_index.items()
        ]
        warmup_scheduler = Scheduler(
            *warmup_jobs,
            scheduler=settle_scheduler,
            verbose=verbose_jobs,
            label="Warmup pings")
        settle_wireless_job2 = PrintJob(
            "Let the wireless network settle after warmup",
            sleep=settle_delay_shorter,
            scheduler=settle_scheduler,
            required=warmup_scheduler,
            label=f"settling-warmup for {settle_delay_shorter} sec")

    # this is a little cheating; could have gone before the bloc above
    # but produces a nicer graphical output
    # we might want to help asynciojobs if it offered a means
    # to specify entry and exit jobs in a scheduler
    settle_wireless_job = PrintJob(
        "Let the wireless network settle",
        sleep=settle_delay_long,
        scheduler=settle_scheduler,
        label=f"settling for {settle_delay_long} sec")

    green_light = settle_scheduler

    if iperf:
        iperf_service_jobs = [
            SshJob(
                node=node_d,
                verbose=verbose_jobs,
                forever=True,
                commands=[
                    Run("systemd-run -t --unit=iperf",
                        "iperf -s -p 1234 -u",
                        label=f"iperf serv on {d}"),
                ],
            )
            for d, node_d in dest_index.items()
        ]
        iperf_serv_sched = Scheduler(
            *iperf_service_jobs,
            verbose=verbose_jobs,
            label="Iperf Servers",
            # for a nicer graphical output
            # otherwise the exit arrow
            # from scheduler 'iperf mode'
            # to job 'settling for 60s'
            # gets to start from this box
            forever=True,
            )

        iperf_cli = [
            SshJob(
                node=node_s,
                verbose=verbose_jobs,
                commands=[
                    Run("sleep 7", label=""),
                    Run(f"iperf",
                        f"-c 10.0.0.{d} -p 1234",
                        f"-u -b {phy_rate}M -t 60",
                        f"-l 1024 > IPERF-{s:02d}-{d:02d}",
                        label=f"run iperf {s} ➡︎ {d}")
                ]
            )

            for s, node_s in src_index.items()
            for d, node_d in dest_index.items()
            if s != d
        ]
        iperf_cli_sched = Scheduler(
            Sequence(*iperf_cli),
            verbose=verbose_jobs,
            label="Iperf Clients")

        iperf_stop = [
            SshJob(node=node_d,
                   verbose=verbose_jobs,
                   label=f"Stop iperf on {d}",
                   command=Run("systemctl stop iperf"))
            for d, node_d in dest_index.items()
        ]
        iperf_stop_sched = Scheduler(
            *iperf_stop,
            required=iperf_cli_sched,
            verbose=verbose_jobs,
            label="Iperf server stop")
        iperf_fetch = [
            SshJob(node=node_s,
                   verbose=verbose_jobs,
                   command=Pull(
                       remotepaths=[f"IPERF-{s:02d}-{d:02d}"],
                       localpath=str(run_root),
                       # needs to be an f-string for {s} and {d}
                       # to be expanded in the label
                       label=f"fetch iperf {s} ➡︎ {d}")
                   )
            for s, node_s in src_index.items()
            for d, node_d in dest_index.items()
            if s != d
        ]
        iperf_fetch_sched = Scheduler(
            *iperf_fetch,
            required=iperf_stop_sched,
            verbose=verbose_jobs,
            label="Iperf fetch report")
        iperf_jobs = [iperf_serv_sched, iperf_cli_sched,
                      iperf_stop_sched, iperf_fetch_sched]
        iperf_sched = Scheduler(
            *iperf_jobs,
            scheduler=scheduler,
            required=green_light,
            verbose=verbose_jobs,
            label="Iperf Module")
        settle_wireless_job_iperf = PrintJob(
            "Let the wireless network settle",
            sleep=settle_delay_shorter,
            scheduler=scheduler,
            required=iperf_sched,
            label=f"settling-iperf for {settle_delay_shorter} sec")

        green_light = settle_wireless_job_iperf


    # create all the tracepath jobs from the first node in the list
    if map:
        map_jobs = [
            SshJob(
                node=node,
                label=f"Generating ROUTE file for proto {protocol} on node {id}",
                verbose=verbose_jobs,
                commands=[
                    RunScript(f"node-utilities.sh",
                              f"route-{protocol}",
                              f"> ROUTE-TABLE-{id:02d}",
                              label="get route table"),
                    Pull(remotepaths=[f"ROUTE-TABLE-{id:02d}"],
                         localpath=str(run_root),
                         label="")
                ],
            )
            for id, node in node_index.items()
        ]
        map_scheduler = Scheduler(
            *map_jobs,
            scheduler=scheduler,
            required=green_light,
            verbose=verbose_jobs,
            label="Snapshoting route files")
        green_light = map_scheduler

    if route_sampling:
        route_sampling_jobs = [
            SshJob(
                node=node,
                label=f"Route sampling service for proto {protocol} on node {id}",
                verbose=False,
                forever=True,
                commands=[
                    Push(localpaths=["route-sample-service.sh"],
                         remotepath=".", label=""),
                    Run("chmod +x route-sample-service.sh", label=""),
                    Run("systemd-run -t --unit=route-sample",
                        "/root/route-sample-service.sh",
                        "route-sample",
                        f"ROUTE-TABLE-{id:02d}-SAMPLED",
                        protocol,
                        label="start route-sampling"),
                ],
            )
            for id, node in node_index.items()
        ]
        route_sampling_scheduler = Scheduler(
            *route_sampling_jobs,
            scheduler=scheduler,
            verbose=False,
            forever=True,
            label="Route Sampling services launch",
            required=green_light)

    ##########
    # create all the ping jobs, one per (source, destination) couple
    # with source != destination
    # this again is a python list comprehension
    # see the 2 for instructions at the bottom
    #
    # notice that these SshJob instances are not yet added
    # to the scheduler, we will add them later on
    # as a Sequence inside the 'pings' scheduler

    pings_job = [
        SshJob(
            node=node_s,
            verbose=verbose_jobs,
            commands=[
                Run(f"echo actual ping {s} ➡︎ {d} using {protocol}",
                    label=f"ping {s} ➡︎ {d}"),
                RunScript("node-utilities.sh", "my-ping",
                          f"10.0.0.{d}",
                          ping_timeout, ping_interval,
                          ping_size, ping_messages,
                          f"actual {s} ➡︎ {d}",
                          ">", f"PING-{s:02d}-{d:02d}",
                          label=""),
                Pull(remotepaths=[f"PING-{s:02d}-{d:02d}"],
                     localpath=str(run_root),
                     label=""),
            ],
        )
        # for each selected experiment nodes
        for s, node_s in src_index.items()
        for d, node_d in dest_index.items()
        if s != d
    ]
    pings = Scheduler(
        scheduler=scheduler,
        label="PINGS",
        verbose=verbose_jobs,
        required=green_light)

    # stop the routing protocol on all nodes once the pings are done
    stop_protocol_job = [
        SshJob(
            # scheduler=scheduler,
            node=node,
            # required=pings,
            label=f"kill routing protocol on {id}",
            verbose=verbose_jobs,
            command=RunScript(f"node-utilities.sh",
                              f"kill-{protocol}",
                              label=f"kill-{protocol}"),
        )
        for id, node in node_index.items()
    ]
    stop_protocol = Scheduler(
        *stop_protocol_job,
        scheduler=scheduler,
        required=pings,
        label="Stop routing protocols",
    )

    # retrieve all pcap files from fit nodes
    if tshark:
        retrieve_tcpdump_job = [
            SshJob(
                # scheduler=scheduler,
                node=nodei,
                # required=pings,
                label=f"retrieve pcap trace from fit{i:02d}",
                verbose=verbose_jobs,
                commands=[
                    Run("systemctl stop tcpdump",
                        label="stop tcpdump"),
                    #Run("systemctl reset-failed tcpdump"),
                    #RunScript("node-utilities.sh", "kill-tcpdump",
                    #          label="kill-tcpdump"),
                    Run(
                        f"echo retrieving pcap trace and result-{i}.txt from fit{i:02d}",
                        label=""),
                    Pull(remotepaths=[f"/tmp/fit{i}.pcap"],
                         localpath=str(run_root), label=""),
                ],
            )
            for i, nodei in node_index.items()
        ]
        retrieve_tcpdump = Scheduler(
            *retrieve_tcpdump_job,
            scheduler=scheduler,
            required=pings,
            label="Retrieve tcpdump",
        )
    if route_sampling:
        retrieve_sampling_job = [
            SshJob(
                # scheduler=scheduler,
                node=nodei,
                # required=pings,
                label=f"retrieve sampling trace from fit{i:02d}",
                verbose=verbose_jobs,
                commands=[
                    # RunScript("node-utilities.sh", "kill-route-sample", protocol,
                    #          label = "kill route sample"),
                    #RunScript("route-sample-service.sh", "kill-route-sample",
                    #          label="kill route sample"),
                    Run("systemctl stop route-sample",
                        label="stop route-sample"),
                    Run(
                        f"echo retrieving sampling trace from fit{i:02d}",
                        label=""),
                    Pull(remotepaths=[f"ROUTE-TABLE-{i:02d}-SAMPLED"],
                         localpath=str(run_root), label=""),
                ],
            )
            for i, nodei in node_index.items()
        ]
        retrieve_sampling = Scheduler(
            *retrieve_sampling_job,
            scheduler=scheduler,
            required=pings,
            verbose=verbose_jobs,
            label="Stop & retrieve route sampling",
            )
    if tshark:
        # the pcap files are parsed locally, once they have been retrieved
        parse_pcaps_job = [
            SshJob(
                # scheduler=scheduler,
                node=LocalNode(),
                # required=retrieve_tcpdump,
                label=f"parse pcap trace {run_root}/fit{i}.pcap",
                verbose=verbose_jobs,
                #commands = [RunScript("parsepcap.sh", run_root, i)]
                command=Run("tshark", "-2", "-r",
                            f"{run_root}/fit{i}.pcap",
                            "-R",
                            f"'(ip.dst==10.0.0.{i} && icmp) && radiotap.dbm_antsignal'",
                            "-Tfields",
                            "-e", "'ip.src'",
                            # each -e option and its field are
                            # separate arguments
                            "-e", "'ip.dst'",
                            "-e", "'radiotap.dbm_antsignal'",
                            ">", f"{run_root}/result-{i}.txt",
                            label=f"parsing pcap from {i}"),
            )
            for i in node_ids
        ]
        parse_pcaps = Scheduler(
            *parse_pcaps_job,
            scheduler=scheduler,
            required=retrieve_tcpdump,
            label="Parse pcap",
        )

    if interference:
        kill_uhd_siggen = SshJob(
            scheduler=scheduler,
            node=node_scrambler,
            required=pings,
            label=f"killing uhd_siggen on the scrambler node {scrambler_id}",
            verbose=verbose_jobs,
            commands=[Run("systemctl", "stop", "uhd_siggen"),
                      #Run("systemctl reset-failed tcpdump"),
                      ],
        )
        kill_2_uhd_siggen = SshJob(
            scheduler=scheduler,
            node=faraday,
            required=kill_uhd_siggen,
            label=f"turning off usrp on the scrambler node {scrambler_id}",
            verbose=verbose_jobs,
            command=Run("rhubarbe", "usrpoff", scrambler_id),
        )

    # the pings are always run sequentially
    pings.add(Sequence(*pings_job))
    # for running sequentially we impose no limit on the scheduler
    # that will be limited anyway by the very structure
    # of the required graph

    # safety check

    scheduler.export_as_pngfile(run_root / "experiment-graph")
    # NOTE(review): dry_run has already returned earlier in this function,
    # so as written this branch is never reached
    if dry_run:
        scheduler.list()
        return True

    # if not in dry-run mode, let's proceed to the actual experiment
    ok = scheduler.run()  # jobs_window=jobs_window)

    # close all ssh connections
    close_ssh_in_scheduler(scheduler)


    # give details if it failed
    if not ok:
        scheduler.debrief()
        scheduler.export_as_pngfile("debug")
    if ok and map:
        time_line("Creation of MAP files")
        post_processor = ProcessRoutes(run_root, src_ids, node_ids)
        post_processor.run()
    if ok and route_sampling:
        time_line("Creation of ROUTE SAMPLING files")
        post_processor = ProcessRoutes(run_root, src_ids, node_ids)
        post_processor.run_sampled()
    # data acquisition is done, let's aggregate results
    # i.e. compute averages
    #if ok and tshark:
        #post_processor = Aggregator(run_root, node_ids, antenna_mask)
        #post_processor.run()

    time_line("one_run done")
    return ok
Ejemplo n.º 15
0
 def test_format(self):
     """
     Run two ``echo`` commands over ssh on the local host, with output
     going through a verbose TerminalFormatter that uses a custom
     format string.

     Nothing is asserted here; the scheduler is simply run.
     """
     scheduler = Scheduler()
     formatter = TerminalFormatter("%Y:%H:%S - @host@:@line@", verbose=True)
     local_node = SshNode(
         localhostname(),
         username=localuser(),
         formatter=formatter,
     )
     # a single job that issues both commands on the same node
     echo_commands = [Run("echo LINE1"), Run("echo LINE2")]
     scheduler.add(SshJob(node=local_node, commands=echo_commands))
     scheduler.run()
Ejemplo n.º 16
0
    def main(self, *test_argv):  # pylint: disable=r0915,r0912,r0914,c0111
        self.parser = parser = argparse.ArgumentParser()
        # scope - on what hosts
        parser.add_argument(
            "-s",
            "--script",
            action='store_true',
            default=False,
            help=f"""If this flag is present, the first element of the remote
            command is assumed to be either the name of a local script, or,
            if this is not found, the body of a local script, that will be
            copied over before being executed remotely.
            In this case it should be executable.

            On the remote boxes it will be installed
            and run in the {default_remote_workdir} directory.
            """)
        parser.add_argument(
            "-i",
            "--includes",
            dest='includes',
            default=[],
            action='append',
            help="""for script mode only : a list of local files that are
            pushed remotely together with the local script,
            and in the same location; useful when you want to
            to run remotely a shell script that sources other files;
            remember that on the remote end all files (scripts and includes)
            end up in the same location""")
        parser.add_argument("-t",
                            "--target",
                            dest='targets',
                            action='append',
                            default=[],
                            help="""
            specify targets (additive); at least one is required;
            each target can be either
            * a space-separated list of hostnames
            * the name of a file containing hostnames
            * the name of a directory containing files named after hostnames;
            see e.g. the --mark option
            """)
        parser.add_argument("-x",
                            "--exclude",
                            dest='excludes',
                            action='append',
                            default=[],
                            help="""
            like --target, but for specifying exclusions;
            for now there no wildcard mechanism is supported here;
            also the order in which --target and --exclude options
            are mentioned does not matter;
            use --dry-run to only check for the list of applicable hosts
            """)
        # global settings
        parser.add_argument("-w",
                            "--window",
                            type=int,
                            default=0,
                            help="""
            specify how many connections can run simultaneously;
            default is no limit
            """)
        parser.add_argument(
            "-c",
            "--connect-timeout",
            dest='timeout',
            type=float,
            default=default_timeout,
            help=f"specify connection timeout, default is {default_timeout}s")
        # ssh settings
        parser.add_argument(
            "-l",
            "--login",
            default=default_username,
            help=f"remote user name - default is {default_username}")
        parser.add_argument("-k",
                            "--key",
                            dest='keys',
                            default=None,
                            action='append',
                            type=str,
                            help="""
            The default is for apssh to locate an ssh-agent
            through the SSH_AUTH_SOCK environment variable.
            If this cannot be found, or has an empty set of keys,
            then the user should specify private key file(s) - additive
            """)
        parser.add_argument("-K",
                            "--ok-if-no-key",
                            default=False,
                            action='store_true',
                            help="""
            When no key can be found, apssh won't even bother
            to try and connect. With this option it proceeds
            even with no key available.
            """)
        parser.add_argument("-g",
                            "--gateway",
                            default=None,
                            help="""
            specify a gateway for 2-hops ssh
            - either hostname or username@hostname
            """)
        # how to store results
        # terminal
        parser.add_argument("-r",
                            "--raw-format",
                            default=False,
                            action='store_true',
                            help="""
            produce raw result, incoming lines are shown as-is without hostname
            """)
        parser.add_argument(
            "-tc",
            "--time-colon-format",
            default=False,
            action='store_true',
            help="equivalent to --format '@time@:@host@:@line@")
        parser.add_argument("-f",
                            "--format",
                            default=None,
                            action='store',
                            help="""specify output format, which may include
* `strftime` formats like e.g. %%H-%%M, and one of the following:
* @user@ for the remote username,
* @host@ for the target hostname,
* @line@ for the actual line output (which contains the actual newline)
* @time@ is a shorthand for %%H-%%M-%%S""")

        # filesystem
        parser.add_argument("-o",
                            "--out-dir",
                            default=None,
                            help="specify directory where to store results")
        parser.add_argument("-d",
                            "--date-time",
                            default=None,
                            action='store_true',
                            help="use date-based directory to store results")
        parser.add_argument("-m",
                            "--mark",
                            default=False,
                            action='store_true',
                            help="""
            available with the -d and -o options only.

            When specified, then for all nodes there will be a file created
            in the output subdir, named either
            0ok/<hostname> for successful nodes,
            or 1failed/<hostname> for the other ones.

            This mark file will contain a single line with the returned code,
            or 'None' if the node was not reachable at all
            """)

        # usual stuff
        parser.add_argument("-n",
                            "--dry-run",
                            default=False,
                            action='store_true',
                            help="Only show details on selected hostnames")
        parser.add_argument("-v",
                            "--verbose",
                            action='store_true',
                            default=False)
        parser.add_argument("-D",
                            "--debug",
                            action='store_true',
                            default=False)
        parser.add_argument("-V",
                            "--version",
                            action='store_true',
                            default=False)

        # the commands to run
        parser.add_argument("commands",
                            nargs=argparse.REMAINDER,
                            type=str,
                            help="""
            command to run remotely.

            If the -s or --script option is provided, the first argument
            here should denote a (typically script) file **that must exist**
            on the local filesystem. This script is then copied over
            to the remote system and serves as the command for remote execution
            """)

        if test_argv:
            args = self.parsed_args = parser.parse_args(test_argv)
        else:
            args = self.parsed_args = parser.parse_args()

        # helpers
        if args.version:
            print(f"apssh version {apssh_version}")
            exit(0)

        # manual check for REMAINDER
        if not args.commands:
            print("You must provide a command to be run remotely")
            parser.print_help()
            exit(1)

        # load keys
        self.loaded_private_keys = load_private_keys(
            self.parsed_args.keys, args.verbose or args.debug)
        if not self.loaded_private_keys and not args.ok_if_no_key:
            print("Could not find any usable key - exiting")
            exit(1)

        # initialize a gateway proxy if --gateway is specified
        gateway = None
        if args.gateway:
            gwuser, gwhost = self.user_host(args.gateway)
            gateway = SshProxy(hostname=gwhost,
                               username=gwuser,
                               keys=self.loaded_private_keys,
                               formatter=self.get_formatter(),
                               timeout=self.parsed_args.timeout,
                               debug=self.parsed_args.debug)

        proxies = self.create_proxies(gateway)
        if args.verbose:
            print_stderr(f"apssh is working on {len(proxies)} nodes")

        window = self.parsed_args.window

        # populate scheduler
        scheduler = Scheduler(verbose=args.verbose)
        if not args.script:
            command_class = Run
            extra_kwds_args = {}
        else:
            # try RunScript
            command_class = RunScript
            extra_kwds_args = {'includes': args.includes}
            # but if the filename is not found then use RunString
            script = args.commands[0]
            if not Path(script).exists():
                if args.verbose:
                    print("Warning: file not found '{}'\n"
                          "=> Using RunString instead".format(script))
                command_class = RunString

        for proxy in proxies:
            scheduler.add(
                SshJob(node=proxy,
                       critical=False,
                       command=command_class(*args.commands,
                                             **extra_kwds_args)))

        # pylint: disable=w0106
        scheduler.jobs_window = window
        if not scheduler.run():
            scheduler.debrief()
        results = [job.result() for job in scheduler.jobs]

        ##########
        # print on stdout the name of the output directory
        # useful mostly with -d :
        subdir = self.get_formatter().run_name \
            if isinstance(self.get_formatter(), SubdirFormatter) \
            else None
        if subdir:
            print(subdir)

        # details on the individual retcods - a bit hacky
        if self.parsed_args.debug:
            for proxy, result in zip(proxies, results):
                print(f"PROXY {proxy.hostname} -> {result}")
        # marks
        names = {0: '0ok', None: '1failed'}
        if subdir and self.parsed_args.mark:
            # do we need to create the subdirs
            need_ok = [s for s in results if s == 0]
            if need_ok:
                os.makedirs(f"{subdir}/{names[0]}", exist_ok=True)
            need_fail = [s for s in results if s != 0]
            if need_fail:
                os.makedirs(f"{subdir}/{names[None]}", exist_ok=True)

            for proxy, result in zip(proxies, results):
                prefix = names[0] if result == 0 else names[None]
                mark_path = Path(subdir) / prefix / proxy.hostname
                with mark_path.open("w") as mark:
                    mark.write(f"{result}\n")

        # xxx - when in gateway mode, the gateway proxy never gets disconnected
        # which probably is just fine

        # return 0 only if all hosts have returned 0
        # otherwise, return 1
        failures = [r for r in results if r != 0]
        overall = 0 if not failures else 1
        return overall
Ejemplo n.º 17
0
def one_run(tx_power,
            phy_rate,
            antenna_mask,
            channel,
            interference,
            protocol,
            *,
            run_name=default_run_name,
            slicename=default_slicename,
            load_images=False,
            node_ids=None,
            verbose_ssh=False,
            verbose_jobs=False,
            dry_run=False,
            tshark=False,
            map=False,
            warmup=False,
            exp=default_exp,
            dest=default_node_ids,
            ping_number=default_ping_number,
            route_sampling=False):
    """
    Performs data acquisition on all nodes with the following settings

    Arguments:
        tx_power: in dBm, a string like 5, 10 or 14. Correspond to the transmission power.
        phy_rate: a string among 1, 54. Correspond to the wifi rate.
        antenna_mask: a string among 1, 3, 7.
        channel: a string like e.g. 1 or 40. Correspond to the channel.
        protocol: a string among batman , olsr. Correspond to the protocol
        interference : in dBm, a string like 60 or 50. Correspond to the power of the noise generated in the root.
        run_name: the name for a subdirectory where all data will be kept
                  successive runs should use the same name for further visualization
        slicename: the Unix login name (slice name) to enter the gateway
        load_images: a boolean specifying whether nodes should be re-imaged first
        node_ids: a list of node ids to run the scenario against; strings or ints are OK;
                  defaults to the nodes [1, 4, 5, 12, 19, 22,27 ,31, 33, 37]
        tshark: a boolean specifying wether we should format/parse the .pcap.
        map: a boolean specifying wether we should fetch/parse the route tables of the nodes.
        warmup: a boolean specifying wether we should run a ping before the experiment to be certain of the stabilisation on the network.
        exp: a list of nodes from which we will launch the ping from. strings or ints are OK.
                    default to the node [1]
        ping_number : The number of pings that will be generated
        
    """
    # set default for the nodes parameter
    node_ids = [int(id) for id in node_ids
                ] if node_ids is not None else default_node_ids
    exp_ids = [int(id) for id in exp] if exp is not None else default_exp
    dest_ids = [int(id)
                for id in dest] if dest is not None else default_node_ids
    #
    # dry-run mode
    # just display a one-liner with parameters
    #
    if dry_run:
        print("************************************")
        print("\n")
        run_root = naming_scheme(protocol,
                                 run_name,
                                 tx_power,
                                 phy_rate,
                                 antenna_mask,
                                 channel,
                                 interference,
                                 autocreate=False)
        load_msg = "" if not load_images else " LOAD"
        nodes = " ".join(str(n) for n in node_ids)
        exps = " ".join(str(n) for n in exp)
        pingst = [
            "PING{}-->{}".format(e, j) for e in exp_ids
            # and on the destination
            for j in node_ids if e != j  #and not
            #(j in exp_ids and j < e)
        ]

        print(
            "dry-run:{protocol} {run_name}{load_msg} -"
            " t{tx_power} r{phy_rate} a{antenna_mask} ch{channel} I{interference}-"
            "nodes {nodes}"
            " exp {exps}".format(**locals()))
        print(
            "\nNodes from which the experiment will be launched : \n{}\nList of pings generated:\n"
            .format(exps))
        print(pingst)
        print("\n")
        if warmup:
            print("Will do warmup pings\n")
        if tshark:
            print(
                "Will format data using tshark and will agregate the RSSI into one RSSI.txt file"
            )
        if map:
            print(
                "Will fetch the routing tables of the node (when stabilited) and will agregate the results\n"
            )
        if route_sampling:
            print("Will launch route sampling services on nodes")
        #print("Test creation of ROUTES files")
        #post_processor= ProcessRoutes(run_root, exp_ids, node_ids)
        #post_processor.run()
        #print("\nList of tracepaths generated:\n{}".format(tracepathst))
        # in dry-run mode we are done

    ###
    # create the logs directory based on input parameters
    run_root = naming_scheme(protocol,
                             run_name,
                             tx_power,
                             phy_rate,
                             antenna_mask,
                             channel,
                             interference,
                             autocreate=False)
    if (run_root.is_dir()):
        purgedir(run_root)
    run_root = naming_scheme(protocol,
                             run_name,
                             tx_power,
                             phy_rate,
                             antenna_mask,
                             channel,
                             interference,
                             autocreate=True)
    exp_info_file_name = run_root / "info.txt"
    with exp_info_file_name.open("w") as info_file:
        info_file.write("Selected nodes : \n")
        for node in node_ids[:-1]:
            info_file.write(f"{node} ")
        info_file.write(f"{node_ids[-1]}")
        info_file.write("\nSources : \n")
        for src in exp_ids[:-1]:
            info_file.write(f"{src} ")
        info_file.write(f"{exp_ids[-1]}")
        info_file.write("\nDestinations : \n")
        for dest in dest_ids[:-1]:
            info_file.write(f"{dest} ")
        info_file.write(f"{dest_ids[-1]}" + "\n")

    # the nodes involved
    faraday = SshNode(hostname=default_gateway,
                      username=slicename,
                      formatter=TimeColonFormatter(),
                      verbose=verbose_ssh)

    # this is a python dictionary that allows to retrieve a node object
    # from an id
    node_index = {
        id: SshNode(gateway=faraday,
                    hostname=fitname(id),
                    username="******",
                    formatter=TimeColonFormatter(),
                    verbose=verbose_ssh)
        for id in node_ids
    }
    if interference != "None":
        node_scrambler = SshNode(gateway=faraday,
                                 hostname=fitname(scrambler_id),
                                 username="******",
                                 formatter=TimeColonFormatter(),
                                 verbose=verbose_ssh)
    # the global scheduler
    scheduler = Scheduler(verbose=verbose_jobs)
    # if tshark:
    #scheduler_monitoring = Scheduler(verbose=verbose_jobs)
    #if interference != "None":
    #scheduler_interferences = Scheduler(verbose=verbose_jobs)

    ##########
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        verbose=verbose_jobs,
        critical=True,
        label="rhubarbe check lease",
        command=Run("rhubarbe leases --check", label="rlease"),
        #keep_connection = True
    )

    # load images if requested

    green_light = check_lease

    if load_images:
        # the nodes that we **do not** use should be turned off
        # so if we have selected e.g. nodes 10 12 and 15, we will do
        # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15
        negated_node_ids = ["~{}".format(id) for id in node_ids]
        #Add the id of the scrambler in the list and load the gnuradio image
        negated_node_ids.append("~{}".format(scrambler_id))
        load_ids = [int(id) for id in node_ids
                    ] if node_ids is not None else default_node_ids
        load_ids.append(scrambler_id)
        # replace green_light in this case
        #We use a modified image of gnuradio where uhd_siggen handle the signal SIGTERM in order to finish properly
        green_light = SshJob(
            node=faraday,
            required=check_lease,
            #critical=True,
            scheduler=scheduler,
            verbose=verbose_jobs,
            label="rhubarbe load/wait on nodes {}".format(load_ids),
            commands=[
                Run("rhubarbe",
                    "off",
                    "-a",
                    *negated_node_ids,
                    label="roff {}".format(negated_node_ids)),
                Run("rhubarbe",
                    "load",
                    *node_ids,
                    label="rload {}".format(node_ids)),
                Run("rhubarbe",
                    "load",
                    "-i",
                    "gnuradio_batman",
                    scrambler_id,
                    label="load gnuradio batman on {}".format(scrambler_id)),
                Run("rhubarbe", "wait", *load_ids, label="rwait")
            ],
            #keep_connection = True
        )

    ##########
    # setting up the wireless interface on all nodes
    #
    # this is a python feature known as a list comprehension
    # we just create as many SshJob instances as we have
    # (id, SshNode) couples in node_index
    # and gather them all in init_wireless_jobs
    # they all depend on green_light
    #
    # provide node-utilities with the ranges/units it expects
    frequency = channel_frequency[int(channel)]
    # tx_power_in_mBm not in dBm
    tx_power_driver = tx_power * 100

    init_wireless_sshjobs = [
        SshJob(
            #scheduler=scheduler,
            #required=green_light,
            node=node,
            verbose=verbose_jobs,
            label="init {}".format(id),
            command=RunScript("node-utilities.sh",
                              "init-ad-hoc-network-{}".format(wireless_driver),
                              wireless_driver,
                              "foobar",
                              frequency,
                              phy_rate,
                              antenna_mask,
                              tx_power_driver,
                              label="init add-hoc network"),
            #keep_connection = True
        ) for id, node in node_index.items()
    ]
    init_wireless_jobs = Scheduler(
        *init_wireless_sshjobs,
        scheduler=scheduler,
        required=green_light,
        #critical = True,
        verbose=verbose_jobs,
        label="Initialisation of wireless chips")

    green_light_prot = init_wireless_jobs
    if interference != "None":
        #Run uhd_siggen with the chosen power
        frequency_str = frequency / 1000
        frequency_str = str(frequency_str) + "G"
        init_scrambler_job = [
            SshJob(
                forever=True,
                node=node_scrambler,
                verbose=verbose_jobs,
                label="init scrambler on node {}".format(scrambler_id),
                command=RunScript("node-utilities.sh",
                                  "init-scrambler",
                                  interference,
                                  frequency_str,
                                  label="init scambler"),
                #keep_connection = True
            )
        ]
        init_scrambler = Scheduler(
            *init_scrambler_job,
            scheduler=scheduler,
            required=green_light,
            #forever = True,
            #critical = True,
            verbose=verbose_jobs,
            label="Running interference")
    # then install and run batman on fit nodes
    run_protocol_job = [
        SshJob(
            #scheduler=scheduler,
            node=node,
            #required=green_light_prot,
            label="init and run {} on fit node {}".format(protocol, i),
            verbose=verbose_jobs,
            command=RunScript("node-utilities.sh",
                              "run-{}".format(protocol),
                              label="run {}".format(protocol)),
            #keep_connection = True
        ) for i, node in node_index.items()
    ]

    run_protocol = Scheduler(
        *run_protocol_job,
        scheduler=scheduler,
        required=green_light_prot,
        #critical = True,
        verbose=verbose_jobs,
        label="init and run routing protocols")

    # after that, run tcpdump on fit nodes, this job never ends...
    if tshark:
        run_tcpdump_job = [
            SshJob(
                #scheduler=scheduler_monitoring,
                node=node,
                forever=True,
                label="run tcpdump on fit node".format(i),
                verbose=verbose_jobs,
                commands=[
                    RunScript("node-utilities.sh",
                              "run-tcpdump",
                              wireless_driver,
                              i,
                              label="run tcpdump")
                ],
                #keep_connection = True
            ) for i, node in node_index.items()
        ]
        run_tcpdump = Scheduler(
            *run_tcpdump_job,
            scheduler=scheduler,
            required=run_protocol,
            forever=True,
            #critical = True,
            verbose=verbose_jobs,
            label="Monitoring (tcpdum) Jobs")
    # let the wireless network settle
    settle_wireless_job = PrintJob(
        "Let the wireless network settle",
        sleep=settle_delay,
        scheduler=scheduler,
        required=run_protocol,
        label="settling for {} sec".format(settle_delay))

    green_light_experiment = settle_wireless_job

    if warmup:
        warmup_pings_job = [
            SshJob(
                node=nodei,
                #required=green_light_experiment,
                label="warmup ping {} -> {}".format(i, j),
                verbose=verbose_jobs,
                commands=[
                    Run("echo {} '->' {}".format(i, j),
                        label="ping {} '->' {}".format(i, j)),
                    RunScript("node-utilities.sh",
                              "my-ping",
                              "10.0.0.{}".format(j),
                              ping_timeout,
                              ping_interval,
                              ping_size,
                              ping_number,
                              label="")
                ],
                #keep_connection = True
            )
            #for each selected experiment nodes
            for e in exp_ids
            # looping on the source (to get the correct sshnodes)
            for i, nodei in node_index.items()
            # and on the destination
            for j, nodej in node_index.items()
            # and keep only sources that are in the selected experiment nodes and remove destination that are themselves
            # and remove the couples that have already be done
            #    print("i {index} exp {expe}".format(index = i, expe= exp))
            if (i == e) and e != j and not (j in exp_ids and j < e)
        ]
        warmup_pings = Scheduler(
            Sequence(*warmup_pings_job),
            scheduler=scheduler,
            required=green_light_experiment,
            #critical = True,
            verbose=verbose_jobs,
            label="Warmup ping")
        settle_wireless_job2 = PrintJob(
            "Let the wireless network settle",
            sleep=settle_delay / 2,
            scheduler=scheduler,
            required=warmup_pings,
            label="settling-warmup for {} sec".format(settle_delay / 2))

        green_light_experiment = settle_wireless_job2
    ##########
    # create all the tracepath jobs from the first node in the list
    #
    if map:
        routes_job = [
            SshJob(
                node=nodei,
                #scheduler=scheduler,
                #required=green_light_experiment,
                label="Generating ROUTE file for prot {} on node {}".format(
                    protocol, i),
                verbose=verbose_jobs,
                commands=[
                    RunScript("node-utilities.sh",
                              "route-{}".format(protocol),
                              ">",
                              "ROUTE-TABLE-{:02d}".format(i),
                              label="get route table"),
                    Pull(remotepaths="ROUTE-TABLE-{:02d}".format(i),
                         localpath=str(run_root),
                         label="")
                ],
                #keep_connection = True
            ) for i, nodei in node_index.items()
        ]
        routes = Scheduler(
            *routes_job,
            scheduler=scheduler,
            required=green_light_experiment,
            #critical = True,
            verbose=verbose_jobs,
            label="Snapshoting route files")
        green_light_experiment = routes

    if route_sampling:
        routes_sampling_job2 = [
            SshJob(
                node=nodei,
                label="Route sampling service for prot {} on node {}".format(
                    protocol, i),
                verbose=False,
                #forever = True,
                commands=[
                    Push(localpaths=["route_sample_service.sh"],
                         remotepath=".",
                         label=""),
                    Run("source",
                        "route_sample_service.sh;",
                        "route-sample",
                        "ROUTE-TABLE-{:02d}-SAMPLED".format(i),
                        "{}".format(protocol),
                        label="run route sampling service"),
                ],
                #keep_connection = True
            ) for i, nodei in node_index.items()
        ]
        routes_sampling_job = [
            SshJob(
                node=nodei,
                label="Route sampling service for prot {} on node {}".format(
                    protocol, i),
                verbose=False,
                forever=True,
                #critical = True,
                #required = green_light_experiment,
                #scheduler = scheduler,
                commands=[
                    RunScript("route_sample_service.sh",
                              "route-sample",
                              "ROUTE-TABLE-{:02d}-SAMPLED".format(i),
                              "{}".format(protocol),
                              label="run route sampling service"),
                ],
                #keep_connection = True
            ) for i, nodei in node_index.items()
        ]
        routes_sampling = Scheduler(
            *routes_sampling_job,
            scheduler=scheduler,
            verbose=False,
            forever=True,
            #critical = True,
            label="Route Sampling services launch",
            required=green_light_experiment)
    ##########
    # create all the ping jobs, i.e. max*(max-1)/2
    # this again is a python list comprehension
    # see the three "for" clauses (and the "if" filter) at the bottom
    #
    # notice that these SshJob instances are not yet added
    # to the scheduler, we will add them later on
    # depending on the sequential/parallel strategy

    pings_job = [
        SshJob(
            node=nodei,
            #required=green_light_experiment,
            label="ping {} -> {}".format(i, j),
            verbose=verbose_jobs,
            commands=[
                Run("echo {} '->' {}".format(i, j),
                    label="ping {}'->' {}".format(i, j)),
                RunScript("node-utilities.sh",
                          "my-ping",
                          "10.0.0.{}".format(j),
                          ping_timeout,
                          ping_interval,
                          ping_size,
                          ping_number,
                          ">",
                          "PING-{:02d}-{:02d}".format(i, j),
                          label=""),
                Pull(remotepaths="PING-{:02d}-{:02d}".format(i, j),
                     localpath=str(run_root),
                     label=""),
            ],
            #keep_connection = True
        )
        # for each selected experiment node
        for e in exp_ids
        # looping on the source (to get the correct sshnodes)
        for i, nodei in node_index.items()
        # and on the destination
        for j in dest_ids
        # keep only the sources that are selected experiment nodes,
        # drop the pairs whose destination is the source itself,
        # and drop the pairs that have already been done, i.e. whose
        # destination is itself an experiment node with a smaller id
        if (i == e) and e != j and not (j in exp_ids and j < e)
    ]
    pings = Scheduler(
        scheduler=scheduler,
        label="PINGS",
        #critical = True,
        verbose=verbose_jobs,
        required=green_light_experiment)

    # kill the routing protocol on every node once the pings are done
    stop_protocol_job = [
        SshJob(
            #scheduler=scheduler,
            node=nodei,
            #required=pings,
            label="kill routing protocol on fit{:02d}".format(i),
            verbose=verbose_jobs,
            #critical = True,
            commands=[
                RunScript("node-utilities.sh",
                          "kill-{}".format(protocol),
                          label="kill-{}".format(protocol)),
            ],
            #keep_connection = False
        ) for i, nodei in node_index.items()
    ]
    stop_protocol = Scheduler(
        *stop_protocol_job,
        scheduler=scheduler,
        required=pings,
        #critical = True,
        label="Stop routing protocols",
    )

    if tshark:
        retrieve_tcpdump_job = [
            SshJob(
                #scheduler=scheduler,
                node=nodei,
                #required=pings,
                label="retrieve pcap trace from fit{:02d}".format(i),
                verbose=verbose_jobs,
                #critical = True,
                commands=[

                    # RunScript("node-utilities.sh", "kill-{}".format(protocol), label = "kill-{}".format(protocol)),
                    RunScript("node-utilities.sh",
                              "kill-tcpdump",
                              label="kill-tcpdump"),
                    #Run("sleep 1"),
                    Run("echo retrieving pcap trace and result-{i}.txt from fit{i:02d}"
                        .format(i=i),
                        label=""),
                    Pull(remotepaths=["/tmp/fit{}.pcap".format(i)],
                         localpath=str(run_root),
                         label=""),
                ],
                #keep_connection = True
            ) for i, nodei in node_index.items()
        ]
        retrieve_tcpdump = Scheduler(
            *retrieve_tcpdump_job,
            scheduler=scheduler,
            required=pings,
            #critical = True,
            label="Retrieve tcpdump",
        )
    if route_sampling:
        retrieve_sampling_job = [
            SshJob(
                #scheduler=scheduler,
                node=nodei,
                #required=pings,
                label="retrieve sampling trace from fit{:02d}".format(i),
                verbose=verbose_jobs,
                #critical = True,
                commands=[
                    #RunScript("node-utilities.sh", "kill-route-sample", protocol,
                    #          label = "kill route sample"),
                    RunScript("route_sample_service.sh",
                              "kill-route-sample",
                              label="kill route sample"),
                    Run("echo retrieving sampling trace from fit{i:02d}".
                        format(i=i),
                        label=""),
                    Pull(remotepaths=["ROUTE-TABLE-{:02d}-SAMPLED".format(i)],
                         localpath=str(run_root),
                         label=""),
                ],
                #keep_connection = True
            ) for i, nodei in node_index.items()
        ]
        retrieve_sampling = Scheduler(
            *retrieve_sampling_job,
            scheduler=scheduler,
            required=pings,
            #critical=True,
            verbose=verbose_jobs,
            label="Retrieve & stopping route sampling",
        )
    if tshark:
        parse_pcaps_job = [
            SshJob(
                #scheduler=scheduler,
                node=LocalNode(),
                #required=retrieve_tcpdump,
                label="parse pcap trace {path}/fit{node}.pcap".format(
                    path=run_root, node=i),
                verbose=verbose_jobs,
                #commands = [RunScript("parsepcap.sh", run_root, i)]
                commands=[
                    Run("tshark",
                        "-2",
                        "-r",
                        "{path}/fit{node}.pcap".format(path=run_root, node=i),
                        "-R",
                        "'(ip.dst==10.0.0.{node} && icmp) && radiotap.dbm_antsignal'"
                        .format(node=i),
                        "-Tfields",
                        "-e",
                        "'ip.src'", "-e"
                        "'ip.dst'",
                        "-e",
                        "'radiotap.dbm_antsignal'",
                        ">",
                        "{path}/result-{node}.txt".format(path=run_root,
                                                          node=i),
                        label="parse pcap locally")
                ],
                #keep_connection = True
            ) for i in node_ids
        ]
        parse_pcaps = Scheduler(
            *parse_pcaps_job,
            scheduler=scheduler,
            required=retrieve_tcpdump,
            #critical=True,
            label="Parse pcap",
        )
#TODO: TURN OFF USRP

    if interference != "None":
        kill_uhd_siggen = SshJob(
            scheduler=scheduler,
            node=node_scrambler,
            required=pings,
            label="killing uhd_siggen on the scrambler node {}".format(
                scrambler_id),
            verbose=verbose_jobs,
            #critical = True,
            commands=[Run("pkill", "uhd_siggen")],
            #keep_connection = True
        )
        kill_2_uhd_siggen = SshJob(
            scheduler=scheduler,
            node=faraday,
            required=kill_uhd_siggen,
            label="turning off usrp on the scrambler node {}".format(
                scrambler_id),
            verbose=verbose_jobs,
            commands=[
                Run("rhubarbe", "usrpoff", "fit{}".format(scrambler_id))
            ],
            #keep_connection = True
        )
#if map:
#scheduler.add(Sequence(*tracepaths, scheduler=scheduler))
#if warmup:
#       scheduler.add(Sequence(*warmup_pings_job, scheduler=scheduler))
    pings.add(Sequence(*pings_job))
    # for running sequentially we impose no limit on the scheduler,
    # which will be limited anyway by the very structure
    # of the "required" graph
    #jobs_window = None
    if dry_run:
        scheduler.export_as_pngfile(run_root / "experiment_graph")
        return True
    # if not in dry-run mode, let's proceed to the actual experiment

    ok = scheduler.orchestrate()  #jobs_window=jobs_window)

    scheduler.shutdown()
    dot_file = run_root / "experiment_graph"
    if not dot_file.is_file():
        scheduler.export_as_dotfile(dot_file)
        #TODO: is this necessary? if the user wants to see it, they can just run it themselves
        #call(["dot",  "-Tpng", dot_file, "-o", run_root / "experitment_graph.png"])


#ok=True
#ok = False
# give details if it failed
    if not ok:
        scheduler.debrief()
        scheduler.export_as_dotfile("debug")
    if ok and map:
        print("Creation of ROUTES files")
        post_processor = ProcessRoutes(run_root, exp_ids, node_ids)
        post_processor.run()
    if ok and route_sampling:
        post_processor = ProcessRoutes(run_root, exp_ids, node_ids)
        post_processor.run_sampled()
    print("END of creation for ROUTES FILES")
    # data acquisition is done, let's aggregate results
    # i.e. compute averages
    if ok and tshark:
        post_processor = Aggregator(run_root, node_ids, antenna_mask)
        post_processor.run()

    return ok