def test_sal(self, cmdopt):
        """End-to-end test of the simulation-analysis (RandomSA) pattern.

        Runs one iteration with one simulation and one analysis instance on
        the resource named by the pytest command-line option ``cmdopt``,
        prints the Levenshtein-distance statistics read back from the
        downloaded analysis files, and asserts that the expected output file
        was transferred, removing it afterwards.
        """
        #if __name__ == "__main__":

        resource = cmdopt
        home = expanduser("~")
        try:

            # Per-user resource credentials (project/username/schema/queue)
            # come from a JSON config keyed by resource name.
            with open('%s/workspace/EnsembleMDTesting/config.json' %
                      home) as data_file:
                config = json.load(data_file)

            print 'Project: ', config[resource]['project']
            # NOTE(review): the next line was mangled by credential redaction
            # ('******') and is not valid Python as-is — restore from VCS.
            print 'Username: '******'username']

            # Create a new static execution context with one resource and a fixed
            # number of cores and runtime.
            cluster = SingleClusterEnvironment(
                resource=resource,
                cores=1,
                walltime=15,
                username=config[resource]['username'],
                project=config[resource]['project'],
                access_schema=config[resource]['schema'],
                queue=config[resource]['queue'],
                database_url=
                'mongodb://*****:*****@ds051585.mlab.com:51585/rutgers_thesis',
                #database_name='myexps',
            )

            # Allocate the resources.
            cluster.allocate()
            randomsa = RandomSA(maxiterations=1,
                                simulation_instances=1,
                                analysis_instances=1)
            cluster.run(randomsa)
            cluster.deallocate()

            # After execution has finished, we print some statistical information
            # extracted from the analysis results that were transferred back.
            for it in range(1, randomsa.iterations + 1):
                print "\nIteration {0}".format(it)
                ldists = []
                for an in range(1, randomsa.analysis_instances + 1):
                    # Each analysis instance writes a single integer distance.
                    ldists.append(
                        int(
                            open("analysis-{0}-{1}.dat".format(it, an),
                                 "r").readline()))
                print "   * Levenshtein Distances: {0}".format(ldists)
                print "   * Mean Levenshtein Distance: {0}".format(
                    sum(ldists) / len(ldists))
            # Verify the downloaded result exists, then clean up.
            assert os.path.isfile(
                "%s/workspace/EnsembleMDTesting/E2E_test/analysis-1-1.dat" %
                home)
            os.remove(
                "%s/workspace/EnsembleMDTesting/E2E_test/analysis-1-1.dat" %
                home)
        except EnsemblemdError, er:

            print "Ensemble MD Toolkit Error: {0}".format(str(er))
            raise  # Just raise the execption again to get the backtrace
    def test_copy_input_data_single(self):
        #if __name__ == '__main__':

        #resource = 'local.localhost'
        try:

            with open('%s/config.json' %
                      os.path.dirname(os.path.abspath(__file__))) as data_file:
                config = json.load(data_file)

            # Create a new static execution context with one resource and a fixed
            # number of cores and runtime.

            cluster = SingleClusterEnvironment(
                resource='xsede.stampede',
                cores=1,
                walltime=15,
                #username=None,
                username='******',
                project='TG-MCB090174',
                access_schema='ssh',
                queue='development',

                #project=config[resource]['project'],
                #access_schema = config[resource]['schema'],
                #queue = config[resource]['queue'],
                database_url=
                'mongodb://*****:*****@ds051585.mlab.com:51585/rutgers_thesis'
            )

            os.system('/bin/echo passwd > input_file.txt')

            # Allocate the resources.
            cluster.allocate(wait=True)

            # Set the 'instances' of the pipeline to 1. This means that 1 instance
            # of each pipeline step is executed.
            ##        app = _TestMyApp(instances=1,
            ##                         copy_directives="/etc/passwd",
            ##                         checksum_inputfile="passwd",
            ##                         download_output="CHKSUM_1"
            ##                         )
            app = _TestMyApp(steps=1, instances=1)
            cluster.run(app)
            f = open("./output_file.txt")
            print "Name of file: ", f.name
            print "file closed or not: ", f.closed
            fname = f.readline().split()
            print "fname: ", fname
            cluster.deallocate()
            assert fname == ['passwd']
            f.close()
            os.remove("./output_file.txt")

        except Exception as er:
            print "Ensemble MD Toolkit Error: {0}".format(str(er))
            raise  # Just raise the execption again to get the backtrace
 def test_name(self):
     """The static execution context must report its name as 'Static'."""
     from radical.ensemblemd import SingleClusterEnvironment

     env = SingleClusterEnvironment(resource="local.localhost",
                                    cores=1,
                                    walltime=1,
                                    database_url='mongodb://localhost:27017',
                                    database_name='dbname')
     assert env.name == 'Static'
Esempio n. 4
0
    def test__copy_input_data_single(self):
        """Check if we can copy output data to a different location on the execution host - single input.
        """
        # One-core local pilot with a short walltime.
        env = SingleClusterEnvironment(resource="localhost",
                                       cores=1,
                                       walltime=5)

        # Copy the produced checksum file into the test's output directory.
        directive = "checksum.txt > {0}".format(self._output_dir)
        pattern = _TestCopyOutputData_Pattern(instances=1,
                                              output_copy_directives=[directive])

        env.allocate()
        env.run(pattern)
    def test__single_cluster_environment_api(self):
        """ Test the single cluster environment API.

        Passing an object that is not an execution pattern to run() must
        raise an error.
        """

        from radical.ensemblemd import SingleClusterEnvironment

        sec = SingleClusterEnvironment(resource="localhost",
                                       cores=1,
                                       walltime=1)

        try:
            sec.allocate()
            sec.run("wrong_type")
        except Exception:
            # Expected: run() rejects non-pattern arguments.
            pass
        else:
            # BUGFIX: the original placed `assert False` inside the try block,
            # where its AssertionError was swallowed by `except Exception` —
            # the test could never fail. The else-clause only runs when run()
            # did NOT raise.
            assert False, "TypeError execption expected."
    def test_all_pairs_remote(self,cmdopt):
        """End-to-end test of the all-pairs pattern on a remote resource.

        Runs a _TestRandomAP pattern over a one-element set on the resource
        named by the pytest command-line option ``cmdopt`` and asserts that
        the comparison log file was downloaded, removing it afterwards.
        """
#if __name__ == "__main__":


    # use the resource specified as argument, fall back to localhost
        resource = cmdopt
        home = expanduser("~")
        try:

            # Per-user credentials come from a JSON config keyed by resource.
            with open('%s/workspace/EnsembleMDTesting/config.json'%home) as data_file:    
                config = json.load(data_file)
            print 'project: ', config[resource]['project']
            # NOTE(review): the next line was mangled by credential redaction
            # ('******') and is not valid Python as-is — restore from VCS.
            print 'username: '******'username']
            # Create a new static execution context with one resource and a fixed
            # number of cores and runtime.
            cluster = SingleClusterEnvironment(
                            resource=resource,
                            cores=1,
                            walltime=15,
                            username=config[resource]['username'],

                            project=config[resource]['project'],
                            access_schema = config[resource]['schema'],
                            queue = config[resource]['queue'],

                            database_url='mongodb://*****:*****@ds051585.mlab.com:51585',
                            database_name='rutgers_thesis',
            )

            # Allocate the resources.
            cluster.allocate()

            # For example the set has 5 elements.
            ElementsSet1 = range(1,2)
            randAP = _TestRandomAP(set1elements=ElementsSet1,windowsize1=1)

            cluster.run(randAP)

            cluster.deallocate()
            print "Pattern Execution Completed Successfully! Result files are downloaded!"
            # The pattern compares element 1 with element 1 and downloads the
            # resulting log; verify it arrived, then clean up.
            assert os.path.isfile("./comparison_1_1.log")
            os.remove("./comparison_1_1.log")
            
        except EnsemblemdError, er:

            print "Ensemble MD Toolkit Error: {0}".format(str(er))
            raise # Just raise the execption again to get the backtrace
Esempio n. 7
0
    def test__link_input_data_multi(self):
        """Check if we can link input data from a location on the execution host - multiple input.
        """
        # One-core local pilot.
        env = SingleClusterEnvironment(resource="localhost",
                                       cores=1,
                                       walltime=15)

        # Link two host files into the working directory; checksum one of
        # them and download the result file.
        pattern = _TestLinkInputData_Pattern(
            instances=1,
            link_directives=["/etc/passwd", "/etc/group"],
            checksum_inputfile="passwd",
            download_output="CHKSUM_3")

        env.allocate()
        env.run(pattern)
Esempio n. 8
0
    def test__link_input_data_single_rename(self):
        """Check if we can link input data from a location on the execution host - single input with rename.
        """
        # One-core local pilot.
        env = SingleClusterEnvironment(resource="localhost",
                                       cores=1,
                                       walltime=15)

        # Link /etc/passwd under the new name 'input', checksum it, and
        # download the result file.
        pattern = _TestLinkInputData_Pattern(
            instances=1,
            link_directives="/etc/passwd > input",
            checksum_inputfile="input",
            download_output="CHKSUM_2")

        env.allocate()
        env.run(pattern)
def enmd_setup_run(request):
    from radical.ensemblemd import SingleClusterEnvironment
    try:
        sec = SingleClusterEnvironment(
            resource="local.localhost",
            cores=1,
            walltime=1,
            database_url=
            'mongodb://*****:*****@ds051585.mongolab.com:51585',
            database_name='rutgers_thesis')
        test = _TestRun(steps=1, instances=1)
        ret_allocate = sec.allocate()
        ret_run = sec.run(test)
        ret_deallocate = sec.deallocate()
    except Exception as e:
        print ret_run
        raise
    return ret_allocate, ret_run, ret_deallocate
Esempio n. 10
0
    def test__upload_input_data_single(self):
        """Check if we can upload input data from a location on the host running these tests - single input.

        Uploads /etc/passwd, checksums it remotely, downloads CHKSUM_1 and
        verifies the checksummed file name, cleaning up afterwards.
        """
        cluster = SingleClusterEnvironment(resource="localhost",
                                           cores=1,
                                           walltime=5)

        test = _TestUploadInputData_Pattern(instances=1,
                                            upload_directives="/etc/passwd",
                                            checksum_inputfile="passwd",
                                            download_output="CHKSUM_1")
        cluster.allocate()
        cluster.run(test)

        # BUGFIX: context manager + finally so the downloaded file is always
        # closed and removed, even when the assertion fails (the original
        # leaked the handle and left CHKSUM_1 behind on failure).
        try:
            with open("./CHKSUM_1") as f:
                # Line format: "<checksum> <filename>".
                csum, fname = f.readline().split()
            assert "passwd" in fname
        finally:
            os.remove("./CHKSUM_1")
def enmd_setup():
    from radical.ensemblemd import SingleClusterEnvironment
    try:
        sec = SingleClusterEnvironment(
            resource="local.localhost",
            cores=1,
            walltime=1,
            database_url=
            'mongodb://*****:*****@ds051585.mongolab.com:51585',
            database_name='rutgers_thesis')
        ret_allocate = sec.allocate(wait=True)
        ret_deallocate = False
        ret_deallocate = sec.deallocate()

    except Exception as e:
        print 'test failed'
        raise

    return ret_allocate, ret_deallocate
Esempio n. 12
0
    def test__copy_input_data_multi(self):
        """Check if we can copy input data from a location on the execution host - multiple input.

        Copies /etc/passwd and /etc/group, checksums passwd remotely,
        downloads CHKSUM_3 and verifies the checksummed file name, cleaning
        up afterwards.
        """
        cluster = SingleClusterEnvironment(resource="localhost",
                                           cores=1,
                                           walltime=15)

        test = _TestCopyInputData_Pattern(
            instances=1,
            copy_directives=["/etc/passwd", "/etc/group"],
            checksum_inputfile="passwd",
            download_output="CHKSUM_3")
        cluster.allocate()
        cluster.run(test)

        # BUGFIX: context manager + finally so the downloaded file is always
        # closed and removed, even when the assertion fails (the original
        # leaked the handle and left CHKSUM_3 behind on failure).
        try:
            with open("./CHKSUM_3") as f:
                # Line format: "<checksum> <filename>".
                csum, fname = f.readline().split()
            assert "passwd" in fname
        finally:
            os.remove("./CHKSUM_3")
Esempio n. 13
0
    def test__throw_on_malformed_kernel(self):
        """Test if an exception is thrown in case things go wrong in the Pipline pattern.
        """
        try:
            # Throwaway one-core local context; the pattern is expected to
            # fail, so a one-minute walltime is plenty.
            cluster = SingleClusterEnvironment(resource="localhost",
                                               cores=1,
                                               walltime=1,
                                               username=None,
                                               allocation=None)

            faulty = _FaultyPattern(instances=1)
            cluster.run(faulty)

            # Reaching this point means run() did not raise — fail the test.
            # (AssertionError is not an EnsemblemdError, so it propagates.)
            assert False, "Expected exception due to malformed URL in Pattern description."

        except EnsemblemdError:
            # Exception should pop up — this is the success path.
            pass
Esempio n. 14
0
def enmd_setup():
    """Allocate and immediately deallocate a SingleClusterEnvironment on
    xsede.stampede, returning the (allocate, deallocate) call results.

    Failures are reported with a short message and re-raised.
    """
    from radical.ensemblemd import SingleClusterEnvironment
    try:
        sec = SingleClusterEnvironment(
            resource="xsede.stampede",
            cores=1,
            walltime=1,
            username='******',
            project='TG-MCB090174',
            access_schema='ssh',
            queue='development',
            database_url='mongodb://*****:*****@ds051585.mlab.com:51585',
            database_name='rutgers_thesis')
        # wait=True blocks until the pilot is active before returning.
        ret_allocate = sec.allocate(wait=True)
        # Default to False so the name is bound even if deallocate() fails.
        ret_deallocate = False
        ret_deallocate = sec.deallocate()

    except Exception as e:
        print 'test failed'
        raise

    return ret_allocate, ret_deallocate
Esempio n. 15
0
    def test__simulation_analysis_loop_profiling(self):
        """ Tests the Pipeline execution pattern API.

        Runs a no-op simulation-analysis loop and retrieves both execution
        profile representations.
        """
        env = SingleClusterEnvironment(resource="localhost",
                                       cores=1,
                                       walltime=30,
                                       username=None,
                                       allocation=None)
        # wait=True blocks until the pilot is active before the call returns.
        # That forfeits the queueing-time / file-transfer overlap, but gives
        # a clean baseline for profiling a specific pattern.
        env.allocate(wait=True)

        pattern = _NopSA(maxiterations=1,
                         simulation_instances=4,
                         analysis_instances=4,
                         idle_time=10)
        env.run(pattern)

        # Retrieve both profile representations after the run.
        profile_dict = pattern.execution_profile_dict
        profile_frame = pattern.execution_profile_dataframe
Esempio n. 16
0
def enmd_setup_run(request):
    """Allocate a SingleClusterEnvironment on xsede.stampede, run a one-step
    _TestRun pattern, deallocate, and return the three call results.

    ``request`` is accepted for pytest-fixture compatibility but unused.
    Exceptions are re-raised unchanged.
    """
    from radical.ensemblemd import SingleClusterEnvironment
    try:
        sec = SingleClusterEnvironment(
            #resource="local.localhost",
            #cores=1,
            #walltime=1,
            resource="xsede.stampede",
            cores=1,
            walltime=1,
            username='******',
            project='TG-MCB090174',
            access_schema='ssh',
            queue='development',
            database_url='mongodb://*****:*****@ds051585.mlab.com:51585',
            database_name='rutgers_thesis')
        test = _TestRun(steps=1, instances=1)
        ret_allocate = sec.allocate()
        ret_run = sec.run(test)
        ret_deallocate = sec.deallocate()
    except Exception as e:
        # NOTE(review): ret_run may be unbound here if allocate()/run()
        # failed, which is presumably why the print below is commented out.
        #print ret_run
        raise
    return ret_allocate, ret_run, ret_deallocate
Esempio n. 17
0
                '/home/suvigya/radical.ensemblemd-master/tests/tests/config.json'
        ) as data_file:
            config = json.load(data_file)
        #resource='xsede.stampede'
        print "project: ", config[resource]['project']
        print "username: "******"TG-MCB090174",
            #access_schema="ssh",
            #queue="development",
            project=config[resource]['project'],
            access_schema=config[resource]['schema'],
            queue=config[resource]['queue'],
            database_url=
            'mongodb://*****:*****@ds051585.mongolab.com:51585',
            database_name='rutgers_thesis',
        )

        os.system('/bin/echo remote > input_file.txt')

        # Allocate the resources.
        cluster.allocate()

        # Set the 'instances' of the pipeline to 1. This means that 1 instance
        # of each pipeline step is executed.
Esempio n. 18
0
        random.shuffle(pairings)

        pp = pprint.PrettyPrinter()
        pp.pprint(pairings)

        for item in pairings:
            core_count = item[0]
            iteration = item[1]
            # core_count = 1
            # iteration = 24
            instance_count = core_count
            cluster = SingleClusterEnvironment(
                resource="xsede.stampede",
                cores=core_count,
                walltime=30,
                username="******",
                project="TG-MCB090174",
                database_url=os.environ.get('RADICAL_PILOT_DBURL'),
                database_name='enmddb',
                queue="development")

            cluster.allocate()
            randomsa = RandomSA(maxiterations=1,
                                simulation_instances=instance_count,
                                analysis_instances=instance_count)
            cluster.run(randomsa)
            cluster.deallocate()

            new_core = "enmd_core_overhead_{0}_{1}.csv".format(
                core_count, iteration)
            new_pattern = "enmd_pat_overhead_{0}_{1}.csv".format(
Esempio n. 19
0

# ------------------------------------------------------------------------------
#
if __name__ == "__main__":

    try:

        # Create a new static execution context with one resource and a fixed
        # number of cores and runtime.
        cluster = SingleClusterEnvironment(
            resource="local.localhost",
            cores=16,
            walltime=5,
            #username='******',

            #project = None,
            #queue = None,

            #database_url=None,
            #database_name='myexps',
        )

        # Allocate the resources.
        cluster.allocate()

        # We set the simulation 'instances' to 16 and analysis 'instances' to 1. We set the adaptive
        # simulation to True.
        mssa = MSSA(iterations=2, simulation_instances=16, analysis_instances=1, adaptive_simulation=True)

        cluster.run(mssa)
Esempio n. 20
0
        traj_count = traj_numpy.shape[0]

        data = open('input.txt', 'w')

        for coordinates in traj_numpy:
            data.write('%s,%s,%s\n' %
                       (coordinates.tolist()[0], coordinates.tolist()[1],
                        coordinates.tolist()[2]))

        data.close()

        cluster = SingleClusterEnvironment(
            resource="xsede.comet",
            cores=core_count,
            walltime=90,
            username="******",
            project="unc100",
            #queue='debug',
            database_url=
            "mongodb://*****:*****@ds019678.mlab.com:19678/pilot_test")

        cluster.shared_data = [
            '/home/sean/midas/leaflet_finder/Vanilla/input.txt'
        ]

        # Allocate the resources.
        cluster.allocate()

        instance_count = int(math.ceil(float(traj_count) / float(window_size)))
        print "instance total is " + str(instance_count)
    try:
        # Create a new static execution context with one resource and a fixed
        # number of cores and runtime.

        script, sc, cc = argv

        coreCount = int(cc)
        setCount = int(sc)

        script_begin = datetime.datetime.now()

        cluster = SingleClusterEnvironment(
            resource="stampede.tacc.utexas.edu",
            #resource="local.localhost",
            cores=coreCount,
            walltime=10,
            #username=None,
            cleanup=False,
            #allocation=None
            username="******",
            allocation="TG-CCR140028")

        # Allocate the resources.
        cluster.allocate()

        # For example the set has 5 elements.
        ElementsSet = range(1, setCount + 1)
        randAP = RandomAP(setelements=ElementsSet)

        cluster.run(randAP)

        script_end = datetime.datetime.now()
Esempio n. 22
0
if __name__ == "__main__":

    script, sc, cc = argv

    coreCount = int(cc)
    setCount = int(sc)

    script_begin = datetime.datetime.now()	

    try:
        # Create a new static execution context with one resource and a fixed
        # number of cores and runtime.
        cluster = SingleClusterEnvironment(
            resource="stampede.tacc.utexas.edu",
            cores=coreCount,
	    cleanup=False,
            walltime=30,
	    username="******",
	    allocation="TG-CCR140028"
        )

        # Allocate the resources.
        cluster.allocate()

        # For example the set has 5 elements.
        ElementsSet1 = range(1,setCount+1)
        randAP = RandomAP(set1elements=ElementsSet1,windowsize1=1)

        cluster.run(randAP)
	
	script_end = datetime.datetime.now()
	script_diff = script_end - script_begin
    def test_replica_exchange(self, cmdopt):
        """End-to-end test of the static replica-exchange pattern (pattern 1).

        Runs 2 replicas for 1 cycle on the resource named by the pytest
        command-line option ``cmdopt``, then asserts that the per-replica
        output files were downloaded and removes them together with the
        generated input files.
        """
        #if __name__ == "__main__":
        resource = cmdopt
        try:

            # NOTE(review): hard-coded absolute path to a developer's home
            # directory — this only works on that machine.
            with open(
                    '/home/suvigya/radical.ensemblemd-master/tests/tests/config.json'
            ) as data_file:
                config = json.load(data_file)

            print 'Project: ', config[resource]['project']
            # NOTE(review): the next line was mangled by credential redaction
            # ('******') and is not valid Python as-is — restore from VCS.
            print 'Username: '******'username']
            # Create a new static execution context with one resource and a fixed
            # number of cores and runtime.

            workdir_local = os.getcwd()
            cluster = SingleClusterEnvironment(
                resource=resource,
                cores=1,
                walltime=15,
                username=config[resource]['username'],
                project=config[resource]['project'],
                access_schema=config[resource]['schema'],
                queue=config[resource]['queue'],
                database_url=
                'mongodb://*****:*****@ds051585.mongolab.com:51585/rutgers_thesis',
                #database_name='myexps',
            )

            # Allocate the resources.
            cluster.allocate()

            # creating RE pattern object
            re_pattern = _TestRePattern(workdir_local)

            # set number of replicas
            re_pattern.replicas = 2

            # set number of cycles
            re_pattern.nr_cycles = 1

            # initializing replica objects
            replicas = re_pattern.initialize_replicas()

            re_pattern.add_replicas(replicas)

            # run RE simulation
            cluster.run(re_pattern,
                        force_plugin="replica_exchange.static_pattern_1")

            cluster.deallocate()

            print "RE simulation finished!"
            print "Simulation performed {0} cycles for {1} replicas. In your working directory you should".format(
                re_pattern.nr_cycles, re_pattern.replicas)
            print "have {0} md_input_x_y.md files and {0} md_input_x_y.out files where x in {{0,1,2,...{1}}} and y in {{0,1,...{2}}}.".format(
                (re_pattern.nr_cycles * re_pattern.replicas),
                (re_pattern.replicas - 1), (re_pattern.nr_cycles - 1))
            print ".md file is replica input file and .out is output file providing number of occurrences of each character."

            # Verify both replica outputs were downloaded, then clean up all
            # generated input/output files.
            assert os.path.isfile("./md_input_0_0.out") and os.path.isfile(
                "./md_input_1_0.out")
            os.remove("./md_input_0_0.out")
            os.remove("./md_input_0_0.md")
            os.remove("./md_input_1_0.out")
            os.remove("./md_input_1_0.md")

        except EnsemblemdError, er:

            print "Ensemble MD Toolkit Error: {0}".format(str(er))
            raise  # Just raise the execption again to get the backtrace
Esempio n. 24
0
        k.copy_input_data = "$STAGE_1/asciifile-{0}.dat".format(instance)
        k.download_output_data = "cfreqs-{0}.dat".format(instance)
        return k


# ------------------------------------------------------------------------------
#
if __name__ == "__main__":
    try:

        cluster = SingleClusterEnvironment(
            resource="local.localhost",
            cores=1,
            walltime=15,
            #			username='******',
            #			project='TG-MCB090174',
            #			queue='development',
            database_url=
            'mongodb://ec2-54-221-194-147.compute-1.amazonaws.com:24242',
            database_name='myexps',
        )

        # Allocate the resources.
        cluster.allocate()

        ccount = CalculateChecksums(stages=2, tasks=16)
        cluster.run(ccount)
        cluster.deallocate()

    except EnsemblemdError, er:
Esempio n. 25
0
        data_file_path = gen_datafile(config["instance_data_size"])

        # iterate over cores
        for cfg_core in config["cores"]:

            # iterate over instances
            for cfg_inst in config["instances"]:

                for cfg_iter in config["iterations"]:

                    print "\n\ncores: %s instances: %s iterations: %s" % (
                        cfg_core, cfg_inst, cfg_iter)

                    cluster = SingleClusterEnvironment(resource="localhost",
                                                       cores=cfg_core,
                                                       walltime=30,
                                                       username=None,
                                                       allocation=None)
                    # wait=True waits for the pilot to become active
                    # before the call returns. This is not useful when
                    # you want to take advantage of the queueing time /
                    # file-transfer overlap, but it's useful for baseline
                    # performance profiling of a specific pattern.
                    cluster.allocate(wait=True)

                    nopsa = NopSA(maxiterations=cfg_iter,
                                  simulation_instances=cfg_inst,
                                  analysis_instances=cfg_inst,
                                  idle_time=config["idletime"],
                                  data_file_path=data_file_path,
                                  local_workdir=local_workdir)
        # Download the result files.
        k.download_output_data = output_filename
        return k


# ------------------------------------------------------------------------------
#
if __name__ == "__main__":

    try:
        # Create a new static execution context with one resource and a fixed
        # number of cores and runtime.
        cluster = SingleClusterEnvironment(resource="xsede.stampede",
                                           cores=128,
                                           walltime=30,
                                           username="******",
                                           allocation="TG-CCR140028",
                                           queue="development")

        # Allocate the resources.
        cluster.allocate()

        # For example the set has 5 elements.
        ElementsSet = range(1, 56)
        randAP = RandomAP(setelements=ElementsSet)

        cluster.run(randAP)

        print "Succefully Completed! Everything is downloaded!"

    except EnsemblemdError, er:
        resource = 'local.localhost'

    try:

        with open('%s/config.json' %
                  os.path.dirname(os.path.abspath(__file__))) as data_file:
            config = json.load(data_file)

        # Create a new static execution context with one resource and a fixed
        # number of cores and runtime.
        cluster = SingleClusterEnvironment(
            resource=resource,
            cores=1,
            walltime=15,
            #username=None,
            project=config[resource]['project'],
            access_schema=config[resource]['schema'],
            queue=config[resource]['queue'],
            database_url=
            'mongodb://ec2-54-221-194-147.compute-1.amazonaws.com:24242',
            database_name='myexps',
        )

        # Allocate the resources.
        cluster.allocate()

        # We set both the the simulation and the analysis step 'instances' to 16.
        # If they
        mssa = MSSA(iterations=4,
                    simulation_instances=16,
                    analysis_instances=1)
        if args.RPconfig is None:
            parser.error('Please enter a RP configuration file')
            sys.exit(1)
        if args.Kconfig is None:
            parser.error('Please enter a Kernel configuration file')
            sys.exit(0)

        RPconfig = imp.load_source('RPconfig', args.RPconfig)
        Kconfig = imp.load_source('Kconfig', args.Kconfig)

        # Create a new static execution context with one resource and a fixed
        # number of cores and runtime.
        cluster = SingleClusterEnvironment(
            resource=RPconfig.REMOTE_HOST,
            cores=RPconfig.PILOTSIZE,
            walltime=RPconfig.WALLTIME,
            username=RPconfig.UNAME,  #username
            project=RPconfig.ALLOCATION,  #project
            queue=RPconfig.QUEUE,
            database_url=RPconfig.DBURL)

        cluster.allocate()

        # We set the 'instances' of the simulation step to 16. This means that 16
        # instances of the simulation are executed every iteration.
        # We set the 'instances' of the analysis step to 1. This means that only
        # one instance of the analysis is executed for each iteration
        randomsa = Gromacs_LSDMap(maxiterations=Kconfig.num_iterations,
                                  simulation_instances=Kconfig.num_CUs,
                                  analysis_instances=1)

        cluster.run(randomsa)
Esempio n. 29
0

# ------------------------------------------------------------------------------
#
if __name__ == "__main__":

    try:

        # Create a new static execution context with one resource and a fixed
        # number of cores and runtime.
        cluster = SingleClusterEnvironment(
            resource='local.localhost',
            cores=1,
            walltime=15,
            #username = None,
            #project = None,
            #queue = None,

            #                database_url='mongodb://ec2-54-221-194-147.compute-1.amazonaws.com:24242',
            #                database_name='myexps',
        )

        # Allocate the resources.
        cluster.allocate()

        # Set the 'instances' of the pipeline to 16. This means that 16 instances
        # of each pipeline step are executed.
        #
        # Execution of the 16 pipeline instances can happen concurrently or
        # sequentially, depending on the resources (cores) available in the
        # SingleClusterEnvironment.
        ]
        k.download_input_data = "http://gist.githubusercontent.com/oleweidner/6084b9d56b04389717b9/raw/611dd0c184be5f35d75f876b13604c86c470872f/gistfile1.txt > UTF-8-demo.txt"
        k.download_output_data = "checksum{0}.sha1".format(instance)
        return k


# ------------------------------------------------------------------------------
#
if __name__ == "__main__":

    try:
        # Create a new static execution context with one resource and a fixed
        # number of cores and runtime.
        cluster = SingleClusterEnvironment(resource="stampede.tacc.utexas.edu",
                                           cores=1,
                                           walltime=15,
                                           username=None,
                                           allocation=None)

        # Set the 'instances' of the pipeline to 16. This means that 16 instances
        # of each pipeline step are executed.
        #
        # Execution of the 16 pipeline instances can happen concurrently or
        # sequentially, depending on the resources (cores) available in the
        # SingleClusterEnvironment.
        ccount = CalculateChecksums(instances=128)

        cluster.run(ccount)

        # Print the checksums
        print "\nResulting checksums:"