예제 #1
0
    def __init__(self, outdir, config, is_synthetic_workflow):
        """Set up the workflow generator from an output directory and a config.

        'outdir' is the directory where the workflow is written, and 'config'
        is a ConfigParser object. When 'is_synthetic_workflow' is true, the
        database names lose their "database/" prefix and the input file
        attributes are replaced by generated "<name>_mock" files.
        """
        self.outdir = outdir
        self.config = config
        self.daxfile = os.path.join(self.outdir, "dax.xml")
        self.replicas = {}

        # The temperature list is comma separated; surrounding whitespace is
        # stripped from every entry.
        raw_temperatures = self.getconf("temperatures")
        self.temperatures = [item.strip() for item in raw_temperatures.split(",")]

        # Every other setting is stored exactly as self.getconf() returns it,
        # under an attribute named after the option.
        for option in ("equilibrate_steps", "production_steps",
                       "equilibrate_output", "production_output",
                       "pressure", "charge", "structure", "coordinates",
                       "parameters", "topfile", "extended_system",
                       "sassena_db"):
            setattr(self, option, self.getconf(option))

        self.incoherent_db = "database/db-neutron-incoherent.xml"
        self.coherent_db = "database/db-neutron-coherent.xml"

        self.is_synthetic_workflow = is_synthetic_workflow
        if not self.is_synthetic_workflow:
            return

        # A synthetic workflow has no database directory.
        self.incoherent_db = "db-neutron-incoherent.xml"
        self.coherent_db = "db-neutron-coherent.xml"
        self.keg_params = KegParametersFactory(self.config)

        # Swap each input file for a mock and have keg_params generate it
        # under the "inputs" directory.
        for mocked in ("structure", "coordinates", "parameters",
                       "topfile", "extended_system", "sassena_db"):
            mock_name = mocked + "_mock"
            self.__dict__[mocked] = mock_name
            self.keg_params.generate_input_file(
                mocked, os.path.join("inputs", mock_name))
예제 #2
0
class RefinementWorkflow(object):
    """Generate a charge-refinement workflow.

    The generator writes a DAX file, one set of configuration files per
    charge listed in the configuration, and a replica catalog, all into
    'outdir'. For each charge it emits an equilibrate, a production, a ptraj,
    a sassena incoherent and a sassena coherent job; a single shared job
    untars the sassena database.
    """

    # Input file attributes that are replaced by generated mock files when
    # the workflow is synthetic.
    _MOCKED_INPUTS = ("coordinates", "parameters", "topfile",
                      "extended_system", "sassena_db")

    def __init__(self, outdir, config, is_synthetic_workflow):
        """Read the simulation settings and start with an empty replica set.

        'outdir' is the directory where the workflow is written, and 'config'
        is a ConfigParser object. When 'is_synthetic_workflow' is true, the
        database names lose their "database/" prefix and the input file
        attributes are replaced by generated "<name>_mock" files.
        """
        self.outdir = outdir
        self.config = config
        self.daxfile = os.path.join(self.outdir, "dax.xml")
        self.replicas = {}

        # Get all the values from the config file. Empty entries in the
        # comma-separated charge list (e.g. from a trailing comma) are
        # dropped; an empty charge would otherwise fail later in
        # generate_psf() when it is converted with float().
        self.charges = [
            entry.strip()
            for entry in self.getconf("charges").split(",")
            if entry.strip()
        ]
        self.temperature = self.getconf("temperature")
        self.equilibrate_steps = self.getconf("equilibrate_steps")
        self.production_steps = self.getconf("production_steps")
        self.equilibrate_output = self.getconf("equilibrate_output")
        self.production_output = self.getconf("production_output")
        self.pressure = self.getconf("pressure")
        self.coordinates = self.getconf("coordinates")
        self.parameters = self.getconf("parameters")
        self.topfile = self.getconf("topfile")
        self.extended_system = self.getconf("extended_system")
        self.sassena_db = self.getconf("sassena_db")

        self.incoherent_db = "database/db-neutron-incoherent.xml"
        self.coherent_db = "database/db-neutron-coherent.xml"

        self.is_synthetic_workflow = is_synthetic_workflow
        # if synthetic workflow we do not have database dir
        if self.is_synthetic_workflow:
            self.incoherent_db = "db-neutron-incoherent.xml"
            self.coherent_db = "db-neutron-coherent.xml"
            self.keg_params = KegParametersFactory(self.config)

            # mocking input files
            for input_file in self._MOCKED_INPUTS:
                mock_name = input_file + "_mock"
                setattr(self, input_file, mock_name)
                mock_path = os.path.join("inputs", mock_name)
                self.keg_params.generate_input_file(input_file, mock_path)

    def getconf(self, name, section="simulation"):
        "Return option 'name' from 'section' of the config"
        return self.config.get(section, name)

    def add_replica(self, name, path):
        "Add a replica entry to the replica catalog for the workflow"
        url = "file://%s" % path
        self.replicas[name] = url

    def generate_replica_catalog(self):
        "Write the replica catalog for this workflow to 'rc.txt' in outdir"
        path = os.path.join(self.outdir, "rc.txt")
        # The context manager closes the file even if a write fails.
        with open(path, "w") as f:
            for name, url in self.replicas.items():
                f.write('%-30s %-100s pool="local"\n' % (name, url))

    def _write_from_template(self, template, name, kw):
        "Render 'template' with 'kw' to outdir/'name' and register it as a replica"
        path = os.path.join(self.outdir, name)
        format_template(template, path, **kw)
        self.add_replica(name, path)

    def generate_psf(self, charge):
        "Generate a psf file for 'charge'"
        # 'charge' is scaled by 0.01 and 'charge2' by -0.02 before being
        # substituted into the template.
        kw = {
            "charge": "%10.6f" % (0.01 * float(charge)),
            "charge2": "%10.6f" % (-0.02 * float(charge))
        }
        self._write_from_template("charge.xml", "Q%s.psf" % charge, kw)

    def generate_eq_conf(self, charge, structure):
        "Generate an equilibrate configuration file for 'charge'"
        kw = {
            "temperature": self.temperature,
            "pressure": self.pressure,
            "charge": charge,
            "structure": structure,
            "coordinates": self.coordinates,
            "parameters": self.parameters,
            "outputname": "equilibrate_%s" % charge,
            "extended_system": self.extended_system,
            "timesteps": self.equilibrate_steps,
            "timeoutput": self.equilibrate_output
        }
        self._write_from_template(
            "equilibrate.conf", "equilibrate_%s.conf" % charge, kw)

    def generate_prod_conf(self, charge, structure):
        "Generate a production configuration file for 'charge'"
        kw = {
            "temperature": self.temperature,
            "pressure": self.pressure,
            "charge": charge,
            "structure": structure,
            "coordinates": self.coordinates,
            "parameters": self.parameters,
            "inputname": "equilibrate_%s" % charge,
            "outputname": "production_%s" % charge,
            "timesteps": self.production_steps,
            "timeoutput": self.production_output
        }
        self._write_from_template(
            "production.conf", "production_%s.conf" % charge, kw)

    def generate_ptraj_conf(self, charge):
        "Generate a ptraj configuration file for 'charge'"
        kw = {
            "trajectory_input": "production_%s.dcd" % charge,
            "trajectory_fit": "ptraj_%s.fit" % charge,
            "trajectory_output": "ptraj_%s.dcd" % charge
        }
        self._write_from_template(
            "rms2first.ptraj", "ptraj_%s.conf" % charge, kw)

    def generate_incoherent_conf(self, charge):
        "Generate a sassena incoherent config file for 'charge'"
        kw = {
            "coordinates": self.coordinates,
            "trajectory": "ptraj_%s.dcd" % charge,
            "output": "fqt_inc_%s.hd5" % charge,
            "database": self.incoherent_db
        }
        self._write_from_template(
            "sassenaInc.xml", "sassenaInc_%s.xml" % charge, kw)

    def generate_coherent_conf(self, charge):
        "Generate a sassena coherent config file for 'charge'"
        kw = {
            "coordinates": self.coordinates,
            "trajectory": "ptraj_%s.dcd" % charge,
            "output": "fqt_coh_%s.hd5" % charge,
            "database": self.coherent_db
        }
        self._write_from_template(
            "sassenaCoh.xml", "sassenaCoh_%s.xml" % charge, kw)

    @staticmethod
    def _set_globus_profile(job, jobtype, maxwalltime, count):
        "Set the globus jobtype, maxwalltime and count profiles on 'job'"
        job.profile("globus", "jobtype", jobtype)
        job.profile("globus", "maxwalltime", maxwalltime)
        job.profile("globus", "count", count)

    def _set_mpi_profile(self, job, synthetic_walltime, walltime_option,
                         cores_option):
        """Set an 'mpi' globus profile on 'job'.

        Synthetic workflows use the fixed 'synthetic_walltime' and 8 cores;
        otherwise both values are read from the config, and only then, so a
        synthetic config does not need those options.
        """
        if self.is_synthetic_workflow:
            self._set_globus_profile(job, "mpi", synthetic_walltime, "8")
        else:
            self._set_globus_profile(job, "mpi",
                                     self.getconf(walltime_option),
                                     self.getconf(cores_option))

    def generate_dax(self):
        """Generate the DAX and the per-charge configuration files.

        Every generated configuration file is also registered as a replica;
        the replica catalog file itself is written by
        generate_replica_catalog().
        """
        ts = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        dax = ADAG("refinement-%s" % ts)

        # These are all the global input files for the workflow
        coordinates = File(self.coordinates)
        parameters = File(self.parameters)
        extended_system = File(self.extended_system)
        topfile = File(self.topfile)
        sassena_db = File(self.sassena_db)
        incoherent_db = File(self.incoherent_db)
        coherent_db = File(self.coherent_db)

        # This job untars the sassena db and makes it available to the other
        # jobs in the workflow
        untarjob = Job("tar", node_label="untar")

        if self.is_synthetic_workflow:
            untarjob.addArguments("-p", "-xzvf", sassena_db.name)
            untarjob.addArguments("-a", "tar")

            # Explicit (label, file) pairs instead of eval() on local names.
            for label, db_file in (("incoherent_db", incoherent_db),
                                   ("coherent_db", coherent_db)):
                untarjob.addArguments(
                    self.keg_params.output_file("tar", label, db_file.name))

            self.keg_params.add_keg_params(untarjob)
        else:
            untarjob.addArguments("-xzvf", sassena_db)

        untarjob.uses(sassena_db, link=Link.INPUT)
        untarjob.uses(incoherent_db, link=Link.OUTPUT, transfer=False)
        untarjob.uses(coherent_db, link=Link.OUTPUT, transfer=False)

        self._set_globus_profile(untarjob, "single", "1", "1")

        dax.addJob(untarjob)

        # For each charge that was listed in the config file
        for charge in self.charges:

            structure = "Q%s.psf" % charge

            # Equilibrate files
            eq_conf = File("equilibrate_%s.conf" % charge)
            eq_coord = File("equilibrate_%s.restart.coord" % charge)
            eq_xsc = File("equilibrate_%s.restart.xsc" % charge)
            eq_vel = File("equilibrate_%s.restart.vel" % charge)

            # Production files
            prod_conf = File("production_%s.conf" % charge)
            prod_dcd = File("production_%s.dcd" % charge)

            # Ptraj files
            ptraj_conf = File("ptraj_%s.conf" % charge)
            ptraj_fit = File("ptraj_%s.fit" % charge)
            ptraj_dcd = File("ptraj_%s.dcd" % charge)

            # Sassena incoherent files
            incoherent_conf = File("sassenaInc_%s.xml" % charge)
            fqt_incoherent = File("fqt_inc_%s.hd5" % charge)

            # Sassena coherent files
            coherent_conf = File("sassenaCoh_%s.xml" % charge)
            fqt_coherent = File("fqt_coh_%s.hd5" % charge)

            # Generate psf and configuration files for this charge pipeline
            self.generate_psf(charge)
            self.generate_eq_conf(charge, structure)
            self.generate_prod_conf(charge, structure)
            self.generate_ptraj_conf(charge)
            self.generate_incoherent_conf(charge)
            self.generate_coherent_conf(charge)

            # Equilibrate job
            eqjob = Job("namd", node_label="namd_eq_%s" % charge)
            if self.is_synthetic_workflow:
                eqjob.addArguments("-p", eq_conf)
                eqjob.addArguments("-a", "namd_eq_%s" % charge)
                eqjob.addArguments("-i", eq_conf.name, structure,
                                   coordinates.name, parameters.name,
                                   extended_system.name)

                task_label = "namd-eq"

                for label, out_file in (("eq_coord", eq_coord),
                                        ("eq_xsc", eq_xsc),
                                        ("eq_vel", eq_vel)):
                    eqjob.addArguments(self.keg_params.output_file(
                        task_label, label, out_file.name))

                self.keg_params.add_keg_params(eqjob, task_label)
            else:
                eqjob.addArguments(eq_conf)

            eqjob.uses(eq_conf, link=Link.INPUT)
            eqjob.uses(structure, link=Link.INPUT)
            eqjob.uses(coordinates, link=Link.INPUT)
            eqjob.uses(parameters, link=Link.INPUT)
            eqjob.uses(extended_system, link=Link.INPUT)
            eqjob.uses(eq_coord, link=Link.OUTPUT, transfer=False)
            eqjob.uses(eq_xsc, link=Link.OUTPUT, transfer=False)
            eqjob.uses(eq_vel, link=Link.OUTPUT, transfer=False)
            self._set_mpi_profile(eqjob, "1", "equilibrate_maxwalltime",
                                  "equilibrate_cores")
            dax.addJob(eqjob)

            # Production job
            prodjob = Job("namd", node_label="namd_prod_%s" % charge)

            if self.is_synthetic_workflow:
                prodjob.addArguments("-p", prod_conf)
                prodjob.addArguments("-a", "namd_prod_%s" % charge)
                prodjob.addArguments("-i", prod_conf.name, structure,
                                     coordinates.name, parameters.name,
                                     eq_coord.name, eq_xsc.name, eq_vel.name)

                task_label = "namd-prod"
                prodjob.addArguments(self.keg_params.output_file(
                    task_label, "prod_dcd", prod_dcd.name))
                self.keg_params.add_keg_params(prodjob, task_label)
            else:
                prodjob.addArguments(prod_conf)

            prodjob.uses(prod_conf, link=Link.INPUT)
            prodjob.uses(structure, link=Link.INPUT)
            prodjob.uses(coordinates, link=Link.INPUT)
            prodjob.uses(parameters, link=Link.INPUT)
            prodjob.uses(eq_coord, link=Link.INPUT)
            prodjob.uses(eq_xsc, link=Link.INPUT)
            prodjob.uses(eq_vel, link=Link.INPUT)
            prodjob.uses(prod_dcd, link=Link.OUTPUT, transfer=True)

            self._set_mpi_profile(prodjob, "6", "production_maxwalltime",
                                  "production_cores")

            dax.addJob(prodjob)
            dax.depends(prodjob, eqjob)

            # ptraj job
            ptrajjob = Job(namespace="amber", name="ptraj",
                           node_label="amber_ptraj_%s" % charge)

            if self.is_synthetic_workflow:
                ptrajjob.addArguments("-p", topfile)
                ptrajjob.addArguments("-a", "amber_ptraj_%s" % charge)
                ptrajjob.addArguments("-i", topfile.name, ptraj_conf.name,
                                      prod_dcd.name)

                task_label = "amber-ptraj"

                for label, out_file in (("ptraj_fit", ptraj_fit),
                                        ("ptraj_dcd", ptraj_dcd)):
                    ptrajjob.addArguments(self.keg_params.output_file(
                        task_label, label, out_file.name))

                self.keg_params.add_keg_params(ptrajjob, task_label)

            else:
                ptrajjob.addArguments(topfile)
                ptrajjob.setStdin(ptraj_conf)

            ptrajjob.uses(topfile, link=Link.INPUT)
            ptrajjob.uses(ptraj_conf, link=Link.INPUT)
            ptrajjob.uses(prod_dcd, link=Link.INPUT)
            ptrajjob.uses(ptraj_fit, link=Link.OUTPUT, transfer=True)
            ptrajjob.uses(ptraj_dcd, link=Link.OUTPUT, transfer=True)
            # The ptraj limits are read from the config in both modes.
            self._set_globus_profile(ptrajjob, "single",
                                     self.getconf("ptraj_maxwalltime"),
                                     self.getconf("ptraj_cores"))
            dax.addJob(ptrajjob)
            dax.depends(ptrajjob, prodjob)

            # sassena incoherent job
            incojob = Job("sassena", node_label="sassena_inc_%s" % charge)
            if self.is_synthetic_workflow:
                incojob.addArguments("-p", "--config", incoherent_conf)
                incojob.addArguments("-a", "sassena_inc_%s" % charge)
                incojob.addArguments("-i", incoherent_conf.name,
                                     ptraj_dcd.name, incoherent_db.name,
                                     coordinates.name)

                task_label = "sassena-inc"

                incojob.addArguments(self.keg_params.output_file(
                    task_label, "fqt_incoherent", fqt_incoherent.name))

                self.keg_params.add_keg_params(incojob, task_label)
            else:
                incojob.addArguments("--config", incoherent_conf)

            incojob.uses(incoherent_conf, link=Link.INPUT)
            incojob.uses(ptraj_dcd, link=Link.INPUT)
            incojob.uses(incoherent_db, link=Link.INPUT)
            incojob.uses(coordinates, link=Link.INPUT)
            incojob.uses(fqt_incoherent, link=Link.OUTPUT, transfer=True)

            self._set_mpi_profile(incojob, "6", "sassena_maxwalltime",
                                  "sassena_cores")

            dax.addJob(incojob)
            dax.depends(incojob, ptrajjob)
            dax.depends(incojob, untarjob)

            # sassena coherent job
            cojob = Job("sassena", node_label="sassena_coh_%s" % charge)
            if self.is_synthetic_workflow:
                cojob.addArguments("-p", "--config", coherent_conf)
                cojob.addArguments("-a", "sassena_coh_%s" % charge)
                cojob.addArguments("-i", coherent_conf.name, ptraj_dcd.name,
                                   coherent_db.name, coordinates.name)

                task_label = "sassena-coh"

                cojob.addArguments(self.keg_params.output_file(
                    task_label, "fqt_coherent", fqt_coherent.name))

                self.keg_params.add_keg_params(cojob, task_label)

            else:
                cojob.addArguments("--config", coherent_conf)

            cojob.uses(coherent_conf, link=Link.INPUT)
            cojob.uses(ptraj_dcd, link=Link.INPUT)
            cojob.uses(coherent_db, link=Link.INPUT)
            cojob.uses(coordinates, link=Link.INPUT)
            cojob.uses(fqt_coherent, link=Link.OUTPUT, transfer=True)

            self._set_mpi_profile(cojob, "6", "sassena_maxwalltime",
                                  "sassena_cores")

            dax.addJob(cojob)
            # The coherent job reads ptraj_dcd, which ptrajjob produces, so
            # it must wait for ptrajjob (which itself waits for prodjob),
            # exactly like the incoherent job above.
            dax.depends(cojob, ptrajjob)
            dax.depends(cojob, untarjob)

        # Write the DAX file
        dax.writeXMLFile(self.daxfile)

    def generate_workflow(self):
        "Generate the DAX with its config files, then the replica catalog"
        # Generate dax
        self.generate_dax()

        # Generate the replica catalog
        self.generate_replica_catalog()