class TextRankingOutputCube(SinkCube):
    """
    A sink cube that writes the rankings it receives to a text file.

    Each ranking is written as a ``Set n°<id>`` header line followed by one
    line per molecule. Output goes to a local file or, when an Orion
    configuration is found in the environment, to a temporary file that is
    uploaded in ``end``.
    """

    method = parameter.StringParameter('method', default='Fingerprint',
                                    help_text='Method used for the ranking')

    fptype = parameter.IntegerParameter('fptype', default=105,
                                    help_text="Fingerprint type to use for the ranking")

    intake = ObjectInputPort('intake')
    name = FileOutputParameter('name',
                               required=True,
                               description='The name of the output file')
    title = "Ranking Writer"
    classification = [["Output"]]

    def begin(self):
        """
        Derive the file-name suffix from the ranking method and open the
        output stream.

        Raises:
            ValueError: if ``method`` is neither 'Fingerprint' nor 'FastROCS'.
            KeyError: if ``fptype`` is not one of 102, 104 or 105.
        """
        if self.args.method == 'Fingerprint':
            fptypes = {102: 'path', 104: 'circular', 105: 'tree'}
            self.FPType = fptypes[self.args.fptype]
            self.name_ext = 'FP_' + self.FPType
        elif self.args.method == 'FastROCS':
            self.name_ext = 'FR'
        else:
            # Without a suffix the output name cannot be built later on, so
            # fail here with a clear message instead of an AttributeError.
            raise ValueError(
                "Unsupported ranking method: {!r}".format(self.args.method))

        self.in_orion = config_from_env() is not None
        if self.in_orion:
            self.stream = tempfile.NamedTemporaryFile()
        else:
            self.stream = open(self._output_name(), 'wb')

    def _output_name(self):
        """Return the output file name built from ``name`` and the suffix."""
        return self.args.name + "ranking_" + self.name_ext + ".txt"

    def write(self, data, port):
        """
        Append every ranking found in ``data[0]`` to the output stream.

        Each molecule entry is indexed as ``mol[1]`` .. ``mol[4]``; the
        entries are presumably (smiles, title, score, set id, known-active
        flag) tuples -- verify against the emitting cube.
        """
        self.ranking_list = data[0]

        for ranking in self.ranking_list:
            if not ranking:
                # The set id is read from the first molecule, so an empty
                # ranking has nothing to write.
                continue
            lines = ["\n" + "Set n°" + str(ranking[0][3]) + "\n"]
            for mol in ranking:
                lines.append(str(mol[3]) + " " + mol[1] + " " + str(mol[2]) +
                             " " + str(mol[4]) + "\n")
            # Encode once per ranking instead of once per line.
            self.stream.write("".join(lines).encode("utf-8"))

    def end(self):
        """Upload the file when running in Orion, then close the stream."""
        try:
            if self.in_orion:
                self.stream.flush()
                name = self._output_name()
                resp = upload_file(name, self.stream.name)
                self.log.info("Created result file {} with ID {}".format(
                    name, resp['id']))
        finally:
            # Closing also removes the temporary file used in Orion.
            self.stream.close()
Exemple #2
0
class IndexInputCube(SourceCube):
    """
    An input cube that reads an index log and yields the baitsets it holds.

    Every item produced is a ``(set_id, baitset)`` tuple, where ``baitset``
    is the list of tokens following the set header, with purely numeric
    tokens converted to ``int``.
    """

    classification = [["Input"]]

    success = ObjectOutputPort('success')

    limit = parameter.IntegerParameter(
        'limit',
        required=False,
        description='Read up to N items from this cube')

    data_in = parameter.DataSetInputParameter(
        'data_in',
        required=True,
        title='Index log',
        description='The index log to read from')

    def begin(self):
        """Open the index log, from Orion when configured, else from disk."""
        self.in_orion = config_from_env() is not None
        if self.in_orion:
            self.stream = stream_file(self.args.data_in)
        else:
            self.stream = open(str(self.args.data_in), 'rb')

    def __iter__(self):
        """
        Yield ``(set_id, baitset)`` tuples parsed from the index log.

        Iteration stops for good once ``limit`` items have been yielded.
        """
        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)
        count = 0

        try:
            for chunk in self.stream:
                # NOTE(review): entries are parsed chunk by chunk, so this
                # assumes an entry never straddles two chunks -- confirm for
                # the Orion stream.
                entries = chunk.decode('utf-8').split("set ")[1:]
                for entry in entries:
                    fields = entry.split(" ")
                    # NOTE(review): only the second-to-last character of the
                    # first field is used as the id, which presumably limits
                    # ids to a single digit -- confirm the log format.
                    set_id = int(fields[0][-2:-1])
                    baitset = [int(token) if token.isdigit() else token
                               for token in fields[1:-1]]
                    yield (set_id, baitset)
                    count += 1
                    # Count after yielding so exactly ``limit`` items are
                    # produced, and return so later chunks are not read.
                    if max_idx is not None and count == max_idx:
                        return
        finally:
            if not self.in_orion:
                self.stream.close()
Exemple #3
0
class OEMolTriggeredIStreamCube(ComputeCube):
    """
    A compute cube that reads molecules with oechem once it is triggered.

    Any item arriving on the ``fp_input`` port triggers a read of
    ``data_in``; each molecule read is emitted on ``success``.
    """
    classification = [["Input"]]
    success = MoleculeOutputPort('success')

    title = "Dataset Reader"

    limit = parameter.IntegerParameter(
        'limit',
        required=False,
        description='Read up to N items from this cube')
    fp_input = ObjectInputPort('fp_input')
    data_in = parameter.DataSetInputParameter(
        'data_in',
        required=True,
        title='Dataset to read from',
        description='The dataset to read from')
    download_format = parameter.StringParameter(
        'download_format',
        choices=('.oeb.gz', '.oeb', '.smi', '.pdb', '.mol2'),
        required=False,
        description=
        'The stream format to be used for retrieving molecules from Orion',
        default=".oeb.gz")

    # Set to True once a trigger item has been received.
    received_act = False

    def process(self, data, port):
        """
        Emit the molecules of ``data_in`` when ``port`` is ``'fp_input'``.

        The content of ``data`` is not used; it only acts as a trigger.
        Reading stops after ``limit`` molecules when a limit is set.
        """
        # Compare by value: identity of string objects is not guaranteed.
        if port != 'fp_input':
            return

        self.received_act = True
        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)
        count = 0
        with oechem.oemolistream(str(self.args.data_in)) as ifs:
            for mol in ifs.GetOEMols():
                self.success.emit(mol)
                count += 1
                if max_idx is not None and count == max_idx:
                    break
class ResultsOutputCube(SinkCube):
    """
    A sink cube that writes a results dataframe to a CSV file.

    The file is written locally or, when an Orion configuration is found in
    the environment, written to a temporary file and uploaded.
    """

    fptype = parameter.IntegerParameter('fptype', default=105,
                                    help_text="Fingerprint type to use for the ranking")

    intake = ObjectInputPort('intake')
    name = FileOutputParameter('name',
                               required=True,
                               description='The name of the output file')
    title = "Results Writer"
    classification = [["Output"]]

    def begin(self):
        """Create the temporary upload file when running in Orion."""
        self.in_orion = config_from_env() is not None
        if self.in_orion:
            self.stream = tempfile.NamedTemporaryFile()

    def write(self, data, port):
        """
        Write ``data[0]`` (an object with a ``to_csv`` method) as CSV.

        ``data[1]`` is the ranking method name and selects the file-name
        suffix.

        Raises:
            ValueError: if the method is neither 'Fingerprint' nor 'FastROCS'.
            KeyError: if ``fptype`` is not one of 102, 104 or 105.
        """
        self.results_avg = data[0]
        self.method = data[1]

        if self.method == 'Fingerprint':
            fptypes = {102: 'path', 104: 'circular', 105: 'tree'}
            self.FPType = fptypes[self.args.fptype]
            self.name_ext = 'FP_' + self.FPType
        elif self.method == 'FastROCS':
            self.name_ext = 'FR'
        else:
            # Do not silently reuse a suffix left over from an earlier item.
            raise ValueError(
                "Unsupported ranking method: {!r}".format(self.method))

        if self.in_orion:
            self.results_avg.to_csv(self.stream.name)
            self.stream.flush()
            # NOTE(review): the Orion name uses "_results_" while the local
            # path below uses "results_" -- confirm which is intended.
            name = self.args.name + "_results_" + self.name_ext + ".csv"
            resp = upload_file(name, self.stream.name)
            self.log.info("Created result file {} with ID {}".format(name, resp['id']))
        else:
            path = self.args.name + "results_" + self.name_ext + ".csv"
            self.results_avg.to_csv(path)

    def end(self):
        """Release the temporary file created in ``begin``, if any."""
        if self.in_orion:
            # Closing a NamedTemporaryFile also deletes it.
            self.stream.close()
Exemple #5
0
class BenchmarkCube(SourceCube):

    title = "OpenMM BenchmarkCube"

    description = """
        Cube that performs a benchmark of OpenMM on all of the different
        platforms that are available and outputs the byte string
        resulting from the benchmarks line by line to its success port
    """

    tags = [["OpenMM", "Benchmarking"]]

    success = BinaryOutputPort("success")

    # Benchmark settings; they are handed to run_platform_benchmarks through
    # self.args.
    cutoff = parameter.DecimalParameter("cutoff", default=0.9)
    seconds = parameter.IntegerParameter("seconds", default=60)
    polarization = parameter.StringParameter(
        "polarization",
        default="mutual",
        choices=["direct", "extrapolated", "mutual"],
    )
    amoeba_target_epsilon = parameter.DecimalParameter(
        "amoeba_target_epsilon",
        default=1e-5,
        title="Amoeba Mutual Induced Target Epsilon",
    )
    use_heavy_hydrogens = parameter.BooleanParameter(
        "use_heavy_hydrogens",
        default=False,
        title="Use Heavy Hydrogens",
    )
    precision = parameter.StringParameter(
        "precision",
        default="single",
        choices=["single", "mixed", "double"],
    )

    def __iter__(self):
        """
        Run the platform benchmarks and yield their report line by line.

        The report is collected in an in-memory text buffer; every line is
        logged and then yielded UTF-8 encoded.
        """
        report = StringIO()
        report.write("Benchmarking Results:\n")
        run_platform_benchmarks(self.args, stream=report)
        report.flush()
        report.seek(0)
        # readline() returns "" once the buffer is exhausted.
        for line in iter(report.readline, ""):
            self.log.info(line)
            yield line.encode("utf-8")
Exemple #6
0
class YankHydrationCube(ParallelOEMolComputeCube):
    title = "YankHydrationCube"
    description = """
    Compute the hydration free energy of a small molecule with YANK.

    This cube uses the YANK alchemical free energy code to compute the
    transfer free energy of one or more small molecules from gas phase
    to TIP3P solvent.

    See http://getyank.org for more information about YANK.
    """
    # A list of lists, so that the flattening below yields whole tags
    # rather than the individual characters of a string.
    classification = [["Alchemical free energy calculations"]]
    tags = [tag for lists in classification for tag in lists]

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {
            "default": 1
        },  # 1 molecule at a time
        "item_timeout": {
            "default": 3600
        },  # Default 1 hour limit (units are seconds)
        "item_count": {
            "default": 1
        }  # 1 molecule at a time
    }

    # Define custom ports to handle oeb.gz files
    intake = CustomMoleculeInputPort('intake')
    success = CustomMoleculeOutputPort('success')
    failure = CustomMoleculeOutputPort('failure')

    # These can override YAML parameters
    nsteps_per_iteration = parameter.IntegerParameter(
        'nsteps_per_iteration',
        default=500,
        help_text="Number of steps per iteration")

    timestep = parameter.DecimalParameter('timestep',
                                          default=2.0,
                                          help_text="Timestep (fs)")

    simulation_time = parameter.DecimalParameter(
        'simulation_time',
        default=0.100,
        help_text="Simulation time (ns/replica)")

    temperature = parameter.DecimalParameter('temperature',
                                             default=300.0,
                                             help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter('pressure',
                                          default=1.0,
                                          help_text="Pressure (atm)")

    solvent = parameter.StringParameter(
        'solvent',
        default='gbsa',
        choices=['gbsa', 'tip3p'],
        help_text="Solvent choice: one of ['gbsa', 'tip3p']")

    verbose = parameter.BooleanParameter(
        'verbose',
        default=False,
        help_text="Print verbose YANK logging output")

    def construct_yaml(self, **kwargs):
        """
        Fill the hydration YAML template with the cube parameters.

        Keyword arguments are added to the substitution options and
        override cube-derived values of the same name.

        Returns:
            The YAML text obtained by ``%``-substituting the options into
            ``hydration_yaml_template``.
        """
        # Make substitutions to YAML here.
        # TODO: Can we override YAML parameters without having to do string substitutions?
        # Iterations needed to cover the requested simulation time, rounded up.
        number_of_iterations = int(
            np.ceil(self.args.simulation_time * unit.nanoseconds /
                    (self.args.nsteps_per_iteration * self.args.timestep *
                     unit.femtoseconds)))
        options = {
            'timestep': self.args.timestep,
            'nsteps_per_iteration': self.args.nsteps_per_iteration,
            'number_of_iterations': number_of_iterations,
            'temperature': self.args.temperature,
            'pressure': self.args.pressure,
            'solvent': self.args.solvent,
            'verbose': 'yes' if self.args.verbose else 'no',
        }
        options.update(kwargs)

        return hydration_yaml_template % options

    def begin(self):
        """
        Validate the solvent choice and precompute kT.

        Raises:
            Exception: if ``solvent`` is not 'gbsa' or 'tip3p'.
        """
        # TODO: Is there another idiom to use to check valid input?
        if self.args.solvent not in ['gbsa', 'tip3p']:
            raise Exception("solvent must be one of ['gbsa', 'tip3p']")

        # Compute kT on a per-mole basis (kB * N_A).
        kB = unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA
        self.kT = kB * (self.args.temperature * unit.kelvin)

    def process(self, mol, port):
        """
        Run YANK on ``mol`` and attach the hydration free energy to it.

        On success the molecule gets the SD tags ``DeltaG_yank_hydration``
        and ``dDeltaG_yank_hydration`` (values scaled by kT in kcal/mol) and
        is emitted on ``success``. On any exception the error text is
        attached as ``error`` data and the molecule is emitted on
        ``failure``.
        """
        kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

        # Retrieve data about which molecule we are processing
        title = mol.GetTitle()

        with TemporaryDirectory() as output_directory:
            try:
                # Print out which molecule we are processing
                self.log.info('Processing {} in directory {}.'.format(
                    title, output_directory))

                # Check that molecule is charged.
                if not molecule_is_charged(mol):
                    raise Exception(
                        'Molecule %s has no charges; input molecules must be charged.'
                        % mol.GetTitle())

                # Write the specified molecule out to a mol2 file without changing its name.
                mol2_filename = os.path.join(output_directory, 'input.mol2')
                ofs = oechem.oemolostream(mol2_filename)
                try:
                    oechem.OEWriteMol2File(ofs, mol)
                finally:
                    # Close before the file is post-processed below so the
                    # content is fully written out.
                    ofs.close()

                # Work around oechem naming mol2 substructures `<0>`
                from YankCubes.utils import unfuck_oechem_mol2_file
                unfuck_oechem_mol2_file(mol2_filename)

                # Run YANK on the specified molecule.
                from yank.yamlbuild import YamlBuilder
                yaml = self.construct_yaml(output_directory=output_directory)
                yaml_builder = YamlBuilder(yaml)
                yaml_builder.build_experiments()
                self.log.info(
                    'Ran Yank experiments for molecule {}.'.format(title))

                # Analyze the hydration free energy.
                from yank.analyze import estimate_free_energies
                (Deltaf_ij_solvent,
                 dDeltaf_ij_solvent) = estimate_free_energies(
                     netcdf.Dataset(
                         output_directory + '/experiments/solvent1.nc', 'r'))
                (Deltaf_ij_vacuum, dDeltaf_ij_vacuum) = estimate_free_energies(
                    netcdf.Dataset(
                        output_directory + '/experiments/solvent2.nc', 'r'))
                DeltaG_hydration = (Deltaf_ij_vacuum[0, -1] -
                                    Deltaf_ij_solvent[0, -1])
                # Combine the two uncertainty estimates in quadrature; the
                # free energies themselves must not be used here.
                dDeltaG_hydration = np.sqrt(dDeltaf_ij_vacuum[0, -1]**2 +
                                            dDeltaf_ij_solvent[0, -1]**2)

                # Add result to original molecule
                oechem.OESetSDData(mol, 'DeltaG_yank_hydration',
                                   str(DeltaG_hydration * kT_in_kcal_per_mole))
                oechem.OESetSDData(
                    mol, 'dDeltaG_yank_hydration',
                    str(dDeltaG_hydration * kT_in_kcal_per_mole))
                self.log.info(
                    'Analyzed and stored hydration free energy for molecule {}.'
                    .format(title))

                # Emit molecule to success port.
                self.success.emit(mol)

            except Exception as e:
                self.log.info(
                    'Exception encountered when processing molecule {}.'.
                    format(title))
                # Attach error message to the molecule that failed
                # TODO: If there is an error in the leap setup log,
                # we should capture that and attach it to the failed molecule.
                self.log.error(traceback.format_exc())
                mol.SetData('error', str(e))
                # Return failed molecule
                self.failure.emit(mol)
Exemple #7
0
class GenerateTorsionalConfs(Cube):
    """
        Generate conformers by rotating the primary torsion.
    """

    cube_type = constants.CUBE_COMPUTE

    num_points = parameter.IntegerParameter(
        'num_points',
        title='Number of torsional conformers to generate.',
        default=24,
        min_value=1,
        max_value=36,
        description="""The number of evenly spaced torsion angles to sample 
        when generating torsional conformers.""")

    split_confs = parameter.BooleanParameter(
        'split_confs',
        title='Emit each conformer separately',
        default=True,
        description="""Whether conformers should be emitted separately or as part of a single molecule.""")

    best_conf = parameter.BooleanParameter(
        'best_conf',
        title='For each torsion select single best conformer',
        default=True,
        description="""Whether single best conformer should be emitted for each dihedral angle.""")

    def begin(self):
        """Keep a reference to the module-level torsion library."""
        self.torsion_library = torsion_library

    def process(self, mol, port):
        """
        Generate torsional conformers for ``mol`` and emit them.

        The dihedral is located from the 1-based atom indices stored in the
        ``TORSION_ATOMS_FRAGMENT`` SD tag. Conformers go to ``success``
        (one by one when ``split_confs`` is set); if anything raises, the
        error is logged and ``mol`` goes to ``failure``.
        """
        mol_title = mol.GetTitle()
        parent_atoms = get_sd_data(mol, 'TORSION_ATOMS_ParentMol').split()
        fragmentLabel = mol_title + '_' + '_'.join(parent_atoms)

        torsion_tag = 'TORSION_ATOMS_FRAGMENT'
        torsion_atoms_in_fragment = get_sd_data(mol, torsion_tag).split()
        # SD data holds 1-based indices; shift them to 0-based.
        dihedral_atom_indices = []
        for atom_number in torsion_atoms_in_fragment:
            dihedral_atom_indices.append(int(atom_number) - 1)
        print(fragmentLabel, torsion_atoms_in_fragment, dihedral_atom_indices)

        n_points = self.args.num_points
        try:
            dih, _ = get_dihedral(mol, dihedral_atom_indices)
            if not self.args.best_conf:
                torsional_conformers = gen_torsional_confs(
                    mol, dih, n_points, include_input=False)
            else:
                torsional_conformers = get_best_conf(mol, dih, n_points)

            if not self.args.split_confs:
                self.success.emit(torsional_conformers)
            else:
                for pose in split_confs(torsional_conformers):
                    self.success.emit(pose)

            generated = torsional_conformers.NumConfs()
            self.log.info(
                '%d torsional conformers generated for fragment %s.' %
                (generated, fragmentLabel))

        except Exception as e:
            message = "Could not generate conformers for fragment %s: %s" % (
                fragmentLabel, e)
            self.log.error(message)
            self.failure.emit(mol)
class AnalyseRankings(ComputeCube):
    """
    A compute cube that averages recovery and hit rates over rankings.

    For every rank position the RR (percentage of the known actives found
    so far) and HR (percentage of the molecules seen so far that are known
    actives) are computed per ranking and averaged across rankings.
    """

    classification = [["Compute", "Analysis"]]

    fptype = parameter.IntegerParameter(
        'fptype',
        default=105,
        help_text="Fingerprint type to use for the ranking")

    topn = parameter.IntegerParameter(
        'topn',
        default=100,
        help_text="Number of top molecules returned in the ranking")

    intake = ObjectInputPort('intake')
    success = ObjectOutputPort('success')

    def process(self, data, port):
        """
        Analyse one item and emit ``(results_avg, method)``.

        ``data`` is indexed as (ranking list, number of known actives,
        method name).
        """
        self.ranking_list = data[0]
        self.nb_ka = data[1]
        self.method = data[2]
        self.results_avg = self.ranking_analysis()

        self.success.emit((self.results_avg, self.method))

    def ranking_analysis(self):
        """
        Compute the per-position average RR and HR over all rankings.

        Returns:
            A DataFrame with the columns ``'Average RR <name>'`` and
            ``'Average HR <name>'``, limited to the first ``topn`` rows.

        Raises:
            ValueError: if the method is neither 'Fingerprint' nor 'FastROCS'.
            KeyError: if ``fptype`` is not one of 102, 104 or 105.
            ZeroDivisionError: if ``nb_ka`` is 0 and a ranking is not empty.
        """
        if self.method == 'Fingerprint':
            fptypes = {102: 'path', 104: 'circular', 105: 'tree'}
            name = 'FP_' + fptypes[self.args.fptype]
        elif self.method == 'FastROCS':
            name = 'FR'
        else:
            raise ValueError(
                "Unsupported ranking method: {!r}".format(self.method))

        # Build each per-ranking frame in one go; growing a DataFrame row by
        # row is quadratic and leaves the columns with object dtype.
        frames = []
        for ranking in self.ranking_list:
            rows = []
            count_ka = 0
            for count, mol in enumerate(ranking, start=1):
                if mol[4] == 1:
                    count_ka += 1
                rows.append((100 * count_ka / self.nb_ka,
                             100 * count_ka / count))
            if rows:
                frames.append(pd.DataFrame(rows, columns=['RR', 'HR']))

        if frames:
            # Rows sharing an index value sit at the same rank position.
            by_position = pd.concat(frames).groupby(level=0)
            average_rr = by_position['RR'].mean()
            average_hr = by_position['HR'].mean()
        else:
            average_rr = pd.Series(dtype=float)
            average_hr = pd.Series(dtype=float)

        results_avg = pd.DataFrame()
        results_avg['Average RR ' + name] = average_rr
        results_avg['Average HR ' + name] = average_hr

        return results_avg.head(self.args.topn)
class ParallelInsertKARestfulROCS(ParallelComputeCube):
    """
    A parallel cube that scores molecules against a baitset through a
    RESTful FastROCS server and inserts them into an existing ranking.

    One query per baitset index is posted to the server; the best
    TanimotoCombo per returned title is then merged into the ranking.
    """

    classification = [["ParallelCompute"]]

    url = parameter.StringParameter(
        'url',
        default="http://10.0.1.22:4242",
        help_text="Url of the Restful FastROCS Server for the request")

    topn = parameter.IntegerParameter(
        'topn',
        default=100,
        help_text="Number of top molecules returned in the ranking")

    data_input = ObjectInputPort('data_input')
    success = ObjectOutputPort('success')

    def process(self, data, port):
        """
        Score against the baitset and emit the updated ranking.

        ``data`` is indexed as (active list, baitset, ranking, dataset
        infos); ``baitset`` as (set id, list of indices into the active
        list).
        """
        self.act_list = data[0]
        self.baitset = data[1]
        self.ranking = data[2]
        self.dataset_infos = data[3]
        self.log.info("processing KA for baitset : " + str(self.baitset[0]))

        self.dataset_identifier = self.dataset_infos[0]
        self.add_queries()
        self.get_results()
        for tanimoto, mol in self.cur_scores.values():
            self.update_ranking(mol, tanimoto, True)

        self.success.emit((self.act_list, self.baitset, self.ranking,
                           self.dataset_infos[0], 'FastROCS'))

    def add_queries(self):
        """Post one query per baitset molecule and record the query ids."""
        url = self.args.url + "/queries/"
        self.query_id_list = list()
        for idx in self.baitset[1]:
            self.query = tempfile.NamedTemporaryFile(suffix='.oeb',
                                                     mode='wb',
                                                     delete=False)
            # Only the path is needed; release the handle right away
            # instead of leaking one per query.
            self.query.close()
            try:
                with oechem.oemolostream(self.query.name) as ofs:
                    oechem.OEWriteMolecule(ofs, self.act_list[idx])

                parameters = {
                    "num_hits": self.args.topn,
                    "dataset_identifier": self.dataset_identifier,
                }
                with open(self.query.name, "rb") as query_file:
                    response = requests.post(url,
                                             files={"query": query_file},
                                             data=parameters)
            finally:
                os.remove(self.query.name)
            self.query_id_list.append(response.json()["id"])

    def get_results(self):
        """
        Wait for every query and keep the best score per molecule title.

        Fills ``self.cur_scores`` with ``title -> (TanimotoCombo, mol)``,
        skipping molecules whose dataset index is part of the baitset.
        """
        self.cur_scores = {}

        for query_id in self.query_id_list:
            url = self.args.url + "/queries/{}/".format(query_id)
            # NOTE(review): polls until the job reports COMPLETED; a job
            # that never does would loop forever -- confirm the other
            # status values the server can report.
            data = None
            tries = 0
            while data is None or data["status"]["job"] != "COMPLETED":
                tries += 1
                time.sleep(tries)
                data = requests.get(url).json()
            results_data = requests.get(self.args.url + data["results"])

            temp = tempfile.NamedTemporaryFile(suffix='.oeb',
                                               mode='wb',
                                               delete=False)
            try:
                # Close the handle before oechem re-opens the file by name.
                with temp:
                    temp.write(results_data.content)
                with oechem.oemolistream(temp.name) as results:
                    for mol in results.GetOEGraphMols():
                        mol_title = mol.GetTitle()
                        if self.dataset_infos[1][mol_title] in self.baitset[1]:
                            continue
                        tanimoto_combo = float(
                            oechem.OEGetSDData(mol, "TanimotoCombo"))
                        best = self.cur_scores.get(mol_title)
                        if best is None or best[0] < tanimoto_combo:
                            self.cur_scores[mol_title] = (tanimoto_combo,
                                                          mol.CreateCopy())
            finally:
                os.remove(temp.name)

    def update_ranking(self, mol, max_tanimoto, ka_tag):
        """
        Insert ``mol`` into ``self.ranking`` according to ``max_tanimoto``.

        Nothing happens when the ranking already holds ``topn`` entries and
        the score is below the last one. After the insert the ranking is
        cut after position ``topn``, keeping entries that tie with the
        score at the cut.
        """
        topn = self.args.topn
        if len(self.ranking) >= topn and max_tanimoto < self.ranking[-1][2]:
            return

        # Insert after the last entry with a strictly higher score. The
        # position comes from enumerate because list.index returns the first
        # equal tuple, which can be an earlier duplicate.
        index = 0
        for position, top_mol in enumerate(self.ranking):
            if max_tanimoto < top_mol[2]:
                index = position + 1
            else:
                break

        entry = (oechem.OEMolToSmiles(mol), mol.GetTitle(), max_tanimoto,
                 self.baitset[0], ka_tag)
        self.ranking = self.ranking[:index] + [entry] + self.ranking[index:]

        # Extend the cut while the next entry ties with the current one.
        cut = topn - 1
        while (cut < len(self.ranking) - 1
               and self.ranking[cut][2] == self.ranking[cut + 1][2]):
            cut += 1
        if cut < len(self.ranking) - 1:
            self.ranking = self.ranking[:cut + 1]

    def end(self):
        pass
class ParallelFastROCSRanking(ComputeCube):
    """
    A compute Cube that receives a Molecule a baitset of indices and a FastROCSServer address
    and returns the ranking of the Server Molecules against the query
    """

    classification = [["Compute", "FastROCS", "Similarity"]]

    url = parameter.StringParameter(
        'url',
        default="http://10.0.61.25:4711",
        help_text="Url of the FastROCS Server for the request")

    dataset_name = parameter.StringParameter(
        'dataset_name',
        default="screening_database",
        help_text="Name of the screening database")

    topn = parameter.IntegerParameter(
        'topn',
        default=100,
        help_text="Number of top molecules returned in the ranking")

    data_input = ObjectInputPort('data_input')
    success = ObjectOutputPort('success')

    def begin(self):
        pass

    def process(self, data, port):
        """
        Rank the server dataset against every baitset molecule and emit
        the merged ranking.

        ``data`` is indexed as (active list, baitset, ranking, dataset
        infos); ``baitset`` as (set id, list of indices into the active
        list).
        """
        self.act_list = data[0]
        self.baitset = data[1]
        self.ranking = data[2]
        self.dataset_infos = data[3]

        self.log.info("start ranking baitset number {}".format(
            self.baitset[0]))

        # Look up the server-side id of the dataset by its name.
        url = self.args.url + "/datasets/?name={}".format(
            self.args.dataset_name)
        dataset = requests.get(url).json()
        self.dataset_identifier = int(dataset["id"])

        self.add_queries()
        for count, query_id in enumerate(self.query_id_list, start=1):
            cur_rank = self.get_result(query_id)
            if len(self.ranking) == 0:
                self.ranking = cur_rank
            else:
                self.merge_ranking(cur_rank)
            self.log.info("Baitset " + str(self.baitset[0]) + " : " +
                          str(count) + " requests processed")

        sys.stdout.flush()
        self.log.info("Emitting ranking baitset " + str(self.baitset[0]))
        self.success.emit((self.act_list, self.baitset, self.ranking,
                           self.dataset_infos, 'FastROCS'))

    def add_queries(self):
        """Post one query per baitset molecule and record the query ids."""
        url = self.args.url + "/queries/"
        self.query_id_list = list()
        for idx in self.baitset[1]:
            self.query = tempfile.NamedTemporaryFile(suffix='.oeb',
                                                     mode='wb',
                                                     delete=False)
            # Only the path is needed; release the handle right away
            # instead of leaking one per query.
            self.query.close()
            try:
                with oechem.oemolostream(self.query.name) as ofs:
                    oechem.OEWriteMolecule(ofs, self.act_list[idx])

                parameters = {
                    "num_hits": self.args.topn,
                    "dataset_identifier": self.dataset_identifier,
                }
                with open(self.query.name, "rb") as query_file:
                    response = requests.post(url,
                                             files={"query": query_file},
                                             data=parameters)
            finally:
                os.remove(self.query.name)
            self.query_id_list.append(response.json()["id"])

    def get_result(self, query_id):
        """
        Wait for ``query_id`` to complete and return its hits.

        Returns:
            A list of ``(smiles, title, TanimotoCombo, set id, False)``
            tuples in the order the server returned them.
        """
        cur_rank = list()

        url = self.args.url + "/queries/{}/".format(query_id)
        # NOTE(review): polls until the job reports COMPLETED; a job that
        # never does would loop forever -- confirm the other status values
        # the server can report.
        data = None
        tries = 0
        while data is None or data["status"]["job"] != "COMPLETED":
            # The first poll is immediate, later ones back off by a minute.
            time.sleep(60 * tries)
            tries += 1
            data = requests.get(url).json()
        results_data = requests.get(self.args.url + data["results"])

        temp = tempfile.NamedTemporaryFile(suffix='.oeb',
                                           mode='wb',
                                           delete=False)
        try:
            # Close the handle before oechem re-opens the file by name.
            with temp:
                temp.write(results_data.content)
            with oechem.oemolistream(temp.name) as results:
                for mol in results.GetOEGraphMols():
                    cur_rank.append(
                        (oechem.OEMolToSmiles(mol), mol.GetTitle(),
                         float(oechem.OEGetSDData(mol, 'TanimotoCombo')),
                         self.baitset[0], False))
        finally:
            os.remove(temp.name)
        return cur_rank

    @staticmethod
    def _keep(entry, merged, seen, topn):
        """Append ``entry`` if there is room or it ties the last kept score.

        Returns True when the entry was appended, False otherwise.
        """
        if len(merged) < topn or entry[2] == merged[-1][2]:
            merged.append(entry)
            seen.add(entry[1])
            return True
        return False

    def merge_ranking(self, ranking):
        """
        Merge ``ranking`` into ``self.ranking``, keeping the top entries.

        Both lists are walked as in a two-way merge on the score at index
        2, higher scores first. A title (index 1) is kept only the first
        time it is met. At most ``topn`` entries are kept, plus any that
        tie with the last kept score.
        """
        merged_list = list()
        id_set = set()
        topn = self.args.topn
        i = 0
        j = 0

        while i < len(self.ranking):
            # Take every new entry that outranks the current old one.
            while j < len(ranking) and ranking[j][2] > self.ranking[i][2]:
                if ranking[j][1] in id_set:
                    j += 1
                elif self._keep(ranking[j], merged_list, id_set, topn):
                    j += 1
                else:
                    break

            if self.ranking[i][1] in id_set:
                i += 1
            elif self._keep(self.ranking[i], merged_list, id_set, topn):
                i += 1
            else:
                break

        # Whatever is left of the new ranking goes last.
        while j < len(ranking):
            if ranking[j][1] in id_set:
                j += 1
            elif self._keep(ranking[j], merged_list, id_set, topn):
                j += 1
            else:
                break

        self.ranking = merged_list
Exemple #11
0
class OpenMMminimizeCube(ParallelOEMolComputeCube):
    """Parallel cube that minimizes each incoming system.

    ``begin`` turns the cube arguments into an option dictionary and
    ``process`` hands a per-molecule copy of it, together with the MD data
    extracted from the molecule, to ``simtools.simulation`` with
    ``SimType='min'``. The re-packed molecule is emitted on ``success``; on
    any exception the traceback is logged, the message is attached to the
    molecule under ``'error'`` and the molecule is emitted on ``failure``.
    """
    title = 'Minimization Cube'

    version = "0.0.0"
    classification = [["Simulation", "OpenMM", "Minimization"]]
    tags = ['OpenMM', 'Parallel Cube']

    description = """
    Minimize the protein:ligand complex.

    This cube will take in the streamed complex.oeb.gz file containing
    the solvated protein:ligand complex and minimize it.

    Input parameters:
    steps (integer): the number of steps of minimization to apply. If 0
    the minimization will proceed until convergence is reached
    """

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {
            "default": 1
        },  # 1 molecule at a time
        "item_timeout": {
            "default": 43200
        },  # Default 12 hour limit (units are seconds)
        "item_count": {
            "default": 1
        }  # 1 molecule at a time
    }

    steps = parameter.IntegerParameter(
        'steps',
        default=0,
        help_text="""Number of minimization steps. 
                  If 0 the minimization will continue 
                  until convergence""")

    restraints = parameter.StringParameter(
        'restraints',
        default='',
        help_text="""Mask selection to apply restraints. Possible keywords are:
                  ligand, protein, water, ions, ca_protein, cofactors. 
                  The selection can be refined by using logical tokens: 
                  not, noh, and, or, diff, around""")

    restraintWt = parameter.DecimalParameter(
        'restraintWt',
        default=5.0,
        help_text="Restraint weight for xyz atom restraints in kcal/(mol A^2)")

    freeze = parameter.StringParameter(
        'freeze',
        default='',
        help_text="""Mask selection to freeze atoms along the MD 
                  simulation. Possible keywords are: ligand, protein, water, 
                  ions, ca_protein, cofactors. The selection can be refined by
                  using logical tokens: not, noh, and, or, diff, around""")

    temperature = parameter.DecimalParameter('temperature',
                                             default=300,
                                             help_text="Temperature (Kelvin)")

    nonbondedMethod = parameter.StringParameter(
        'nonbondedMethod',
        default='PME',
        choices=[
            'NoCutoff', 'CutoffNonPeriodic', 'CutoffPeriodic', 'PME', 'Ewald'
        ],
        help_text="NoCutoff, CutoffNonPeriodic, CutoffPeriodic, PME, or Ewald")

    nonbondedCutoff = parameter.DecimalParameter(
        'nonbondedCutoff',
        default=10,
        help_text="""The non-bonded cutoff in angstroms.
        This is ignored if the non-bonded method is NoCutoff.""")

    constraints = parameter.StringParameter(
        'constraints',
        default='HBonds',
        choices=['None', 'HBonds', 'HAngles', 'AllBonds'],
        help_text="""None, HBonds, HAngles, or AllBonds
        Which type of constraints to add to the system (e.g., SHAKE).
        None means no bonds are constrained.
        HBonds means bonds with hydrogen are constrained""")

    outfname = parameter.StringParameter(
        'outfname',
        default='min',
        help_text='Filename suffix for output simulation files')

    center = parameter.BooleanParameter(
        'center',
        default=False,
        description='Center the system to the OpenMM unit cell')

    verbose = parameter.BooleanParameter(
        'verbose', default=True, description='Increase log file verbosity')

    platform = parameter.StringParameter(
        'platform',
        default='Auto',
        choices=['Auto', 'Reference', 'CPU', 'CUDA', 'OpenCL'],
        help_text='Select which platform to use to run the simulation')

    cuda_opencl_precision = parameter.StringParameter(
        'cuda_opencl_precision',
        default='single',
        choices=['single', 'mixed', 'double'],
        help_text='Select the CUDA or OpenCL precision')

    def begin(self):
        """Build the option dictionary shared by every ``process`` call."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log
        self.opt['SimType'] = 'min'

        return

    def process(self, mol, port):
        """Minimize one system and emit it on ``success`` or ``failure``.

        :param mol: the molecule carrying the packed MD data.
        :param port: the name of the port the molecule arrived on (unused).
        """
        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)

            # Let a "temperature" SD tag on the molecule override the cube
            # parameter of the same name.
            new_args = {
                dp.GetTag(): dp.GetValue()
                for dp in oechem.OEGetSDDataPairs(mol)
                if dp.GetTag() in ["temperature"]
            }
            if new_args:
                for k in new_args:
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # Best effort: a value that is not numeric is
                        # passed through unchanged.
                        pass
                self.log.info(
                    "Updating parameters for molecule: {}\n{}".format(
                        mol.GetTitle(), new_args))
                opt.update(new_args)

            # Fall back to the molecule title so the log line below never
            # references an identifier that was not unpacked.
            system_id = mol.GetTitle()
            if utils.PackageOEMol.checkTags(mol, ['Structure']):
                gd = utils.PackageOEMol.unpack(mol)
                system_id = gd['IDTag']
                opt['outfname'] = '{}-{}'.format(system_id,
                                                 self.opt['outfname'])

            mdData = utils.MDData(mol)

            opt['molecule'] = mol

            self.log.info('MINIMIZING System: %s' % system_id)
            simtools.simulation(mdData, **opt)

            packedmol = mdData.packMDData(mol)

            self.success.emit(packedmol)

        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(mol)

        return
# Example #12
# 0
class OpenMMnptCube(ParallelOEMolComputeCube):
    """Parallel cube that runs an NPT simulation on each incoming system.

    ``begin`` turns the cube arguments into an option dictionary and
    ``process`` hands a per-molecule copy of it, together with the MD data
    extracted from the molecule, to ``simtools.simulation`` with
    ``SimType='npt'``. The re-packed molecule is emitted on ``success``; on
    any exception the traceback is logged, the message is attached to the
    molecule under ``'error'`` and the molecule is emitted on ``failure``.
    """
    title = 'NPT Cube'
    version = "0.0.0"
    classification = [["Simulation", "OpenMM", "NPT"]]
    tags = ['OpenMM', 'Parallel Cube']

    description = """NPT simulation of the protein:ligand complex.

    This cube will take in the streamed complex.oeb.gz file containing
    the solvated protein:ligand complex and will perform a MD simulation at
    constant temperature and pressure.

    Input parameters:
    ----------------
      time (decimal): Number of picoseconds to perform the complex simulation.
      temperature (decimal): target temperature
      pressure (decimal): target pressure
    """

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {
            "default": 1
        },  # 1 molecule at a time
        "item_timeout": {
            "default": 43200
        },  # Default 12 hour limit (units are seconds)
        "item_count": {
            "default": 1
        }  # 1 molecule at a time
    }

    temperature = parameter.DecimalParameter('temperature',
                                             default=300.0,
                                             help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter('pressure',
                                          default=1.0,
                                          help_text="Pressure (atm)")

    time = parameter.DecimalParameter(
        'time', default=10.0, help_text="NPT simulation time in picoseconds")

    restraints = parameter.StringParameter(
        'restraints',
        default='',
        help_text="""Mask selection to apply restraints. Possible keywords are:
                  ligand, protein, water, ions, ca_protein, cofactors. 
                  Operational tokens are: and, not, noh""")

    restraintWt = parameter.DecimalParameter(
        'restraintWt',
        default=2.0,
        help_text="Restraint weight for xyz atom restraints in kcal/(mol ang^2)"
    )

    nonbondedMethod = parameter.StringParameter(
        'nonbondedMethod',
        default='PME',
        choices=[
            'NoCutoff', 'CutoffNonPeriodic', 'CutoffPeriodic', 'PME', 'Ewald'
        ],
        help_text="NoCutoff, CutoffNonPeriodic, CutoffPeriodic, PME, or Ewald."
    )

    nonbondedCutoff = parameter.DecimalParameter(
        'nonbondedCutoff',
        default=10,
        help_text="""The non-bonded cutoff in angstroms.
        This is ignored if non-bonded method is NoCutoff""")

    constraints = parameter.StringParameter(
        'constraints',
        default='HBonds',
        choices=['None', 'HBonds', 'HAngles', 'AllBonds'],
        help_text="""None, HBonds, HAngles, or AllBonds
        Which type of constraints to add to the system (e.g., SHAKE).
        None means no bonds are constrained.
        HBonds means bonds with hydrogen are constrained""")

    trajectory_filetype = parameter.StringParameter(
        'trajectory_filetype',
        default='DCD',
        choices=['DCD', 'NetCDF', 'HDF5'],
        help_text="NetCDF, DCD, HDF5. File type to write trajectory files")

    # The trailing space keeps the two adjacent literals from fusing into
    # "trajectoryfile".
    trajectory_interval = parameter.IntegerParameter(
        'trajectory_interval',
        default=0,
        help_text="Step interval for trajectory snapshots. If 0 the trajectory "
        "file will not be generated")

    # Same here: without the trailing space the text read "filewill".
    reporter_interval = parameter.IntegerParameter(
        'reporter_interval',
        default=0,
        help_text="Step interval for reporting data. If 0 the reporter file "
        "will not be generated")

    outfname = parameter.StringParameter(
        'outfname',
        default='npt',
        help_text=
        'Filename suffix for output simulation files. Formatted: <title>-<outfname>'
    )

    tarxz = parameter.BooleanParameter(
        'tarxz',
        default=False,
        description='Create a tar.xz file of the attached data')

    center = parameter.BooleanParameter(
        'center',
        default=True,
        description='Center the system to the OpenMM unit cell')

    verbose = parameter.BooleanParameter(
        'verbose', default=True, description='Increase log file verbosity.')

    platform = parameter.StringParameter(
        'platform',
        default='Auto',
        choices=['Auto', 'Reference', 'CPU', 'CUDA', 'OpenCL'],
        help_text='Select which platform to use to run the simulation')

    cuda_opencl_precision = parameter.StringParameter(
        'cuda_opencl_precision',
        default='single',
        choices=['single', 'mixed', 'double'],
        help_text='Select the CUDA or OpenCL precision')

    def begin(self):
        """Build the option dictionary shared by every ``process`` call."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log
        self.opt['SimType'] = 'npt'

        return

    def process(self, mol, port):
        """Run the NPT simulation on one system and emit the result.

        :param mol: the molecule carrying the packed MD data.
        :param port: the name of the port the molecule arrived on (unused).
        """
        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)

            # Fall back to the molecule title so the log line below never
            # references an identifier that was not unpacked.
            system_id = mol.GetTitle()
            if utils.PackageOEMol.checkTags(mol, ['Structure']):
                gd = utils.PackageOEMol.unpack(mol)
                system_id = gd['IDTag']
                opt['outfname'] = '{}-{}'.format(system_id,
                                                 self.opt['outfname'])

            mdData = utils.MDData(mol)

            opt['molecule'] = mol

            self.log.info('START NPT SIMULATION %s' % system_id)
            simtools.simulation(mdData, **opt)

            packedmol = mdData.packMDData(mol)

            self.success.emit(packedmol)

        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(mol)

        return
# Example #13
# 0
class YankSolvationFECube(ParallelOEMolComputeCube):
    """Parallel cube that runs a YANK solvation free energy experiment.

    For every incoming solvated system the cube splits out the solute,
    builds solvated and vacuum OpenMM systems from the attached parmed
    structure, writes them to a temporary directory, runs the YANK
    experiment described by ``yank_solvation_template`` and stores the
    resulting free energy and its error as SD data on the solute, which is
    emitted on ``success``. On any exception the incoming system is emitted
    on ``failure`` with the error message attached.
    """
    version = "0.0.0"
    title = "YankSolvationFECube"
    description = """
    Compute the hydration free energy of a small molecule with YANK.

    This cube uses the YANK alchemical free energy code to compute the
    transfer free energy of one or more small molecules from gas phase
    to the selected solvent.

    See http://getyank.org for more information about YANK.
    """
    # A list of lists, like the other cubes: with a flat list of strings the
    # comprehension below would iterate over the characters of the string
    # and produce single-letter tags.
    classification = [["Alchemical free energy calculations"]]
    tags = [tag for lists in classification for tag in lists]

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 43200},  # Default 12 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    temperature = parameter.DecimalParameter(
        'temperature',
        default=300.0,
        help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter(
        'pressure',
        default=1.0,
        help_text="Pressure (atm)")

    minimize = parameter.BooleanParameter(
        'minimize',
        default=False,
        help_text="Minimize input system")

    iterations = parameter.IntegerParameter(
        'iterations',
        default=1000,
        help_text="Number of iterations")

    nsteps_per_iteration = parameter.IntegerParameter(
        'nsteps_per_iteration',
        default=500,
        help_text="Number of steps per iteration")

    timestep = parameter.DecimalParameter(
        'timestep',
        default=2.0,
        help_text="Timestep (fs)")

    nonbondedCutoff = parameter.DecimalParameter(
        'nonbondedCutoff',
        default=10.0,
        help_text="The non-bonded cutoff in angstroms")

    verbose = parameter.BooleanParameter(
        'verbose',
        default=False,
        help_text="Print verbose YANK logging output")

    def begin(self):
        """Build the option dictionary shared by every ``process`` call."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log

    def process(self, solvated_system, port):
        """Run the YANK solvation experiment for one solvated system.

        :param solvated_system: the molecule holding the solvated solute
            and its packed MD data.
        :param port: the name of the port the molecule arrived on (unused).
        """

        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)

            # Split the complex in components
            protein, solute, water, excipients = oeommutils.split(solvated_system, ligand_res_name='LIG')

            # Let "temperature"/"pressure" SD tags on the solute override the
            # cube parameters of the same name.
            new_args = {dp.GetTag(): dp.GetValue() for dp in oechem.OEGetSDDataPairs(solute) if dp.GetTag() in
                        ["temperature", "pressure"]}

            if new_args:
                for k in new_args:
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # Best effort: a value that is not numeric is
                        # passed through unchanged.
                        pass
                self.log.info("Updating parameters for molecule: {}\n{}".format(solute.GetTitle(), new_args))
                opt.update(new_args)

            # Extract the MD data
            mdData = data_utils.MDData(solvated_system)
            solvated_structure = mdData.structure

            # Extract the ligand parmed structure
            solute_structure = solvated_structure.split()[0][0]
            solute_structure.box = None

            # Set the ligand title
            solute.SetTitle(solvated_system.GetTitle())

            # Create the solvated and vacuum system
            solvated_omm_sys = solvated_structure.createSystem(nonbondedMethod=app.PME,
                                                               nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                                                               constraints=app.HBonds,
                                                               removeCMMotion=False)

            solute_omm_sys = solute_structure.createSystem(nonbondedMethod=app.NoCutoff,
                                                           constraints=app.HBonds,
                                                           removeCMMotion=False)

            # This is a note from:
            # https://github.com/MobleyLab/SMIRNOFF_paper_code/blob/e5012c8fdc4570ca0ec750f7ab81dd7102e813b9/scripts/create_input_files.py#L114
            # Fix switching function.
            for force in solvated_omm_sys.getForces():
                if isinstance(force, openmm.NonbondedForce):
                    force.setUseSwitchingFunction(True)
                    force.setSwitchingDistance((opt['nonbondedCutoff'] - 1.0) * unit.angstrom)

            # Write out all the required files and set-run the Yank experiment
            with TemporaryDirectory() as output_directory:

                opt['Logger'].info("Output Directory {}".format(output_directory))

                solvated_structure_fn = os.path.join(output_directory, "solvated.pdb")
                solvated_structure.save(solvated_structure_fn, overwrite=True)

                solute_structure_fn = os.path.join(output_directory, "solute.pdb")
                solute_structure.save(solute_structure_fn, overwrite=True)

                # ``with`` guarantees the XML files are flushed and closed
                # before YANK reads them, even if a write fails.
                solvated_omm_sys_serialized = XmlSerializer.serialize(solvated_omm_sys)
                solvated_omm_sys_serialized_fn = os.path.join(output_directory, "solvated.xml")
                with open(solvated_omm_sys_serialized_fn, 'w') as solvated_f:
                    solvated_f.write(solvated_omm_sys_serialized)

                solute_omm_sys_serialized = XmlSerializer.serialize(solute_omm_sys)
                solute_omm_sys_serialized_fn = os.path.join(output_directory, "solute.xml")
                with open(solute_omm_sys_serialized_fn, 'w') as solute_f:
                    solute_f.write(solute_omm_sys_serialized)

                # Build the Yank Experiment
                yaml_builder = ExperimentBuilder(yank_solvation_template.format(
                                                 verbose='yes' if opt['verbose'] else 'no',
                                                 minimize='yes' if opt['minimize'] else 'no',
                                                 output_directory=output_directory,
                                                 timestep=opt['timestep'],
                                                 nsteps_per_iteration=opt['nsteps_per_iteration'],
                                                 number_iterations=opt['iterations'],
                                                 temperature=opt['temperature'],
                                                 pressure=opt['pressure'],
                                                 solvated_pdb_fn=solvated_structure_fn,
                                                 solvated_xml_fn=solvated_omm_sys_serialized_fn,
                                                 solute_pdb_fn=solute_structure_fn,
                                                 solute_xml_fn=solute_omm_sys_serialized_fn))

                # Run Yank
                yaml_builder.run_experiments()

                exp_dir = os.path.join(output_directory, "experiments")

                # Calculate solvation free energy, solvation Enthalpy and their errors
                # (the enthalpy values are returned but not stored on the molecule)
                DeltaG_solvation, dDeltaG_solvation, _DeltaH, _dDeltaH = yankutils.analyze_directory(exp_dir)

                # Add result to the original molecule in kcal/mol
                oechem.OESetSDData(solute, 'DG_yank_solv', str(DeltaG_solvation))
                oechem.OESetSDData(solute, 'dG_yank_solv', str(dDeltaG_solvation))

            # Emit the ligand
            self.success.emit(solute)

        except Exception as e:
            # Attach an error message to the molecule that failed
            self.log.error(traceback.format_exc())
            solvated_system.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(solvated_system)

        return
# Example #14
# 0
class YankBindingCube(ParallelOEMolComputeCube):
    """Parallel cube that runs a YANK binding free energy experiment.

    ``begin`` validates the solvent choice, computes kT for the requested
    temperature and loads the receptor. ``process`` writes the receptor and
    the incoming ligand to a temporary directory, builds the YANK
    experiments from ``binding_yaml_template``, analyzes the output and
    stores the binding free energy and its error (scaled by kT in kcal/mol)
    as SD data on the ligand, which is emitted on ``success``. On any
    exception the ligand is emitted on ``failure`` with the error attached.
    """
    title = "YankBindingCube"
    description = """
    Compute the binding free energy of a small molecule with YANK.

    This cube uses the YANK alchemical free energy code to compute the binding
    free energy of one or more small molecules using harmonic restraints.

    See http://getyank.org for more information about YANK.
    """
    # A list of lists, like the other cubes: with a flat list of strings the
    # comprehension below would iterate over the characters of the string
    # and produce single-letter tags.
    classification = [["Alchemical free energy calculations"]]
    tags = [tag for lists in classification for tag in lists]

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {
            "default": 1
        },  # 1 molecule at a time
        "item_timeout": {
            "default": 3600
        },  # Default 1 hour limit (units are seconds)
        "item_count": {
            "default": 1
        }  # 1 molecule at a time
    }

    # Define Custom Ports to handle oeb.gz files
    intake = CustomMoleculeInputPort('intake')
    success = CustomMoleculeOutputPort('success')
    failure = CustomMoleculeOutputPort('failure')

    # Receptor specification
    receptor = parameter.DataSetInputParameter(
        'receptor', required=True, help_text='Receptor structure file')

    # These can override YAML parameters
    nsteps_per_iteration = parameter.IntegerParameter(
        'nsteps_per_iteration',
        default=500,
        help_text="Number of steps per iteration")

    timestep = parameter.DecimalParameter('timestep',
                                          default=2.0,
                                          help_text="Timestep (fs)")

    simulation_time = parameter.DecimalParameter(
        'simulation_time',
        default=0.100,
        help_text="Simulation time (ns/replica)")

    temperature = parameter.DecimalParameter('temperature',
                                             default=300.0,
                                             help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter('pressure',
                                          default=1.0,
                                          help_text="Pressure (atm)")

    solvent = parameter.StringParameter(
        'solvent',
        default='gbsa',
        choices=['gbsa', 'pme', 'rf'],
        help_text="Solvent choice ['gbsa', 'pme', 'rf']")

    minimize = parameter.BooleanParameter(
        'minimize',
        default=True,
        help_text="Minimize initial structures for stability")

    randomize_ligand = parameter.BooleanParameter(
        'randomize_ligand',
        default=False,
        help_text="Randomize initial ligand position (implicit only)")

    verbose = parameter.BooleanParameter(
        'verbose',
        default=False,
        help_text="Print verbose YANK logging output")

    def construct_yaml(self, **kwargs):
        """Return the YANK YAML document for the current cube arguments.

        :param kwargs: extra substitutions; they are applied last and
            therefore override the values derived from the cube arguments.
        :return: ``binding_yaml_template`` with every placeholder filled in.
        """
        # Make substitutions to YAML here.
        # TODO: Can we override YAML parameters without having to do string substitutions?
        options = {
            'timestep':
            self.args.timestep,
            'nsteps_per_iteration':
            self.args.nsteps_per_iteration,
            # Total simulation time divided by the time covered by a single
            # iteration, rounded up.
            'number_of_iterations':
            int(
                np.ceil(self.args.simulation_time * unit.nanoseconds /
                        (self.args.nsteps_per_iteration * self.args.timestep *
                         unit.femtoseconds))),
            'temperature':
            self.args.temperature,
            'pressure':
            self.args.pressure,
            'solvent':
            self.args.solvent,
            'minimize':
            'yes' if self.args.minimize else 'no',
            'verbose':
            'yes' if self.args.verbose else 'no',
            'randomize_ligand':
            'yes' if self.args.randomize_ligand else 'no',
        }

        # ``update`` instead of a loop variable called ``parameter``, which
        # shadowed the ``parameter`` module inside this method.
        options.update(kwargs)

        return binding_yaml_template % options

    def begin(self):
        """Validate the arguments, compute kT and load the receptor.

        :raises Exception: if ``solvent`` is not one of the supported values.
        :raises RuntimeError: if the receptor cannot be read.
        """
        # TODO: Is there another idiom to use to check valid input?
        if self.args.solvent not in ['gbsa', 'pme', 'rf']:
            raise Exception("solvent must be one of ['gbsa', 'pme', 'rf']")

        # Compute kT (per mole: Boltzmann constant times Avogadro constant)
        kB = unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA
        self.kT = kB * (self.args.temperature * unit.kelvin)

        # Load receptor
        self.receptor = oechem.OEMol()
        receptor_filename = download_dataset_to_file(self.args.receptor)
        with oechem.oemolistream(receptor_filename) as ifs:
            if not oechem.OEReadMolecule(ifs, self.receptor):
                raise RuntimeError("Error reading receptor")

    def process(self, mol, port):
        """Run the YANK binding experiment for one ligand.

        :param mol: the ligand; it must carry partial charges.
        :param port: the name of the port the molecule arrived on (unused).
        """
        kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

        # Retrieve data about which molecule we are processing
        title = mol.GetTitle()

        with TemporaryDirectory() as output_directory:
            try:
                # Print out which molecule we are processing
                self.log.info('Processing {} in {}.'.format(
                    title, output_directory))

                # Check that molecule is charged.
                if not molecule_is_charged(mol):
                    raise Exception(
                        'Molecule %s has no charges; input molecules must be charged.'
                        % mol.GetTitle())

                # Write the receptor.
                pdbfilename = os.path.join(output_directory, 'receptor.pdb')
                with oechem.oemolostream(pdbfilename) as ofs:
                    res = oechem.OEWriteConstMolecule(ofs, self.receptor)
                    if res != oechem.OEWriteMolReturnCode_Success:
                        raise RuntimeError(
                            "Error writing receptor: {}".format(res))

                # Write the specified molecule out to a mol2 file without
                # changing its name. The stream is closed before the file is
                # post-processed so that everything is flushed to disk.
                mol2_filename = os.path.join(output_directory, 'input.mol2')
                with oechem.oemolostream(mol2_filename) as ofs:
                    oechem.OEWriteMol2File(ofs, mol)

                # Undo the oechem naming of mol2 substructures as `<0>`
                from YankCubes.utils import unfuck_oechem_mol2_file
                unfuck_oechem_mol2_file(mol2_filename)

                # Run YANK on the specified molecule.
                from yank.yamlbuild import YamlBuilder
                yaml = self.construct_yaml(output_directory=output_directory)
                yaml_builder = YamlBuilder(yaml)
                yaml_builder.build_experiments()
                self.log.info(
                    'Ran Yank experiments for molecule {}.'.format(title))

                # Analyze the binding free energy
                # TODO: Use yank.analyze API for this
                from YankCubes.analysis import analyze
                store_directory = os.path.join(output_directory, 'experiments')
                [DeltaG_binding, dDeltaG_binding] = analyze(store_directory)

                # Attach binding free energy estimates to molecule
                oechem.OESetSDData(mol, 'DeltaG_yank_binding',
                                   str(DeltaG_binding * kT_in_kcal_per_mole))
                oechem.OESetSDData(mol, 'dDeltaG_yank_binding',
                                   str(dDeltaG_binding * kT_in_kcal_per_mole))
                self.log.info(
                    'Analyzed and stored binding free energy for molecule {}.'.
                    format(title))

                # Emit molecule to success port.
                self.success.emit(mol)

            except Exception as e:
                self.log.info(
                    'Exception encountered when processing molecule {}.'.
                    format(title))
                # Attach error message to the molecule that failed
                # TODO: If there is an error in the leap setup log,
                # we should capture that and attach it to the failed molecule.
                self.log.error(traceback.format_exc())
                mol.SetData('error', str(e))
                # Return failed molecule
                self.failure.emit(mol)
# Example #15
# 0
class LigandReader(SourceCube):
    """Source cube that reads ligands and tags them before emitting.

    Outside Orion (``config_from_env()`` returns ``None``) the ligands are
    read from the file named by ``data_in``; otherwise they are streamed
    from the dataset through ``StreamingDataset``. Every ligand gets the
    ``prefix`` and ``suffix`` values attached as data, the residue name of
    each atom set to ``type`` and, when ``IDTag`` is set, an ``IDTag`` data
    entry built from its title and its position in the stream.
    """
    title = "LigandReader Cube"
    version = "0.0.0"
    classification = [["Ligand Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    Ligand Reader Cube 
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of Ligands
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the Ligands
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Ligand to read in",
        required=True,
        description="The Ligand to read in")

    limit = parameter.IntegerParameter(
        "limit",
        required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    prefix = parameter.StringParameter(
        'prefix',
        default='',
        help_text='An SD tag used as prefix string')

    suffix = parameter.StringParameter(
        'suffix',
        default='',
        help_text='An SD tag used as suffix string')

    type = parameter.StringParameter(
        'type',
        default='LIG',
        required=True,
        help_text='The ligand residue name')

    IDTag = parameter.BooleanParameter(
        'IDTag',
        default=True,
        required=False,
        help_text='If True/Checked ligands are enumerated by sequential integers. '
                  'A SD tag containing part of the ligand name and an integer is used '
                  'to create a unique IDTag which is attached to the ligand')

    def begin(self):
        """Expose the cube arguments as the ``self.opt`` dictionary."""
        self.opt = vars(self.args)

    def _tag_ligand(self, mol, count):
        """Attach prefix/suffix/IDTag data and the residue name to ``mol``.

        :param mol: the ligand to annotate in place.
        :param count: zero-based position of the ligand in the stream, used
            as the numeric part of the IDTag.
        :return: the same ``mol`` object.
        """
        mol.SetData(oechem.OEGetTag('prefix'), self.opt['prefix'])
        mol.SetData(oechem.OEGetTag('suffix'), self.opt['suffix'])

        for at in mol.GetAtoms():
            residue = oechem.OEAtomGetResidue(at)
            residue.SetName(self.opt['type'])
            oechem.OEAtomSetResidue(at, residue)

        if self.opt['IDTag']:
            # 'l' + at most the first 12 characters of the title + '_' + index
            mol.SetData(oechem.OEGetTag('IDTag'), 'l' + mol.GetTitle()[0:12] + '_' + str(count))
        return mol

    def _emit_ligands(self, molecules, max_idx):
        """Yield tagged ligands from ``molecules``, honouring the limit.

        :param molecules: iterable of ligands.
        :param max_idx: stop once this many ligands were yielded; ``None``
            means no limit.
        """
        count = 0
        for mol in molecules:
            yield self._tag_ligand(mol, count)
            count += 1
            if max_idx is not None and count == max_idx:
                break

    def __iter__(self):
        """Yield the tagged ligands from the local file or the Orion dataset."""
        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)
        self.config = config_from_env()
        in_orion = self.config is not None
        if not in_orion:
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                for mol in self._emit_ligands(ifs.GetOEMols(), max_idx):
                    yield mol
        else:
            stream = StreamingDataset(self.args.data_in,
                                      input_format=self.args.download_format)
            for mol in self._emit_ligands(stream, max_idx):
                yield mol
# Example #16
# 0
class LigChargeCube(ParallelOEMolComputeCube):
    # Parallel cube that assigns ELF10 partial charges to ligands that do
    # not carry partial charges yet.
    title = "Ligand Charge Cube"
    version = "0.0.0"
    classification = [["Ligand Preparation", "OEChem", "Ligand preparation"]]
    tags = ['OEChem', 'Quacpac']
    description = """
           This cube charges the Ligand by using the ELF10 charge method

           Input:
           -------
           oechem.OEMCMol - Streamed-in of the ligand molecules

           Output:
           -------
           oechem.OEMCMol - Emits the charged ligands
           """

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 3600},  # Default 1 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    max_conformers = parameter.IntegerParameter(
        'max_conformers',
        default=800,
        help_text="Max number of ligand conformers")

    def begin(self):
        """Store the cube arguments in ``self.opt`` and add the logger to them."""
        options = vars(self.args)
        options['Logger'] = self.log
        self.opt = options

    def process(self, ligand, port):
        """Charge one ligand and emit it on ``success`` (``failure`` on error).

        The ligand is sanitized first. ELF10 charges are computed only when
        the sanitized ligand has no partial charges; the computed charges
        are then copied onto the sanitized ligand atom by atom index.
        """
        try:
            # Ligand sanitation
            ligand = oeommutils.sanitizeOEMolecule(ligand)

            if not oechem.OEHasPartialCharges(ligand):
                # Charge the ligand
                charged = ff_utils.assignELF10charges(
                    ligand, self.opt['max_conformers'], strictStereo=True)

                # Transfer the computed charges back to the starting ligand
                if charged:
                    charge_by_idx = {}
                    for atom in charged.GetAtoms():
                        charge_by_idx[atom.GetIdx()] = atom.GetPartialCharge()
                    for atom in ligand.GetAtoms():
                        atom.SetPartialCharge(charge_by_idx[atom.GetIdx()])

            self.success.emit(ligand)

        except Exception as e:
            # Log the traceback and attach the error message to the failed molecule
            self.log.error(traceback.format_exc())
            ligand.SetData('error', str(e))
            # Return failed mol
            self.failure.emit(ligand)

        return
Exemple #17
0
class YankBindingFECube(ParallelOEMolComputeCube):
    # NOTE(review): the title and description below still describe a
    # solvation/hydration calculation although this cube runs the YANK
    # binding template; they are user-visible strings and are left
    # unchanged here -- confirm and update them separately.
    version = "0.0.0"
    title = "YankSolvationFECube"
    description = """
    Compute the hydration free energy of a small molecule with YANK.

    This cube uses the YANK alchemical free energy code to compute the
    transfer free energy of one or more small molecules from gas phase
    to the selected solvent.

    See http://getyank.org for more information about YANK.
    """
    classification = ["Alchemical free energy calculations"]
    # Each classification entry is already a complete tag. Iterating the
    # entries themselves (as the previous nested comprehension did) walks
    # over the characters of the string and yields one-letter tags.
    tags = list(classification)

    # The intake port is re-defined as batch port
    intake = BatchMoleculeInputPort("intake")

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_timeout": {"default": 43200},  # Default 12 hour limit (units are seconds)
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    temperature = parameter.DecimalParameter(
        'temperature',
        default=300.0,
        help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter(
        'pressure',
        default=1.0,
        help_text="Pressure (atm)")

    minimize = parameter.BooleanParameter(
        'minimize',
        default=False,
        help_text="Minimize input system")

    iterations = parameter.IntegerParameter(
        'iterations',
        default=1000,
        help_text="Number of iterations")

    nsteps_per_iteration = parameter.IntegerParameter(
        'nsteps_per_iteration',
        default=500,
        help_text="Number of steps per iteration")

    timestep = parameter.DecimalParameter(
        'timestep',
        default=2.0,
        help_text="Timestep (fs)")

    nonbondedCutoff = parameter.DecimalParameter(
        'nonbondedCutoff',
        default=10.0,
        help_text="The non-bonded cutoff in angstroms")

    restraints = parameter.StringParameter(
        'restraints',
        default='Harmonic',
        choices=['FlatBottom', 'Harmonic', 'Boresch'],
        help_text='Select the restraint types')

    ligand_resname = parameter.StringParameter(
        'ligand_resname',
        default='LIG',
        help_text='The decoupling ligand residue name')

    verbose = parameter.BooleanParameter(
        'verbose',
        default=True,
        help_text="Print verbose YANK logging output")

    def begin(self):
        """Store the cube arguments in ``self.opt`` and add the logger to them."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log

    def process(self, solvated_system, port):
        """Run a YANK binding calculation for one ligand/complex pair.

        Args:
            solvated_system: indexable batch whose element 0 is the solvated
                ligand and element 1 the solvated complex.
            port: name of the port the batch arrived on (unused).

        On success the ligand extracted from the solvated ligand system is
        emitted on ``success`` with the SD tags ``DG_yank_binding`` and
        ``dG_yank_binding``. On any error the traceback is logged and the
        solvated complex is emitted on ``failure`` with an ``error`` data
        field.
        """
        try:
            # Work on a copy so per-molecule overrides do not leak into
            # the shared cube options
            opt = dict(self.opt)

            # Extract the solvated ligand and the solvated complex
            solvated_ligand = solvated_system[0]
            solvated_complex = solvated_system[1]

            # Update cube simulation parameters with the eventually molecule SD tags
            new_args = {dp.GetTag(): dp.GetValue() for dp in oechem.OEGetSDDataPairs(solvated_ligand) if dp.GetTag() in
                        ["temperature", "pressure"]}
            if new_args:
                for k in new_args:
                    try:
                        new_args[k] = float(new_args[k])
                    except (TypeError, ValueError):
                        # Best effort: keep the raw tag value when it is
                        # not a parsable number
                        pass
                self.log.info("Updating parameters for molecule: {}\n{}".format(solvated_ligand.GetTitle(), new_args))
                opt.update(new_args)

            # Extract the MD data
            mdData_ligand = data_utils.MDData(solvated_ligand)
            solvated_ligand_structure = mdData_ligand.structure

            mdData_complex = data_utils.MDData(solvated_complex)
            solvated_complex_structure = mdData_complex.structure

            # Create the solvated OpenMM systems
            solvated_complex_omm_sys = solvated_complex_structure.createSystem(nonbondedMethod=app.PME,
                                                                               nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                                                                               constraints=app.HBonds,
                                                                               removeCMMotion=False)

            solvated_ligand_omm_sys = solvated_ligand_structure.createSystem(nonbondedMethod=app.PME,
                                                                             nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms,
                                                                             constraints=app.HBonds,
                                                                             removeCMMotion=False)

            # Write out all the required files and set-run the Yank experiment
            with TemporaryDirectory() as output_directory:

                opt['Logger'].info("Output Directory {}".format(output_directory))

                solvated_complex_structure_fn = os.path.join(output_directory, "complex.pdb")
                solvated_complex_structure.save(solvated_complex_structure_fn, overwrite=True)

                solvated_ligand_structure_fn = os.path.join(output_directory, "solvent.pdb")
                solvated_ligand_structure.save(solvated_ligand_structure_fn, overwrite=True)

                # The context managers guarantee the XML files are flushed
                # and closed before YANK reads them, even if a write fails
                solvated_complex_omm_serialized = XmlSerializer.serialize(solvated_complex_omm_sys)
                solvated_complex_omm_serialized_fn = os.path.join(output_directory, "complex.xml")
                with open(solvated_complex_omm_serialized_fn, 'w') as solvated_complex_f:
                    solvated_complex_f.write(solvated_complex_omm_serialized)

                solvated_ligand_omm_serialized = XmlSerializer.serialize(solvated_ligand_omm_sys)
                solvated_ligand_omm_serialized_fn = os.path.join(output_directory, "solvent.xml")
                with open(solvated_ligand_omm_serialized_fn, 'w') as solvated_ligand_f:
                    solvated_ligand_f.write(solvated_ligand_omm_serialized)

                # Build the Yank Experiment
                yaml_builder = ExperimentBuilder(yank_binding_template.format(
                    verbose='yes' if opt['verbose'] else 'no',
                    minimize='yes' if opt['minimize'] else 'no',
                    output_directory=output_directory,
                    timestep=opt['timestep'],
                    nsteps_per_iteration=opt['nsteps_per_iteration'],
                    number_iterations=opt['iterations'],
                    temperature=opt['temperature'],
                    pressure=opt['pressure'],
                    complex_pdb_fn=solvated_complex_structure_fn,
                    complex_xml_fn=solvated_complex_omm_serialized_fn,
                    solvent_pdb_fn=solvated_ligand_structure_fn,
                    solvent_xml_fn=solvated_ligand_omm_serialized_fn,
                    restraints=opt['restraints'],
                    ligand_resname=opt['ligand_resname']))

                # Run Yank
                yaml_builder.run_experiments()

                exp_dir = os.path.join(output_directory, "experiments")

                # Analysis must happen while the temporary directory still exists
                DeltaG_binding, dDeltaG_binding, DeltaH, dDeltaH = yankutils.analyze_directory(exp_dir)

                protein, ligand, water, excipients = oeommutils.split(solvated_ligand,
                                                                      ligand_res_name=opt['ligand_resname'])
                # Add result to the extracted ligand in kcal/mol
                oechem.OESetSDData(ligand, 'DG_yank_binding', str(DeltaG_binding))
                oechem.OESetSDData(ligand, 'dG_yank_binding', str(dDeltaG_binding))

            self.success.emit(ligand)

        except Exception as e:
            # Attach an error message to the molecule that failed
            self.log.error(traceback.format_exc())
            solvated_system[1].SetData('error', str(e))
            # Return failed mol
            self.failure.emit(solvated_system[1])

        return
Exemple #18
0
class ProteinReader(SourceCube):
    # Source cube that reads protein molecules and renames each one to the
    # configured protein prefix before emitting it.
    title = "Protein Reader Cube"
    version = "0.0.0"
    classification = [["Protein Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    A Protein Reader Cube 
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of the protein system
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the protein system
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter("data_in",
                                              help_text="Protein to read in",
                                              required=True,
                                              description="The Protein to read in")

    limit = parameter.IntegerParameter("limit", required=False)

    download_format = parameter.StringParameter("download_format",
                                                choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
                                                required=False,
                                                default=".oeb.gz")

    protein_prefix = parameter.StringParameter('protein_prefix',
                                               default='PRT',
                                               help_text='The protein prefix name used to identify the protein')

    def begin(self):
        """Expose the cube arguments as a dictionary on ``self.opt``."""
        options = vars(self.args)
        self.opt = options

    def __iter__(self):
        """Yield the input molecules, each retitled with the protein prefix.

        Molecules come from an ``oemolistream`` on ``self.args.data_in``
        when ``config_from_env()`` returns ``None``, otherwise from a
        ``StreamingDataset``. Iteration stops once ``self.args.limit``
        molecules have been yielded (no cap when the limit is ``None``).
        """
        limit = self.args.limit
        if limit is not None:
            limit = int(limit)

        self.config = config_from_env()
        emitted = 0

        if self.config is None:
            # No Orion configuration found: read from the local file
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                for mol in ifs.GetOEMols():
                    mol.SetTitle(self.opt['protein_prefix'])
                    yield mol
                    emitted += 1
                    # A None limit never compares equal, so no cap applies
                    if emitted == limit:
                        break
            return

        stream = StreamingDataset(self.args.data_in,
                                  input_format=self.args.download_format)
        for mol in stream:
            mol.SetTitle(self.opt['protein_prefix'])
            yield mol
            emitted += 1
            if emitted == limit:
                break
Exemple #19
0
class DriveTorsion(Cube):
    """
        Drive the primary torsion.
    """

    cube_type = constants.CUBE_COMPUTE

    num_points = parameter.IntegerParameter(
        "num_points",
        title="Number of points at which to sample the dihedral.",
        default=24,
        min_value=4,
        max_value=36,
        description="""The number of evenly spaced torsion angles to sample in
        order to determine the enthalpy surface.""",
    )

    to_energy_calc = MoleculeOutputPort("to_energy_calc")

    # Generic-data tag holding the index of the conformer selected for the
    # next energy calculation
    conf_selection_tag = "SELECTED_CONFORMER"

    @staticmethod
    def _hostname():
        """Return the host name used in log messages.

        HOSTNAME is not exported in every environment; a placeholder is
        returned instead of letting a log statement raise ``KeyError``.
        """
        return os.environ.get("HOSTNAME", "unknown-host")

    def process(self, mol, port):
        """
            The input to this cube will be an OEMol with one or more conformers
            with "CONFORMER_LABEL" SD Data of the form 'XY-1234567_1_2_3_4_00_00'

            Depending on the number of conformers the molecule is emitted on
            ``success`` (scan complete), or a conformer is selected/created
            and the molecule is emitted on ``to_energy_calc``. Errors while
            driving the torsion are logged and the molecule is emitted on
            ``failure``.
        """
        num_confs = mol.NumConfs()
        host = self._hostname()

        last_conf = mol.GetActive()
        last_conf_name = oechem.OEGetSDData(last_conf, "CONFORMER_LABEL")
        self.log.info(
            "Processing conformer {} on {} at {:%Y-%m-%d %H:%M:%S}".format(
                last_conf_name, host, datetime.datetime.now()))

        # One conformer per sampled angle: the scan is finished
        if num_confs == self.args.num_points:
            self.success.emit(mol)
            self.log.info(
                "Completed scan for {} on {} at {:%Y-%m-%d %H:%M:%S}".format(
                    mol.GetTitle(), host, datetime.datetime.now()))
            return

        # A single conformer without a selection tag has not been through
        # this cube yet: it becomes the 0.0 angle point
        if num_confs == 1 and not mol.HasData(self.conf_selection_tag):
            self.log.info(
                "Conformer {} is a fresh starting conformer on {} at {:%Y-%m-%d %H:%M:%S}"
                .format(mol.GetTitle(), host, datetime.datetime.now()))
            mol.SetIntData(self.conf_selection_tag, last_conf.GetIdx())
            last_conf.SetDoubleData("TORSION_ANGLE", 0.0)
            oechem.OESetSDData(last_conf, "TORSION_ANGLE", "0.0")
            self.log.info(
                "Sending conformer {} to energy calculation from {} at {:%Y-%m-%d %H:%M:%S}"
                .format(last_conf_name, host, datetime.datetime.now()))
            self.to_energy_calc.emit(mol)
            return

        try:
            torsion_tag = "TORSION_ATOMS_FRAGMENT"
            torsion_atoms_in_fragment = get_sd_data(mol, torsion_tag).split()
            # The SD tag values are shifted down by one to obtain the
            # indices passed to get_dihedral
            dihedral_atom_indices = [
                int(x) - 1 for x in torsion_atoms_in_fragment
            ]

            dih, _ = get_dihedral(mol, dihedral_atom_indices)
            dih_atoms = list(dih.GetAtoms())

            # if the last energy calculation failed
            if not oechem.OEHasSDData(last_conf, "PSI4_ENERGY"):
                self.log.info(
                    "Conformer {} found to have NO ENERGY on {} at {:%Y-%m-%d %H:%M:%S}"
                    .format(last_conf_name, host, datetime.datetime.now()))
                mol.PopActive()
                last_conf = mol.GetActive()

            new_conf = mol.NewConf(last_conf)
            mol.PushActive(new_conf)
            conf_no = num_confs
            # Replace the trailing "_NN" of the previous label with the
            # number of the new conformer
            conformer_label = last_conf_name[:-3] + "_{:02d}".format(conf_no)
            oechem.OESetSDData(new_conf, "CONFORMER_LABEL", conformer_label)

            # Evenly spaced angles: point k of num_points sits at k * 2*pi / num_points
            angle = num_confs * 2 * oechem.Pi / self.args.num_points
            angle_deg = oechem.Rad2Deg * angle
            new_conf.SetDoubleData("TORSION_ANGLE", angle_deg)
            oechem.OESetSDData(new_conf, "TORSION_ANGLE",
                               "{:.1f}".format(angle_deg))

            # NOTE(review): a failed rotation is only logged; the conformer
            # is still sent on to the energy calculation -- confirm intended.
            if not oechem.OESetTorsion(new_conf, dih_atoms[0], dih_atoms[1],
                                       dih_atoms[2], dih_atoms[3], angle):
                self.log.error(
                    "Could not rotate conformer {} by {:.1f} on {} at {:%Y-%m-%d %H:%M:%S}"
                    .format(
                        last_conf_name,
                        angle_deg,
                        host,
                        datetime.datetime.now(),
                    ))

            mol.SetIntData(self.conf_selection_tag, new_conf.GetIdx())
            self.log.info(
                "Sending conformer {} to energy calculation from {} at {:%Y-%m-%d %H:%M:%S}"
                .format(conformer_label, host, datetime.datetime.now()))
            self.to_energy_calc.emit(mol)

        except Exception as e:
            self.log.error(
                "Could not drive torsion in conformer {} on {} at {:%Y-%m-%d %H:%M:%S}: {}"
                .format(last_conf_name, host, datetime.datetime.now(), e))
            self.failure.emit(mol)
class PlotResults(SinkCube):
    """
    Sink cube that plots the 'Average RR' and 'Average HR' columns of the
    results it receives and saves each plot as an SVG file.

    When an Orion configuration is found the SVG is written to a temporary
    file and uploaded; otherwise it is saved to a local path built from
    the ``name`` parameter.
    """

    classification = [["Compute", "Plot"]]

    fptype = parameter.IntegerParameter('fptype', default=105,
                                    help_text="Fingerprint type to use for the ranking")

    intake = ObjectInputPort('intake')
    name = FileOutputParameter('name',
                               required=True,
                               description='The name of the output file')

    def begin(self):
        """Detect the Orion environment and create the upload scratch file."""
        self.in_orion = config_from_env() is not None
        if self.in_orion:
            self.stream = tempfile.NamedTemporaryFile()

    def write(self, data, port):
        """Plot and save the RR and HR rate curves for one result item.

        Args:
            data: indexable whose element 0 is the results object (its
                ``plot`` method is called with ``y`` and ``label``) and
                element 1 the ranking method name.
            port: name of the port the item arrived on (unused).

        Raises:
            ValueError: if the method name is neither 'Fingerprint' nor
                'FastROCS'.
            KeyError: if the ``fptype`` parameter is not a known
                fingerprint type.
        """
        self.results_avg = data[0]
        self.method = data[1]

        if self.method == 'Fingerprint':
            fptypes = {102: 'path', 104: 'circular', 105: 'tree'}
            self.FPType = fptypes[self.args.fptype]
            self.name_ext = 'FP_' + self.FPType
        elif self.method == 'FastROCS':
            self.name_ext = 'FR'
        else:
            # Without this guard an unknown method would silently reuse the
            # name extension left over from a previous item
            raise ValueError("Unknown ranking method: {!r}".format(self.method))

        for rate in ('RR', 'HR'):
            self._save_rate_plot(rate)

    def _save_rate_plot(self, rate):
        """Plot the 'Average <rate>' column and save or upload it as SVG."""
        self.results_avg.plot(y='Average ' + rate + ' ' + self.name_ext,
                              label="Average " + rate + self.name_ext)
        try:
            plt.xlabel('Top Rank Molecules')
            plt.ylabel('Rate (%)')
            plt.legend(loc='best')
            plt.title("Average " + rate + " Rates " + self.name_ext)
            if self.in_orion:
                plt.savefig(self.stream.name, format="svg")
                self.stream.seek(0)
                self.stream.flush()
                name = self.args.name + "_Average_" + rate + "_plot_" + self.name_ext + ".svg"
                resp = upload_file(name, self.stream.name)
                self.log.info("Created result file {} with ID {}".format(name, resp['id']))
            else:
                path = self.args.name + "Average_" + rate + "_plot_" + self.name_ext + ".svg"
                plt.savefig(path)
        finally:
            # Release the figure; otherwise one figure per plot stays
            # alive for the lifetime of the cube
            plt.close()
class ParallelFastFPRanking(ParallelComputeCube):
    """
    A compute Cube that receives a list of active molecules, a baitset of
    indices into that list and a current ranking, queries the
    FastFingerPrint server for every baitset molecule and emits the merged
    ranking.

    Ranking entries are 5-element tuples; index 1 is used as the molecule
    identifier and index 2 as the similarity score.
    """

    classification = [["Compute", "Fingerprint", "Similarity"]]

    url = parameter.StringParameter(
        'url',
        default="http://10.0.62.124:8081",
        help_text="Url of the FastFingerPrint Server for the request")

    fptype = parameter.IntegerParameter(
        'fptype',
        default=105,
        help_text="Fingerprint type to use for the ranking")

    topn = parameter.IntegerParameter(
        'topn',
        default=100,
        help_text="Number of top molecules returned in the ranking")

    data_input = ObjectInputPort('data_input')
    success = ObjectOutputPort('success')

    def begin(self):
        #        self.max_tanimoto = 0
        #        self.fp = None
        #        self.fp_list = None
        #        self.baitset = None
        pass

    def process(self, data, port):
        """Query the server for each baitset molecule and merge the hit lists.

        Args:
            data: indexable of (active molecules, baitset, ranking);
                ``baitset[1]`` holds indices into the active molecule list
                and ``baitset[0]`` is stored in every ranking entry.
            port: name of the port the item arrived on (unused).

        Emits ``(act_list, baitset, ranking)`` on ``success``.
        """
        self.act_list = data[0]
        self.baitset = data[1]
        self.ranking = data[2]
        fptypes = {102: 'path', 104: 'circular', 105: 'tree'}
        database = fptypes[self.args.fptype] + "_db"
        for idx in self.baitset[1]:
            smiles = oechem.OEMolToSmiles(self.act_list[idx])
            safe_smiles = parse.quote(smiles)
            url = "%s/%s/hitlist?smiles=%s&oformat=csv&maxhits=%d" % (
                self.args.url, database, safe_smiles, self.args.topn)
            response = requests.get(url)
            hitlist = response.content.decode().split('\n')
            # Drop the first and the last line of the CSV response
            # (presumably header and trailing empty line -- verify
            # against the server output)
            hitlist.pop(0)
            hitlist.pop()
            cur_rank = list()
            for mol in hitlist:
                cur_mol = mol.split(',')
                cur_rank.append((cur_mol[0], cur_mol[1], float(cur_mol[4]),
                                 self.baitset[0], False))
            if len(self.ranking) == 0:
                self.ranking = cur_rank
            else:
                self.merge_ranking(cur_rank)

        #if self.fp_list is not None and self.baitset is not None:
        #with oechem.oemolistream(str(self.args.data_in)) as ifs:
        #    for mol in ifs.GetOEMols():
        #        max_tanimoto = 0
        #        fp = oegraphsim.OEFingerPrint()
        #        oegraphsim.OEMakeFP(fp, mol, self.args.fptype)
        #        for idx in self.baitset[1]:
        #            act_fp = self.fp_list[idx]
        #            tanimoto = oegraphsim.OETanimoto(fp, self.fp_list[idx])
        #            if tanimoto > max_tanimoto:
        #                max_tanimoto = tanimoto
        #        self.update_ranking(mol, max_tanimoto, False)

        self.success.emit((self.act_list, self.baitset, self.ranking))

    def merge_ranking(self, ranking):
        """Merge ``ranking`` into ``self.ranking``, keeping the top entries.

        Both lists are walked in parallel, taking the entry with the
        higher score first (``self.ranking`` wins ties). An identifier
        (entry index 1) is kept only once. At most ``topn`` entries are
        kept, plus any further entries whose score equals the score of the
        last kept entry.
        """
        merged_list = list()
        i = 0
        j = 0
        count = 0
        id_set = set()  # identifiers already present in merged_list
        while i < len(self.ranking):
            # Take every new entry that scores strictly higher than the
            # current entry of the existing ranking
            while j < len(ranking) and ranking[j][2] > self.ranking[i][2]:
                if ranking[j][1] not in id_set:
                    if count < self.args.topn or ranking[j][2] == merged_list[
                            count - 1][2]:
                        merged_list.append(ranking[j])
                        count += 1
                        id_set.add(ranking[j][1])
                        j += 1
                    else:
                        break
                else:
                    j += 1

            if self.ranking[i][1] not in id_set:
                if (count < self.args.topn
                        or self.ranking[i][2] == merged_list[count - 1][2]):
                    merged_list.append(self.ranking[i])
                    count += 1
                    id_set.add(self.ranking[i][1])
                    i += 1
                else:
                    break
            else:
                i += 1

        # Existing ranking exhausted (or cut off): append what still fits
        while j < len(ranking):
            if ranking[j][1] not in id_set:
                if (count < self.args.topn
                        or ranking[j][2] == merged_list[count - 1][2]):
                    merged_list.append(ranking[j])
                    count += 1
                    id_set.add(ranking[j][1])
                    j += 1
                else:
                    break
            else:
                j += 1

        self.ranking = merged_list

    def update_ranking(self, mol, max_tanimoto, ka_tag):
        """Insert one molecule into ``self.ranking`` at its score position.

        The molecule is skipped when the ranking already holds ``topn``
        entries and the score is below the last one. After insertion the
        ranking is cut back to ``topn`` entries plus any entries tied with
        the entry at position ``topn``.
        """
        if len(self.ranking) >= self.args.topn and max_tanimoto < self.ranking[
                len(self.ranking) - 1][2]:
            return

        # Insert after the last leading entry with a strictly higher score.
        # enumerate gives the true position even when identical entries
        # occur more than once (list.index would return the first match).
        index = 0
        for position, top_mol in enumerate(self.ranking):
            if max_tanimoto < top_mol[2]:
                index = position + 1
            else:
                break

        upper = self.ranking[:index]
        lower = self.ranking[index:]
        self.ranking = upper + [(oechem.OEMolToSmiles(mol), mol.GetTitle(),
                                 max_tanimoto, self.baitset[0], ka_tag)
                                ] + lower

        # Truncate at the first score change at or after position topn
        i = self.args.topn - 1
        while i < len(self.ranking) - 1:
            if self.ranking[i][2] != self.ranking[i + 1][2]:
                self.ranking = self.ranking[:i + 1]

                break
            else:
                i += 1
class ParallelFastFPInsertKA(ParallelComputeCube):
    """
    A compute Cube that inserts the active molecules that are not part of
    the baitset into a received ranking, scored by their highest Tanimoto
    similarity to the baitset molecules.
    """

    classification = [["ParallelCompute"]]

    fptype = parameter.IntegerParameter(
        'fptype',
        default=105,
        help_text="Fingerprint type to use for the ranking")

    topn = parameter.IntegerParameter(
        'topn',
        default=100,
        help_text="Number of top molecules returned in the ranking")

    data_input = ObjectInputPort('data_input')
    success = ObjectOutputPort('success')

    def process(self, data, port):
        """Insert the non-baitset actives into the ranking and emit it.

        Args:
            data: indexable of (active molecules, baitset, ranking);
                ``baitset[1]`` holds indices into the active molecule list.
            port: name of the port the item arrived on (unused).

        Emits ``(act_list, baitset, ranking, 'Fingerprint')`` on ``success``.
        """
        self.act_list = data[0]
        self.baitset = data[1]
        self.ranking = data[2]
        self.fp_list = list()

        self.calculate_fp()
        self.insert_known_actives()

        self.success.emit(
            (self.act_list, self.baitset, self.ranking, 'Fingerprint'))

    def calculate_fp(self):
        """Compute one fingerprint per active molecule into ``self.fp_list``."""
        for mol in self.act_list:
            fp = oegraphsim.OEFingerPrint()
            oegraphsim.OEMakeFP(fp, mol, self.args.fptype)

            self.fp_list.append(fp)

    def insert_known_actives(self):
        """Rank every active molecule whose index is not in the baitset.

        NOTE(review): the walk skips baitset indices correctly only if
        ``baitset[1]`` is in ascending order -- confirm with the producer.
        """
        c = 0
        for idx in self.baitset[1]:
            # Actives located before the next baitset index
            while c < idx:
                ka_fp = self.fp_list[c]
                simval = self.calc_sim_val(ka_fp)
                self.update_ranking(self.act_list[c], simval, True)

                c += 1
            # Skip the baitset molecule itself
            c += 1
        # Actives located after the last baitset index
        while c < len(self.act_list):
            ka_fp = self.fp_list[c]
            simval = self.calc_sim_val(ka_fp)
            self.update_ranking(self.act_list[c], simval, True)
            c += 1

    def calc_sim_val(self, fp):
        """Return the highest Tanimoto value of ``fp`` against the baitset.

        Returns 0 when the baitset is empty.
        """
        maxval = 0
        for idx in self.baitset[1]:
            # OETanimoto belongs to the GraphSim toolkit, the same module
            # that built the fingerprints in calculate_fp
            tanimoto = oegraphsim.OETanimoto(fp, self.fp_list[idx])
            if tanimoto > maxval:
                maxval = tanimoto
        return maxval

    def update_ranking(self, mol, max_tanimoto, ka_tag):
        """Insert one molecule into ``self.ranking`` at its score position.

        The molecule is skipped when the ranking already holds ``topn``
        entries and the score is below the last one. After insertion the
        ranking is cut back to ``topn`` entries plus any entries tied with
        the entry at position ``topn``.
        """
        if len(self.ranking) >= self.args.topn and max_tanimoto < self.ranking[
                len(self.ranking) - 1][2]:
            return

        # Insert after the last leading entry with a strictly higher score.
        # enumerate gives the true position even when identical entries
        # occur more than once (list.index would return the first match).
        index = 0
        for position, top_mol in enumerate(self.ranking):
            if max_tanimoto < top_mol[2]:
                index = position + 1
            else:
                break

        upper = self.ranking[:index]
        lower = self.ranking[index:]
        self.ranking = upper + [(oechem.OEMolToSmiles(mol), mol.GetTitle(),
                                 max_tanimoto, self.baitset[0], ka_tag)
                                ] + lower

        # Truncate at the first score change at or after position topn
        i = self.args.topn - 1
        while i < len(self.ranking) - 1:
            if self.ranking[i][2] != self.ranking[i + 1][2]:
                self.ranking = self.ranking[:i + 1]

                break
            else:
                i += 1

    def end(self):
        pass
Exemple #23
0
class ProteinReader(SourceCube):
    # Source cube that reads protein molecules and renames each one to the
    # configured protein prefix before emitting it.
    title = "Protein Reader Cube"
    version = "0.0.0"
    classification = [["Protein Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    A Protein Reader Cube 
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of the protein system
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the protein system
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter("data_in",
                                              help_text="Protein to read in",
                                              required=True,
                                              description="The Protein to read in")

    limit = parameter.IntegerParameter("limit", required=False)

    download_format = parameter.StringParameter("download_format",
                                                choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
                                                required=False,
                                                default=".oeb.gz")

    protein_prefix = parameter.StringParameter('protein_prefix',
                                               default='PRT',
                                               help_text='The protein prefix name used to identify the protein')

    def begin(self):
        """Expose the cube arguments as a dictionary on ``self.opt``."""
        options = vars(self.args)
        self.opt = options

    def __iter__(self):
        """Yield the input molecules, each retitled with the protein prefix.

        Molecules come from an ``oemolistream`` on ``self.args.data_in``
        when ``config_from_env()`` returns ``None``, otherwise from a
        ``StreamingDataset``. Iteration stops once ``self.args.limit``
        molecules have been yielded (no cap when the limit is ``None``).
        """
        limit = self.args.limit
        if limit is not None:
            limit = int(limit)

        self.config = config_from_env()
        emitted = 0

        if self.config is None:
            # No Orion configuration found: read from the local file
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                for mol in ifs.GetOEMols():
                    mol.SetTitle(self.opt['protein_prefix'])
                    yield mol
                    emitted += 1
                    # A None limit never compares equal, so no cap applies
                    if emitted == limit:
                        break
            return

        stream = StreamingDataset(self.args.data_in,
                                  input_format=self.args.download_format)
        for mol in stream:
            mol.SetTitle(self.opt['protein_prefix'])
            yield mol
            emitted += 1
            if emitted == limit:
                break


# class SimOutputCube(OEMolOStreamCube):
#     """
#     A sink cube that writes molecules to a file
#     """
#     classification = [["Output"]]
#     title = "Output Writer"
#
#     intake = BinaryMoleculeInputPort('intake')
#     data_out = DataSetOutputParameter('data_out',
#                                       required=True,
#                                       title='Name of Dataset to create',
#                                       description='The dataset to output')
#     backend = DataSetOutputParameter(
#         'backend',
#         default="auto",
#         choices=["db", "s3", "auto"],
#         description="The Orion storage backend to use")
#
#     def begin(self):
#         self.in_orion = config_from_env() is not None
#         self.decoder = MoleculeSerializerMixin()
#         self.need_decode = not self.args.data_out.endswith(".oeb.gz")
#         if self.in_orion:
#             self.ofs = MultipartDatasetUploader(self.args.data_out,
#                                                 tags=[self.name],
#                                                 backend=self.args.backend)
#         elif self.need_decode:
#             self.ofs = oechem.oemolostream(str(self.args.data_out))
#         else:
#             self.ofs = open(str(self.args.data_out), 'wb')
#
#     def write(self, mol, port):
#         if self.in_orion or not self.need_decode:
#             self.ofs.write(mol)
#         else:
#             oechem.OEWriteMolecule(self.ofs, self.decoder.decode(mol))
#
#     def end(self):
#         if self.in_orion:
#             self.ofs.complete()
#         else:
#             self.ofs.close()