예제 #1
0
class IndexInputCube(SourceCube):
    """
    An input cube that reads an index log and returns the baitsets.

    Every item produced is a ``(set_id, baitset)`` tuple parsed from the text
    that follows a ``"set "`` marker in the log.
    """

    classification = [["Input"]]

    success = ObjectOutputPort('success')

    limit = parameter.IntegerParameter(
        'limit',
        required=False,
        description='Read up to N items from this cube')

    data_in = parameter.DataSetInputParameter(
        'data_in',
        required=True,
        title='Index log',
        description='The index log to read from')

    def begin(self):
        """Open the index log: via ``stream_file`` in Orion, else from disk."""
        self.in_orion = config_from_env() is not None
        if self.in_orion:
            self.stream = stream_file(self.args.data_in)
        else:
            self.stream = open(str(self.args.data_in), 'rb')

    def __iter__(self):
        """
        Yield ``(set_id, baitset)`` tuples parsed from the index log.

        ``set_id`` is the integer value of the second-to-last character of
        the first space-separated field of an entry.  ``baitset`` is the list
        of the remaining fields without the last one, where purely numeric
        fields are converted to ``int``.

        When ``limit`` is set, iteration stops right after the ``limit``-th
        item has been yielded.
        """
        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)
        count = 0

        for chunk in self.stream:
            index_log = chunk.decode('utf-8')
            # Whatever precedes the first "set " marker is not a baitset entry.
            for entry in index_log.split("set ")[1:]:
                fields = entry.split(" ")
                set_id = int(fields[0][-2:-1])
                # Drop the id field and the trailing field.
                baitset = fields[1:-1]
                for i, idx in enumerate(baitset):
                    if idx.isdigit():
                        baitset[i] = int(idx)
                # Yield first and count afterwards so that exactly `limit`
                # items are produced; `return` (not `break`) so that the
                # remaining chunks are not read once the limit is reached.
                yield (set_id, baitset)
                count += 1
                if max_idx is not None and count == max_idx:
                    return

    def end(self):
        """Close the log file opened from disk; the Orion stream is left alone."""
        stream = getattr(self, 'stream', None)
        if stream is not None and not getattr(self, 'in_orion', True):
            stream.close()
예제 #2
0
class OEMolTriggeredIStreamCube(ComputeCube):
    """
    A compute cube that uses oechem to read molecules when triggered.

    Nothing is read until an item arrives on the ``fp_input`` port.  Each such
    item causes the file named by ``data_in`` to be read and its molecules to
    be emitted on ``success``, stopping after ``limit`` molecules when a
    limit is set.
    """
    classification = [["Input"]]
    success = MoleculeOutputPort('success')

    title = "Dataset Reader"

    limit = parameter.IntegerParameter(
        'limit',
        required=False,
        description='Read up to N items from this cube')
    fp_input = ObjectInputPort('fp_input')
    data_in = parameter.DataSetInputParameter(
        'data_in',
        required=True,
        title='Dataset to read from',
        description='The dataset to read from')
    download_format = parameter.StringParameter(
        'download_format',
        choices=('.oeb.gz', '.oeb', '.smi', '.pdb', '.mol2'),
        required=False,
        description=
        'The stream format to be used for retrieving molecules from Orion',
        default=".oeb.gz")

    # Set to True once an item has been received on `fp_input`.
    received_act = False

    def process(self, data, port):
        """
        Read and emit the molecules when ``data`` arrives on ``fp_input``.

        Items arriving on any other port are ignored.  ``data`` itself is
        only used as a trigger; its content is not inspected.
        """
        # Compare by value: `is` against a string literal only works by
        # accident of interning and is a SyntaxWarning on Python 3.8+.
        if port != 'fp_input':
            return

        print('Curry wurst')
        self.received_act = True
        max_idx = self.args.limit
        if max_idx is not None:
            max_idx = int(max_idx)
        count = 0
        with oechem.oemolistream(str(self.args.data_in)) as ifs:
            for mol in ifs.GetOEMols():
                self.success.emit(mol)
                count += 1
                if max_idx is not None and count == max_idx:
                    break
예제 #3
0
class Test(SourceCube):
    """Source cube that yields a single hard-coded item."""

    success = ObjectOutputPort('success')
    data_in = parameter.DataSetInputParameter(
        'data_in',
        required=True,
        title='Index log',
        description='The index log to read from')

    def begin(self):
        """Nothing to prepare."""
        pass

    def __iter__(self):
        """
        Yield one ``(act_list, baitset, ranking, dataset_infos)`` tuple.

        ``act_list`` holds two molecules built from fixed SMILES strings; the
        other three members are fixed literals.
        """
        act_list = []
        for smiles in ('c1cccc1 A', 'c1cccc1 B'):
            mol = oechem.OEGraphMol()
            oechem.OESmilesToMol(mol, smiles)
            act_list.append(mol)

        yield (act_list, (0, [0]), [], (0, {"A": 0, "B": 1}))

    def end(self):
        """Nothing to tear down."""
        pass
예제 #4
0
class ForceFieldPrep(ParallelOEMolComputeCube):
    title = "Force Field Preparation Cube"
    version = "0.0.0"
    classification = [["Force Field Preparation", "OEChem", "Force Field preparation"]]
    tags = ['OEChem', 'OEBio', 'OpenMM']
    description = """
        Each complex is parametrized by using the selected force fields

        Input:
        -------
        oechem.OEMCMol - Streamed-in of complexes
      
        Output:
        -------
        oechem.OEMCMol - Emits force field parametrized complexes
        """

    # Defaults replaced for this cube: one molecule at a time, and a
    # one hour (3600 s) limit per item.
    parameter_overrides = {
        "prefetch_count": {"default": 1},
        "item_timeout": {"default": 3600},
        "item_count": {"default": 1},
    }

    protein_forcefield = parameter.DataSetInputParameter(
        'protein_forcefield',
        default='amber99sbildn.xml',
        help_text='Force field parameters for protein')

    solvent_forcefield = parameter.DataSetInputParameter(
        'solvent_forcefield',
        default='tip3p.xml',
        help_text='Force field parameters for solvent')

    ligand_forcefield = parameter.StringParameter(
        'ligand_forcefield',
        required=True,
        default='GAFF2',
        choices=['GAFF', 'GAFF2', 'SMIRNOFF'],
        help_text='Force field to parametrize the ligand')

    other_forcefield = parameter.StringParameter(
        'other_forcefield',
        required=True,
        default='GAFF2',
        choices=['GAFF', 'GAFF2', 'SMIRNOFF'],
        help_text='Force field used to parametrize other molecules not recognized by the protein force field')

    def begin(self):
        """Expose the cube arguments as ``self.opt`` and add the logger to them."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log

    def process(self, mol, port):
        """
        Parametrize one complex and emit it with its Parmed structure attached.

        The complex is split into protein, ligand, water and excipients, a
        force field is applied to each part, and the parts are recombined in
        the order protein, ligand, excipients, water.  The packed complex is
        emitted on ``success``.  On any exception the traceback is logged,
        the message is stored on ``mol`` under ``'error'`` and ``mol`` is
        emitted on ``failure``.
        """
        try:
            # Separate the complex into the parts that are parametrized
            # individually
            protein, ligand, water, excipients = utils.split(mol)

            # The molecule title is the unique prefix of the parametrization
            # output files
            self.opt['prefix_name'] = mol.GetTitle()

            prot_pmd = utils.applyffProtein(protein, self.opt)
            wat_pmd = utils.applyffWater(water, self.opt)

            if excipients.NumAtoms() > 0:
                exc_pmd = utils.applyffExcipients(excipients, self.opt)

                # Rebuild the excipients from the parametrized structure so
                # that their atom order matches it
                excipients = oeommutils.openmmTop_to_oemol(
                    exc_pmd.topology, exc_pmd.positions, verbose=False)

            lig_pmd = utils.applyffLigand(ligand, self.opt)

            # Combine the parametrized parts into one structure
            if excipients.NumAtoms() > 0:
                full_pmd = prot_pmd + lig_pmd + exc_pmd + wat_pmd
            else:
                full_pmd = prot_pmd + lig_pmd + wat_pmd

            oe_atom_total = sum(
                part.NumAtoms() for part in (protein, ligand, excipients, water))

            if oe_atom_total != full_pmd.topology.getNumAtoms():
                oechem.OEThrow.Fatal("Parmed and OE topologies mismatch atom number error")

            # Assemble the OEMol complex in the same part order as the
            # combined structure
            assembled = protein.CreateCopy()
            for part in (ligand, excipients, water):
                oechem.OEAddMols(assembled, part)

            assembled.SetTitle(mol.GetTitle())

            # Copy the box vectors stored on the complex onto the structure
            box_blob = pack_utils.PackageOEMol.getData(assembled, tag='box_vectors')
            full_pmd.box_vectors = pack_utils.PackageOEMol.decodePyObj(box_blob)

            # Attach the structure to the complex
            packed_complex = pack_utils.PackageOEMol.pack(assembled, full_pmd)

            # Attach the reference positions to the complex
            encoded_ref = pack_utils.PackageOEMol.encodePyObj(full_pmd.positions)
            packed_complex.SetData(oechem.OEGetTag('OEMDDataRefPositions'), encoded_ref)

            # Serial number follows the atom index; 'UNL' residues are
            # renamed to "LIG" and flagged as HETATM
            for atom in packed_complex.GetAtoms():
                res = oechem.OEAtomGetResidue(atom)
                res.SetSerialNumber(atom.GetIdx())
                if res.GetName() == 'UNL':
                    res.SetName("LIG")
                    res.SetHetAtom(True)
                oechem.OEAtomSetResidue(atom, res)

            if packed_complex.GetMaxAtomIdx() != full_pmd.topology.getNumAtoms():
                raise ValueError("OEMol complex and Parmed structure mismatch atom numbers")

            # Only checks that the OpenMM System can be created; the result
            # itself is not used
            full_pmd.createSystem(nonbondedMethod=app.CutoffPeriodic,
                                  nonbondedCutoff=10.0 * unit.angstroms,
                                  constraints=app.HBonds,
                                  removeCMMotion=False)

            self.success.emit(packed_complex)
        except Exception as err:
            # Record why the molecule failed and send it to the failure port
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(err))
            self.failure.emit(mol)
예제 #5
0
class ProteinReader(SourceCube):
    title = "Protein Reader Cube"
    version = "0.0.0"
    classification = [["Protein Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    A Protein Reader Cube 
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of the protein system
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the protein system
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Protein to read in",
        required=True,
        description="The Protein to read in")

    limit = parameter.IntegerParameter("limit", required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    protein_prefix = parameter.StringParameter(
        'protein_prefix',
        default='PRT',
        help_text='The protein prefix name used to identify the protein')

    def begin(self):
        """Expose the cube arguments as the ``self.opt`` dictionary."""
        self.opt = vars(self.args)

    def _titled(self, mols, cap):
        """Yield ``mols`` retitled with the protein prefix, at most ``cap`` of them if set."""
        served = 0
        for mol in mols:
            mol.SetTitle(self.opt['protein_prefix'])
            yield mol
            served += 1
            if cap is not None and served == cap:
                return

    def __iter__(self):
        """
        Yield the molecules of ``data_in`` with their title set to the prefix.

        A local file is read through ``oechem.oemolistream`` when
        ``config_from_env()`` returns ``None``; otherwise a
        ``StreamingDataset`` is used.  Stops after ``limit`` molecules when
        a limit is set.
        """
        cap = self.args.limit
        if cap is not None:
            cap = int(cap)

        self.config = config_from_env()
        if self.config is None:
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                yield from self._titled(ifs.GetOEMols(), cap)
        else:
            stream = StreamingDataset(self.args.data_in,
                                      input_format=self.args.download_format)
            yield from self._titled(stream, cap)
예제 #6
0
class YankBindingCube(ParallelOEMolComputeCube):
    title = "YankBindingCube"
    description = """
    Compute thebinding free energy of a small molecule with YANK.

    This cube uses the YANK alchemical free energy code to compute the binding
    free energy of one or more small molecules using harmonic restraints.

    See http://getyank.org for more information about YANK.
    """
    # A list of category paths (list of lists), so that the `tags`
    # comprehension below yields whole category names rather than the
    # individual characters of a string.
    classification = [["Alchemical free energy calculations"]]
    tags = [tag for lists in classification for tag in lists]

    # Override defaults for some parameters
    parameter_overrides = {
        "prefetch_count": {
            "default": 1
        },  # 1 molecule at a time
        "item_timeout": {
            "default": 3600
        },  # Default 1 hour limit (units are seconds)
        "item_count": {
            "default": 1
        }  # 1 molecule at a time
    }

    # Define Custom Ports to handle oeb.gz files
    intake = CustomMoleculeInputPort('intake')
    success = CustomMoleculeOutputPort('success')
    failure = CustomMoleculeOutputPort('failure')

    # Receptor specification
    receptor = parameter.DataSetInputParameter(
        'receptor', required=True, help_text='Receptor structure file')

    # These can override YAML parameters
    nsteps_per_iteration = parameter.IntegerParameter(
        'nsteps_per_iteration',
        default=500,
        help_text="Number of steps per iteration")

    timestep = parameter.DecimalParameter('timestep',
                                          default=2.0,
                                          help_text="Timestep (fs)")

    simulation_time = parameter.DecimalParameter(
        'simulation_time',
        default=0.100,
        help_text="Simulation time (ns/replica)")

    temperature = parameter.DecimalParameter('temperature',
                                             default=300.0,
                                             help_text="Temperature (Kelvin)")

    pressure = parameter.DecimalParameter('pressure',
                                          default=1.0,
                                          help_text="Pressure (atm)")

    solvent = parameter.StringParameter(
        'solvent',
        default='gbsa',
        choices=['gbsa', 'pme', 'rf'],
        help_text="Solvent choice ['gbsa', 'pme', 'rf']")

    minimize = parameter.BooleanParameter(
        'minimize',
        default=True,
        help_text="Minimize initial structures for stability")

    randomize_ligand = parameter.BooleanParameter(
        'randomize_ligand',
        default=False,
        help_text="Randomize initial ligand position (implicit only)")

    verbose = parameter.BooleanParameter(
        'verbose',
        default=False,
        help_text="Print verbose YANK logging output")

    def construct_yaml(self, **kwargs):
        """
        Return the YANK YAML text with the cube parameters substituted in.

        The substitution values are taken from ``self.args``; any keyword
        argument adds to or overrides them before they are applied to
        ``binding_yaml_template`` with the ``%`` operator.
        """
        # TODO: Can we override YAML parameters without having to do string substitutions?
        args = self.args
        total_time = args.simulation_time * unit.nanoseconds
        time_per_iteration = (args.nsteps_per_iteration * args.timestep *
                              unit.femtoseconds)
        options = {
            'timestep': args.timestep,
            'nsteps_per_iteration': args.nsteps_per_iteration,
            'number_of_iterations': int(np.ceil(total_time / time_per_iteration)),
            'temperature': args.temperature,
            'pressure': args.pressure,
            'solvent': args.solvent,
            'minimize': 'yes' if args.minimize else 'no',
            'verbose': 'yes' if args.verbose else 'no',
            'randomize_ligand': 'yes' if args.randomize_ligand else 'no',
        }

        # Caller-supplied values win over the ones derived from the arguments.
        options.update(kwargs)

        return binding_yaml_template % options

    def begin(self):
        """
        Validate the solvent choice, compute kT and load the receptor.

        Raises:
            Exception: if ``solvent`` is not one of 'gbsa', 'pme', 'rf'.
            RuntimeError: if the receptor molecule cannot be read.
        """
        # TODO: Is there another idiom to use to check valid input?
        if self.args.solvent not in ['gbsa', 'pme', 'rf']:
            raise Exception("solvent must be one of ['gbsa', 'pme', 'rf']")

        # Compute kT
        kB = unit.BOLTZMANN_CONSTANT_kB * unit.AVOGADRO_CONSTANT_NA  # Boltzmann constant
        self.kT = kB * (self.args.temperature * unit.kelvin)

        # Load receptor
        self.receptor = oechem.OEMol()
        receptor_filename = download_dataset_to_file(self.args.receptor)
        with oechem.oemolistream(receptor_filename) as ifs:
            if not oechem.OEReadMolecule(ifs, self.receptor):
                raise RuntimeError("Error reading receptor")

    def process(self, mol, port):
        """
        Run YANK on ``mol`` and attach the binding free energy estimates.

        The receptor and the molecule are written to a temporary directory,
        the YANK experiments are built from the generated YAML and analyzed,
        and the estimates (converted with kT in kcal/mol) are stored as the
        SD tags ``DeltaG_yank_binding`` and ``dDeltaG_yank_binding`` before
        ``mol`` is emitted on ``success``.  On any exception the traceback is
        logged, the message is stored on ``mol`` under ``'error'`` and ``mol``
        is emitted on ``failure``.
        """
        kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

        # Retrieve data about which molecule we are processing
        title = mol.GetTitle()

        with TemporaryDirectory() as output_directory:
            try:
                # Print out which molecule we are processing
                self.log.info('Processing {} in {}.'.format(
                    title, output_directory))

                # Check that molecule is charged.
                if not molecule_is_charged(mol):
                    raise Exception(
                        'Molecule %s has no charges; input molecules must be charged.'
                        % mol.GetTitle())

                # Write the receptor.
                pdbfilename = os.path.join(output_directory, 'receptor.pdb')
                with oechem.oemolostream(pdbfilename) as ofs:
                    res = oechem.OEWriteConstMolecule(ofs, self.receptor)
                    if res != oechem.OEWriteMolReturnCode_Success:
                        raise RuntimeError(
                            "Error writing receptor: {}".format(res))

                # Write the specified molecule out to a mol2 file without
                # changing its name.  The stream is closed before the file is
                # post-processed below, so the post-processing step reads a
                # closed file and the handle is not leaked.
                mol2_filename = os.path.join(output_directory, 'input.mol2')
                with oechem.oemolostream(mol2_filename) as ofs:
                    oechem.OEWriteMol2File(ofs, mol)

                # Undo oechem's naming of mol2 substructures as `<0>`
                from YankCubes.utils import unfuck_oechem_mol2_file
                unfuck_oechem_mol2_file(mol2_filename)

                # Run YANK on the specified molecule.
                from yank.yamlbuild import YamlBuilder
                yaml = self.construct_yaml(output_directory=output_directory)
                yaml_builder = YamlBuilder(yaml)
                yaml_builder.build_experiments()
                self.log.info(
                    'Ran Yank experiments for molecule {}.'.format(title))

                # Analyze the binding free energy
                # TODO: Use yank.analyze API for this
                from YankCubes.analysis import analyze
                store_directory = os.path.join(output_directory, 'experiments')
                [DeltaG_binding, dDeltaG_binding] = analyze(store_directory)

                # Disabled debugging aid (extract trajectory), kept for reference:
                #
                # from yank.analyze import extract_trajectory
                # trajectory_filename = 'trajectory.pdb'
                # store_filename = os.path.join(store_directory, 'complex.pdb')
                # extract_trajectory(trajectory_filename, store_filename, state_index=0, keep_solvent=False,
                #        discard_equilibration=True, image_molecules=True)
                # ifs = oechem.oemolistream(trajectory_filename)
                # ifs.SetConfTest(oechem.OEAbsCanonicalConfTest()) # load multi-conformer molecule
                # mol = oechem.OEMol()
                # for mol in ifs.GetOEMols():
                #     print (mol.GetTitle(), "has", mol.NumConfs(), "conformers")
                # ifs.close()
                # os.remove(trajectory_filename)

                # Attach binding free energy estimates to molecule
                oechem.OESetSDData(mol, 'DeltaG_yank_binding',
                                   str(DeltaG_binding * kT_in_kcal_per_mole))
                oechem.OESetSDData(mol, 'dDeltaG_yank_binding',
                                   str(dDeltaG_binding * kT_in_kcal_per_mole))
                self.log.info(
                    'Analyzed and stored binding free energy for molecule {}.'.
                    format(title))

                # Emit molecule to success port.
                self.success.emit(mol)

            except Exception as e:
                self.log.info(
                    'Exception encountered when processing molecule {}.'.
                    format(title))
                # Attach error message to the molecule that failed
                # TODO: If there is an error in the leap setup log,
                # we should capture that and attach it to the failed molecule.
                self.log.error(traceback.format_exc())
                mol.SetData('error', str(e))
                # Return failed molecule
                self.failure.emit(mol)
예제 #7
0
class LigandReader(SourceCube):
    title = "LigandReader Cube"
    version = "0.0.0"
    classification = [["Ligand Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    Ligand Reader Cube 
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of Ligands
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the Ligands
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Ligand to read in",
        required=True,
        description="The Ligand to read in")

    limit = parameter.IntegerParameter("limit", required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    prefix = parameter.StringParameter(
        'prefix',
        default='',
        help_text='An SD tag used as prefix string')

    suffix = parameter.StringParameter(
        'suffix',
        default='',
        help_text='An SD tag used as suffix string')

    type = parameter.StringParameter(
        'type',
        default='LIG',
        required=True,
        help_text='The ligand reside name')

    IDTag = parameter.BooleanParameter(
        'IDTag',
        default=True,
        required=False,
        help_text='If True/Checked ligands are enumerated by sequentially integers.'
                  'A SD tag containing part of the ligand name and an integer is used '
                  'to create a unique IDTag which is attached to the ligand')

    def begin(self):
        """Expose the cube arguments as the ``self.opt`` dictionary."""
        self.opt = vars(self.args)

    def _annotated(self, mols, cap):
        """
        Yield ``mols`` after tagging each one, at most ``cap`` of them if set.

        Each molecule receives the 'prefix' and 'suffix' data, has every atom's
        residue name set to the ``type`` argument and, when ``IDTag`` is
        enabled, receives an 'IDTag' built from 'l', the first 12 characters
        of its title and its zero-based position in the stream.
        """
        position = 0
        for mol in mols:
            mol.SetData(oechem.OEGetTag('prefix'), self.opt['prefix'])
            mol.SetData(oechem.OEGetTag('suffix'), self.opt['suffix'])

            for atom in mol.GetAtoms():
                res = oechem.OEAtomGetResidue(atom)
                res.SetName(self.opt['type'])
                oechem.OEAtomSetResidue(atom, res)

            if self.opt['IDTag']:
                id_tag = 'l{}_{}'.format(mol.GetTitle()[0:12], position)
                mol.SetData(oechem.OEGetTag('IDTag'), id_tag)

            yield mol
            position += 1
            if cap is not None and position == cap:
                return

    def __iter__(self):
        """
        Yield the tagged ligands read from ``data_in``.

        A local file is read through ``oechem.oemolistream`` when
        ``config_from_env()`` returns ``None``; otherwise a
        ``StreamingDataset`` is used.  Stops after ``limit`` molecules when
        a limit is set.
        """
        cap = self.args.limit
        if cap is not None:
            cap = int(cap)

        self.config = config_from_env()
        if self.config is None:
            with oechem.oemolistream(str(self.args.data_in)) as ifs:
                yield from self._annotated(ifs.GetOEMols(), cap)
        else:
            stream = StreamingDataset(self.args.data_in,
                                      input_format=self.args.download_format)
            yield from self._annotated(stream, cap)
예제 #8
0
class FREDDocking(OEMolComputeCube):
    title = "FRED Docking"
    version = "0.0.1"
    classification = [["Ligand Preparation", "OEDock", "FRED"],
                      ["Ligand Preparation", "OEDock", "ChemGauss4"]]
    tags = ['OEDock', 'FRED']
    description = """
    Dock molecules using the FRED docking engine against a prepared receptor file.
    Return the top scoring pose.

    Input:
    -------
    receptor - Requires a prepared receptor (oeb.gz) file of the protein to dock molecules against.
    oechem.OEMCMol - Expects a charged multi-conformer molecule on input port.

    Output:
    -------
    oechem.OEMol - Emits the top scoring pose of the molecule with attachments:
        - SDData Tags: { ChemGauss4 : pose score }
    """

    receptor = parameter.DataSetInputParameter('receptor',
                                               required=True,
                                               help_text='Receptor OEB File')

    def begin(self):
        """
        Load the receptor and initialize the docking engine.

        The Hybrid method is used unless the receptor has no bound ligand, in
        which case Chemgauss4 is used.  ``self.args.receptor`` is replaced by
        the value returned from ``utils.download_dataset_to_file``.

        Raises:
            Exception: if the receptor cannot be read or the docking engine
                cannot be initialized with it.
        """
        receptor = oechem.OEGraphMol()
        self.args.receptor = utils.download_dataset_to_file(self.args.receptor)
        if not oedocking.OEReadReceptorFile(receptor, str(self.args.receptor)):
            raise Exception("Unable to read receptor from {0}".format(
                self.args.receptor))

        # Initialize Docking
        dock_method = oedocking.OEDockMethod_Hybrid
        if not oedocking.OEReceptorHasBoundLigand(receptor):
            oechem.OEThrow.Warning(
                "No bound ligand, switching OEDockMethod to ChemGauss4.")
            dock_method = oedocking.OEDockMethod_Chemgauss4
        dock_resolution = oedocking.OESearchResolution_Default
        # The docking method name is used as the SD tag holding the score.
        self.sdtag = oedocking.OEDockMethodGetName(dock_method)
        self.dock = oedocking.OEDock(dock_method, dock_resolution)
        if not self.dock.Initialize(receptor):
            raise Exception("Unable to initialize Docking with {0}".format(
                self.args.receptor))

    def clean(self, mol):
        """Delete the 'CLASH' and 'CLASHTYPE' data from ``mol`` and its active conformer."""
        mol.DeleteData('CLASH')
        mol.DeleteData('CLASHTYPE')
        mol.GetActive().DeleteData('CLASH')
        mol.GetActive().DeleteData('CLASHTYPE')

    def process(self, mcmol, port):
        """
        Dock ``mcmol`` and emit the docked pose with its score attached.

        On success the pose is scored, the score is stored under the
        ``self.sdtag`` SD tag and the pose is emitted on ``success``.  If
        docking does not return the success code, or any exception occurs,
        the traceback is logged, the message is stored on ``mcmol`` under
        ``'error'`` and ``mcmol`` is emitted on ``failure``.
        """
        try:
            dockedMol = oechem.OEMol()
            res = self.dock.DockMultiConformerMolecule(dockedMol, mcmol)
            if res != oedocking.OEDockingReturnCode_Success:
                # Route the molecule to the failure port instead of silently
                # dropping it when the docking engine reports a failure.
                raise RuntimeError(
                    "Docking failed with return code {}".format(res))

            oedocking.OESetSDScore(dockedMol, self.dock, self.sdtag)
            self.dock.AnnotatePose(dockedMol)
            score = self.dock.ScoreLigand(dockedMol)
            self.log.info("{} {} score = {:.4f}".format(
                self.sdtag, dockedMol.GetTitle(), score))
            oechem.OESetSDData(dockedMol, self.sdtag, "{}".format(score))
            self.clean(dockedMol)
            self.success.emit(dockedMol)

        except Exception as e:
            # Attach error message to the molecule that failed
            self.log.error(traceback.format_exc())
            mcmol.SetData('error', str(e))
            # Return failed molecule
            self.failure.emit(mcmol)

    def end(self):
        """Nothing to tear down."""
        pass
예제 #9
0
class ProteinReader(SourceCube):
    title = "Protein Reader Cube"
    version = "0.0.0"
    classification = [["Protein Reader Cube", "OEChem", "Reader Cube"]]
    tags = ['OEChem']
    description = """
    A Protein Reader Cube 
    Input:
    -------
    oechem.OEMCMol or - Streamed-in of the protein system
    The input file can be an .oeb, .oeb.gz, .pdb or a .mol2 file

    Output:
    -------
    oechem.OEMCMol - Emits the protein system
    """

    success = MoleculeOutputPort("success")

    data_in = parameter.DataSetInputParameter(
        "data_in",
        help_text="Protein to read in",
        required=True,
        description="The Protein to read in")

    limit = parameter.IntegerParameter("limit", required=False)

    download_format = parameter.StringParameter(
        "download_format",
        choices=[".oeb.gz", ".oeb", ".pdb", ".mol2", ".smi"],
        required=False,
        default=".oeb.gz")

    protein_prefix = parameter.StringParameter(
        'protein_prefix',
        default='PRT',
        help_text='The protein prefix name used to identify the protein')

    def begin(self):
        """Keep the cube arguments around as the ``self.opt`` dictionary."""
        self.opt = vars(self.args)

    def __iter__(self):
        """
        Yield each molecule of ``data_in`` retitled with the protein prefix.

        When ``config_from_env()`` returns a configuration the molecules come
        from a ``StreamingDataset``; otherwise they are read from a local
        file with ``oechem.oemolistream``.  Iteration ends after ``limit``
        molecules when a limit is set.
        """
        stop_after = self.args.limit
        if stop_after is not None:
            stop_after = int(stop_after)
        n_read = 0

        self.config = config_from_env()

        # Orion case first, so the local-file case needs no else branch.
        if self.config is not None:
            dataset = StreamingDataset(self.args.data_in,
                                       input_format=self.args.download_format)
            for protein in dataset:
                protein.SetTitle(self.opt['protein_prefix'])
                yield protein
                n_read += 1
                if stop_after is not None and n_read == stop_after:
                    break
            return

        with oechem.oemolistream(str(self.args.data_in)) as ifs:
            for protein in ifs.GetOEMols():
                protein.SetTitle(self.opt['protein_prefix'])
                yield protein
                n_read += 1
                if stop_after is not None and n_read == stop_after:
                    break


# class SimOutputCube(OEMolOStreamCube):
#     """
#     A sink cube that writes molecules to a file
#     """
#     classification = [["Output"]]
#     title = "Output Writer"
#
#     intake = BinaryMoleculeInputPort('intake')
#     data_out = DataSetOutputParameter('data_out',
#                                       required=True,
#                                       title='Name of Dataset to create',
#                                       description='The dataset to output')
#     backend = DataSetOutputParameter(
#         'backend',
#         default="auto",
#         choices=["db", "s3", "auto"],
#         description="The Orion storage backend to use")
#
#     def begin(self):
#         self.in_orion = config_from_env() is not None
#         self.decoder = MoleculeSerializerMixin()
#         self.need_decode = not self.args.data_out.endswith(".oeb.gz")
#         if self.in_orion:
#             self.ofs = MultipartDatasetUploader(self.args.data_out,
#                                                 tags=[self.name],
#                                                 backend=self.args.backend)
#         elif self.need_decode:
#             self.ofs = oechem.oemolostream(str(self.args.data_out))
#         else:
#             self.ofs = open(str(self.args.data_out), 'wb')
#
#     def write(self, mol, port):
#         if self.in_orion or not self.need_decode:
#             self.ofs.write(mol)
#         else:
#             oechem.OEWriteMolecule(self.ofs, self.decoder.decode(mol))
#
#     def end(self):
#         if self.in_orion:
#             self.ofs.complete()
#         else:
#             self.ofs.close()