Beispiel #1
0
    def test_atomtyper_elemental(self):
        """
        Exercise the elemental atomtype sampler (element=1, i.e. hydrogen)
        and push its trajectory output through the score_utils helpers.
        """
        sampler_options = dict(
            replacements_filename=self.replacements,
            reference_typed_molecules=self.mols_alkethoh_ref,
            temperature=0.1,
            verbose=False,
            decorator_behavior='combinatorial-decorators',
            element=1)
        # basetypes is deliberately passed twice, as the second and third
        # positional arguments.
        hydrogen_sampler = smarty.AtomTypeSampler(
            self.mols_alkethoh, self.basetypes, self.basetypes,
            self.combine_decs, **sampler_options)

        # Five iterations, writing the trajectory to a csv file.
        csv_path, pdf_path = 'test_smarty.csv', 'test_smarty.pdf'
        hydrogen_sampler.run(5, csv_path)

        # The analysis helpers must be able to digest the sampler output.
        trajectory = score_utils.load_trajectory(csv_path)
        fractions = score_utils.scores_vs_time(trajectory)
        score_utils.create_plot_file(csv_path, pdf_path, True, False)

        # A total score of 100% at the very first iteration signals a
        # scoring problem.
        first_total = fractions['all'][0]
        if first_total != 1.0:
            return
        raise Exception(
            "Scoring problem, 100% at first iteration for total")
Beispiel #2
0
 def test_random_sampler(self):
     """
     Build a 'Torsion' FragmentSampler, run it for 10 iterations and
     pass the csv it writes through the score_utils functions.
     """
     # Positional pieces handed to the sampler after the molecules and tag.
     environment_pieces = (
         self.atom_OR_bases,
         self.atom_OR_decors,
         self.atom_AND_decors,
         self.bond_OR_bases,
         self.bond_AND_decors,
     )
     sampler_options = dict(
         AtomIndexOdds=self.atom_odds,
         BondIndexOdds=self.bond_odds,
         replacements=self.replacements,
         initialtypes=None,
         SMIRFF=self.SMIRFF,
         temperature=0.0,
         outputFile=self.outputFile,
     )
     torsion_sampler = FragmentSampler(
         self.molecules, 'Torsion', *environment_pieces, **sampler_options)
     torsion_sampler.run(10)

     csv_name = '%s.csv' % self.outputFile
     pdf_name = '%s.pdf' % self.outputFile
     # csv file -> dictionary
     trajectory = score_utils.load_trajectory(csv_name)
     # num/den entries -> fractional scores
     score_utils.scores_vs_time(trajectory)
     # plotting must also work on the same output
     score_utils.create_plot_file(csv_name, pdf_name)
 def test_random_sampler(self):
     """
     Smoke test: a 'Torsion' FragmentSampler runs 10 iterations and the
     score_utils functions accept the output it writes.
     """
     def output_path(extension):
         # File names are derived from self.outputFile plus an extension.
         return '%s.%s' % (self.outputFile, extension)

     fragment_sampler = FragmentSampler(
         self.molecules,
         'Torsion',
         self.atom_OR_bases,
         self.atom_OR_decors,
         self.atom_AND_decors,
         self.bond_OR_bases,
         self.bond_AND_decors,
         AtomIndexOdds=self.atom_odds,
         BondIndexOdds=self.bond_odds,
         replacements=self.replacements,
         initialtypes=None,
         SMIRFF=self.SMIRFF,
         temperature=0.0,
         outputFile=self.outputFile,
     )
     fragment_sampler.run(10)

     # Load the csv trajectory as a dictionary, then convert its num/den
     # entries to fractional scores.
     loaded = score_utils.load_trajectory(output_path('csv'))
     score_utils.scores_vs_time(loaded)
     # Finally exercise the plotting function.
     score_utils.create_plot_file(output_path('csv'), output_path('pdf'))
Beispiel #4
0
    def run(self, niterations, trajFile=None, plotFile=None):
        """
        Run sampler for the specified number of iterations.

        Parameters
        ----------
        niterations : int
            The specified number of iterations
        trajFile : str, optional, default=None
            Output trajectory filename. When given, per-iteration type
            statistics are collected in ``self.traj`` and written to this
            file (overwriting any existing file) after the last iteration.
        plotFile : str, optional, default=None
            Filename for output of plot of score versus time.
            NOTE: this argument is accepted but not used by this method.

        Returns
        -------
        fraction_matched : float
            fraction of total types matched successfully at end of run
            (the value returned by ``self.show_type_matches``)

        """
        self.traj = []
        for iteration in range(niterations):
            if self.verbose:
                print("Iteration %d / %d" % (iteration, niterations))

            accepted = self.sample_types()
            typelist = [[env.asSMIRKS(), env.label] for env in self.envList]
            [typecounts, molecule_typecounts] = self.compute_type_statistics(typelist)

            if trajFile is not None:
                # Get data as list of csv strings
                lines = self.save_type_statistics(typelist, typecounts, molecule_typecounts, type_matches=self.type_matches)
                # Add lines to trajectory with iteration number:
                self.traj.extend('%i,%s \n' % (iteration, l) for l in lines)

            if self.verbose:
                print('Accepted.' if accepted else 'Rejected.')

                # Compute type statistics on molecules.
                self.show_type_statistics(typelist, typecounts, molecule_typecounts, type_matches=self.type_matches)
                print('')

                # TODO: figure out how to handle parent dictionary with chemical environments
                # Print parent tree as it is now.
                print("%s type hierarchy: will go HERE" % self.typetag)

        if trajFile is not None:
            # make "trajectory" file
            if os.path.isfile(trajFile):
                print("trajectory file already exists, it was overwritten")
            header = 'Iteration,Index,Smarts,ParNum,ParentParNum,RefType,Matches,Molecules,FractionMatched,Denominator\n'
            # The context manager closes the handle even if a write fails.
            with open(trajFile, 'w') as f:
                f.write(header)
                f.writelines(self.traj)

            # Get/print some stats on trajectory
            timeseries = load_trajectory(trajFile)
            time_fractions = scores_vs_time(timeseries)
            print("Maximum score achieved: %.2f" % max(time_fractions['all']))

        # Compute final type stats
        typelist = [[env.asSMIRKS(), env.label] for env in self.envList]
        [self.type_matches, self.total_type_matches] = self.best_match_reference_types(typelist)
        # Return value is not used here; the call is kept so any effect it
        # has on the sampler is preserved.
        self.compute_type_statistics(typelist)
        fraction_matched = self.show_type_matches(typelist, self.type_matches)

        # If verbose print parent tree:
        if self.verbose:
            # TODO: update to monitor parent/child hierarchy
            print("%s type hierarchy: will go HERE" % self.typetag)
        return fraction_matched
Beispiel #5
0
    def run(self, niterations, trajFile=None):
        """
        Run atomtype sampler for the specified number of iterations.

        Parameters
        ----------
        niterations : int
            The specified number of iterations
        trajFile : str, optional, default=None
            Output trajectory filename. When given, the file is opened
            (overwriting any existing file) before sampling and one csv
            line per type is written after every iteration.

        Returns
        -------
        fraction_matched_atoms : float
            fraction of total atoms matched successfully at end of run
            (the value returned by ``self.show_type_matches``)
        """
        write_traj = trajFile is not None
        if write_traj:
            # make "trajectory" file
            if os.path.isfile(trajFile):
                print("trajectory file already exists, it was overwritten")
            self.traj = open(trajFile, 'w')

        try:
            if write_traj:
                self.traj.write(
                    'Iteration,Index,Smarts,Typename,ParentSMARTS,RefType,Matches,Molecules,FractionMatched,Denominator\n'
                )

            for iteration in range(niterations):
                if self.verbose:
                    print("Iteration %d / %d" % (iteration, niterations))

                accepted = self.sample_atomtypes()
                [atom_typecounts, molecule_typecounts
                 ] = self.compute_type_statistics(self.atomtypes,
                                                  self.molecules,
                                                  self.element)

                if write_traj:
                    # Get data as list of csv strings
                    lines = self.save_type_statistics(
                        self.atomtypes,
                        atom_typecounts,
                        molecule_typecounts,
                        atomtype_matches=self.atom_type_matches)
                    # Add lines to trajectory with iteration number:
                    for l in lines:
                        self.traj.write('%i,%s \n' % (iteration, l))

                if self.verbose:
                    print('Accepted.' if accepted else 'Rejected.')

                    # Compute atomtype statistics on molecules.
                    self.show_type_statistics(
                        self.atomtypes,
                        atom_typecounts,
                        molecule_typecounts,
                        atomtype_matches=self.atom_type_matches)
                    print('')

                    # Print parent tree as it is now: roots are the types
                    # whose parent entry is None.
                    roots = [
                        r for r in self.child_to_parent.keys()
                        if self.child_to_parent[r] is None
                    ]

                    print("Atom type hierarchy:")
                    self.print_parent_tree(roots, '\t')
        finally:
            # Close the trajectory file even when an iteration raises, so
            # the handle is not leaked and written lines reach the disk.
            if write_traj:
                self.traj.close()

        if write_traj:
            # Get/print some stats on trajectory
            timeseries = load_trajectory(trajFile)
            time_fractions = scores_vs_time(timeseries)
            print("Maximum score achieved: %.2f" % max(time_fractions['all']))

        # Compute final type stats (return value unused; call kept so any
        # effect it has on the sampler is preserved).
        self.compute_type_statistics(self.atomtypes, self.molecules,
                                     self.element)
        fraction_matched_atoms = self.show_type_matches(self.atom_type_matches)

        # If verbose print parent tree:
        if self.verbose:
            # Copy the keys into a list: on Python 3 dict.keys() is a view
            # without a remove() method.
            roots = list(self.parents.keys())
            # Remove keys from roots if they are children
            for children in self.parents.values():
                child_smarts = [smarts for [smarts, name] in children]
                for child in child_smarts:
                    if child in roots:
                        roots.remove(child)

            print("Atom type hierarchy:")
            self.print_parent_tree(roots, '\t')
        return fraction_matched_atoms
Beispiel #6
0
    def run(self, niterations, trajFile=None, plotFile=None):
        """
        Run atomtype sampler for the specified number of iterations.

        Parameters
        ----------
        niterations : int
            The specified number of iterations
        trajFile : str, optional, default=None
            Output trajectory filename. When given, per-iteration type
            statistics are collected in ``self.traj`` and written to this
            file (overwriting any existing file) after the last iteration.
        plotFile : str, optional, default=None
            Filename for output of plot of score versus time. Requires
            ``trajFile`` because the plot is built from the trajectory.

        Returns
        -------
        fraction_matched_atoms : float
            fraction of total atoms matched successfully at end of run
            (the value returned by ``self.show_type_matches``)

        Raises
        ------
        Exception
            If ``plotFile`` is given without ``trajFile``.

        """
        # Validate up front so an unusable argument combination is reported
        # before the sampling run instead of after it.
        if plotFile and not trajFile:
            raise Exception("Cannot construct plot of trajectory without a trajectory file.")

        def current_roots():
            # Keys of self.parents that do not appear as a child of any
            # entry. list() is needed because on Python 3 dict.keys() is a
            # view without a remove() method.
            roots = list(self.parents.keys())
            for children in self.parents.values():
                child_smarts = [smarts for [smarts, name] in children]
                for child in child_smarts:
                    if child in roots:
                        roots.remove(child)
            return roots

        self.traj = []
        for iteration in range(niterations):
            if self.verbose:
                print("Iteration %d / %d" % (iteration, niterations))

            accepted = self.sample_atomtypes()
            [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules)
            self.get_unfinishedAtomList(atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)

            if trajFile is not None:
                # Get data as list of csv strings
                lines = self.save_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                # Add lines to trajectory with iteration number:
                self.traj.extend('%i,%s \n' % (iteration, l) for l in lines)

            if self.verbose:
                print('Accepted.' if accepted else 'Rejected.')

                # Compute atomtype statistics on molecules.
                self.show_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                print('')

                # Print parent tree as it is now.
                print("Atom type hierarchy:")
                self.print_parent_tree(current_roots(), '\t')

        time_fractions = None
        if trajFile is not None:
            # make "trajectory" file
            if os.path.isfile(trajFile):
                print("trajectory file already exists, it was overwritten")
            header = 'Iteration,Index,Smarts,ParNum,ParentParNum,RefType,Matches,Molecules,FractionMatched,Denominator\n'
            # The context manager closes the handle even if a write fails.
            with open(trajFile, 'w') as f:
                f.write(header)
                f.writelines(self.traj)

            # Get/print some stats on trajectory
            timeseries = load_trajectory(trajFile)
            time_fractions = scores_vs_time(timeseries)
            print("Maximum score achieved: %.2f" % max(time_fractions['all']))

        # If desired, make plot. trajFile is guaranteed to be set here, so
        # time_fractions was computed just above and is reused.
        if plotFile:
            import pylab as pl

            # Plot overall score
            pl.plot(time_fractions['all'], 'k-', linewidth=2.0)

            # Grab reference types other than 'all' (currently disabled)
            plot_others = False
            if plot_others:
                reftypes = list(time_fractions.keys())
                reftypes.remove('all')

                # Plot scores for individual types
                for reftype in reftypes:
                    pl.plot(time_fractions[reftype])

            # Axis labels and legend
            pl.xlabel('Iteration')
            pl.ylabel('Fraction of reference type found')
            if plot_others:
                pl.legend(['all'] + reftypes, loc="lower right")
            pl.ylim(-0.1, 1.1)

            # Save
            pl.savefig(plotFile)

        # Compute final type stats (return value unused; call kept so any
        # effect it has on the sampler is preserved).
        self.compute_type_statistics(self.atomtypes, self.molecules)
        fraction_matched_atoms = self.show_type_matches(self.atom_type_matches)

        # If verbose print parent tree:
        if self.verbose:
            print("Atom type hierarchy:")
            self.print_parent_tree(current_roots(), '\t')
        return fraction_matched_atoms
    def run(self, niterations, trajFile=None):
        """
        Run atomtype sampler for the specified number of iterations.

        Parameters
        ----------
        niterations : int
            The specified number of iterations
        trajFile : str, optional, default=None
            Output trajectory filename. When given, the file is opened
            (overwriting any existing file) before sampling and one csv
            line per type is written after every iteration.

        Returns
        -------
        fraction_matched_atoms : float
            fraction of total atoms matched successfully at end of run
            (the value returned by ``self.show_type_matches``)
        """
        if trajFile is not None:
            # make "trajectory" file
            if os.path.isfile(trajFile):
                print("trajectory file already exists, it was overwritten")
            self.traj = open(trajFile, 'w')

        try:
            if trajFile is not None:
                self.traj.write('Iteration,Index,Smarts,Typename,ParentSMARTS,RefType,Matches,Molecules,FractionMatched,Denominator\n')

            for iteration in range(niterations):
                if self.verbose:
                    print("Iteration %d / %d" % (iteration, niterations))

                accepted = self.sample_atomtypes()
                [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)

                if trajFile is not None:
                    # Get data as list of csv strings
                    lines = self.save_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                    # Add lines to trajectory with iteration number:
                    for l in lines:
                        self.traj.write('%i,%s \n' % (iteration, l))

                if self.verbose:
                    if accepted:
                        print('Accepted.')
                    else:
                        print('Rejected.')

                    # Compute atomtype statistics on molecules.
                    self.show_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                    print('')

                    # Print parent tree as it is now: roots are the types
                    # whose parent entry is None.
                    roots = [r for r in self.child_to_parent.keys() if self.child_to_parent[r] is None]

                    print("Atom type hierarchy:")
                    self.print_parent_tree(roots, '\t')
        finally:
            # Always release the file handle, including when sampling
            # raises part-way through the run.
            if trajFile is not None:
                self.traj.close()

        if trajFile is not None:
            # Get/print some stats on trajectory
            # Load timeseries
            timeseries = load_trajectory(trajFile)
            time_fractions = scores_vs_time(timeseries)
            print("Maximum score achieved: %.2f" % max(time_fractions['all']))

        # Compute final type stats (return value unused; call kept so any
        # effect it has on the sampler is preserved).
        self.compute_type_statistics(self.atomtypes, self.molecules, self.element)
        fraction_matched_atoms = self.show_type_matches(self.atom_type_matches)

        # If verbose print parent tree:
        if self.verbose:
            # dict.keys() is a view on Python 3 and has no remove(); work on
            # a list copy instead.
            roots = list(self.parents.keys())
            # Remove keys from roots if they are children
            for children in self.parents.values():
                child_smarts = [smarts for [smarts, name] in children]
                for child in child_smarts:
                    if child in roots:
                        roots.remove(child)

            print("Atom type hierarchy:")
            self.print_parent_tree(roots, '\t')
        return fraction_matched_atoms
Beispiel #8
0
    def run(self, niterations, trajFile=None, plotFile=None):
        """
        Run atomtype sampler for the specified number of iterations.

        Parameters
        ----------
        niterations : int
            The specified number of iterations
        trajFile : str, optional, default=None
            Output trajectory filename. When given, per-iteration type
            statistics are collected in ``self.traj`` and written to this
            file (overwriting any existing file) after the last iteration.
        plotFile : str, optional, default=None
            Filename for output of plot of score versus time. Requires
            ``trajFile`` because the plot is built from the trajectory.

        Returns
        -------
        fraction_matched_atoms : float
            fraction of total atoms matched successfully at end of run
            (the value returned by ``self.show_type_matches``)

        Raises
        ------
        Exception
            If ``plotFile`` is given without ``trajFile``.
        """
        # Reject the unusable argument combination before any sampling is
        # done, rather than after the whole run has finished.
        if plotFile and not trajFile:
            raise Exception("Cannot construct plot of trajectory without a trajectory file.")

        def find_roots():
            # Start from every key of self.parents and drop those listed as
            # a child somewhere. The keys are copied into a list because a
            # Python 3 dict.keys() view has no remove() method.
            candidates = list(self.parents.keys())
            for children in self.parents.values():
                child_smarts = [smarts for [smarts, name] in children]
                for child in child_smarts:
                    if child in candidates:
                        candidates.remove(child)
            return candidates

        self.traj = []
        for iteration in range(niterations):
            if self.verbose:
                print("Iteration %d / %d" % (iteration, niterations))

            accepted = self.sample_atomtypes()
            [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules)
            self.get_unfinishedAtomList(atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)

            if trajFile is not None:
                # Get data as list of csv strings
                lines = self.save_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                # Add lines to trajectory with iteration number:
                for l in lines:
                    self.traj.append('%i,%s \n' % (iteration, l))

            if self.verbose:
                if accepted:
                    print('Accepted.')
                else:
                    print('Rejected.')

                # Compute atomtype statistics on molecules.
                self.show_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                print('')

                # Print parent tree as it is now.
                print("Atom type hierarchy:")
                self.print_parent_tree(find_roots(), '\t')

        time_fractions = None
        if trajFile is not None:
            # make "trajectory" file
            if os.path.isfile(trajFile):
                print("trajectory file already exists, it was overwritten")
            start = ['Iteration,Index,Smarts,ParNum,ParentParNum,RefType,Matches,Molecules,FractionMatched,Denominator\n']
            # "with" guarantees the handle is closed even if writing fails.
            with open(trajFile, 'w') as f:
                f.writelines(start + self.traj)

            # Get/print some stats on trajectory
            # Load timeseries
            timeseries = load_trajectory(trajFile)
            time_fractions = scores_vs_time(timeseries)
            print("Maximum score achieved: %.2f" % max(time_fractions['all']))

        # If desired, make plot. The early check ensures trajFile is set,
        # so the scores computed above are reused instead of reloaded.
        if plotFile:
            import pylab as pl

            # Plot overall score
            pl.plot(time_fractions['all'], 'k-', linewidth=2.0)

            # Grab reference types other than 'all' (currently disabled)
            plot_others = False
            if plot_others:
                reftypes = list(time_fractions.keys())
                reftypes.remove('all')

                # Plot scores for individual types
                for reftype in reftypes:
                    pl.plot(time_fractions[reftype])

            # Axis labels and legend
            pl.xlabel('Iteration')
            pl.ylabel('Fraction of reference type found')
            if plot_others:
                pl.legend(['all'] + reftypes, loc="lower right")
            pl.ylim(-0.1, 1.1)

            # Save
            pl.savefig(plotFile)

        # Compute final type stats (return value unused; call kept so any
        # effect it has on the sampler is preserved).
        self.compute_type_statistics(self.atomtypes, self.molecules)
        fraction_matched_atoms = self.show_type_matches(self.atom_type_matches)

        # If verbose print parent tree:
        if self.verbose:
            print("Atom type hierarchy:")
            self.print_parent_tree(find_roots(), '\t')
        return fraction_matched_atoms