예제 #1
0
    def __init__(self, parent, stype='st'):
        """Initialise the model for the chosen ``stype`` parameter.

        Args:
            parent: Passed unchanged to ``model.__init__``.
            stype: Name of the fitted parameter; must be ``'dm15'`` or
                ``'st'``. It selects the template object and the calibration
                table that is loaded.

        Raises:
            ValueError: If ``stype`` is neither ``'dm15'`` nor ``'st'``.
        """
        if stype not in ('dm15', 'st'):
            # Call form of raise: the old ``raise ValueError, "..."`` statement
            # is a SyntaxError on Python 3, while this form works on 2 and 3.
            raise ValueError(
                "This model only supports the dm15 and st parameter")
        model.__init__(self, parent)
        self.rbs = ['u', 'B', 'V', 'g', 'r', 'i', 'Y', 'J', 'H']
        # One entry per fitted parameter; the key for ``stype`` is dynamic.
        self.parameters = {
            'DM': None,
            stype: None,
            'EBVhost': None,
            'Tmax': None,
        }
        self.errors = {'DM': 0, stype: 0, 'EBVhost': 0, 'Tmax': 0}
        if stype == 'dm15':
            self.template = NIR_ubertemp.template()
        else:
            self.template = NIR_ubertemp.stemplate()
        self.stype = stype

        # Both branches of the original unpacked the same nine values and
        # differed only in the calibration file name, so pick the name first.
        if stype == 'st':
            calibration_file = 'st_calibration2.dat'
        else:
            calibration_file = 'dm15_calibration2.dat'
        (self.a, self.ea, self.b, self.eb, self.c, self.ec,
         self.Rv_host, self.eRv_host, self.sigSN) = read_table(
            os.path.join(base, calibration_file))

        self.do_Robs = 0
        self.Robs = {}
예제 #2
0
    def __init__(self, vpath):
        """Initialise the base ``model`` with ``vpath`` and set default settings.

        Every setting below is stored as a plain instance attribute.

        Args:
            vpath: Passed unchanged to ``model.__init__``.
        """
        model.__init__(self, vpath)

        # --- General settings -------------------------------------------
        self.buildable = True
        self.quantitative = True
        self.confidential = False
        self.identity = True
        self.SDFileName = 'name'
        self.SDFileActivity = 'activity'

        # --- Normalization settings -------------------------------------
        self.norm = True
        self.normStand = True
        self.normNeutr = False
        self.normNeutrMethod = 'moka'
        self.normNeutr_pH = 7.4
        self.norm3D = True

        # --- Molecular descriptor settings ------------------------------
        # MD is one of 'padel' | 'pentacle' | 'adriana'.
        self.MD = 'adriana'
        # padelMD entries are taken from '-2d' | '-3d'.
        self.padelMD = ['-2d', '-3d']
        self.padelMaxRuntime = None
        self.padelDescriptor = None
        self.pentacleProbes = ['DRY', 'O', 'N1', 'TIP']
        self.pentacleOthers = []

        # --- Modeling settings ------------------------------------------
        self.model = 'pls'
        self.modelLV = 2
        self.modelAutoscaling = True
        self.modelCutoff = 'auto'
        self.selVar = True
        # NOTE: selVarMethod and selVarCV are not set here; the original
        # source had both assignments commented out.
        self.selVarLV = 2
        self.selVarRun = 2
        self.selVarMask = None

        # --- Paths to external programs ---------------------------------
        self.mokaPath = '/opt/blabber/blabber4eTOX/'
        self.padelPath = '/opt/padel/padel218ws/'
        self.padelURL = 'http://localhost:9000/computedescriptors?params='
        self.pentaclePath = '/opt/pentacle/pentacle106eTOX/'
        self.adrianaPath = '/opt/AdrianaCode/AdrianaCode226/'
        self.corinaPath = '/opt/corina/corina3494/'
        self.javaPath = '/usr/java/jdk1.7.0_51/'
        self.RPath = '/opt/R/R-3.0.2/'
        self.standardiserPath = '/opt/standardiser/standardise20140206/'
 def __init__(self, isTest):
     """Configure the 'RNN' model from a ``Configuration_RNN`` object.

     Args:
         isTest: When truthy, ``self.epoch`` is overridden to 1.
     """
     model.__init__(self, 'RNN', 'RNN network')
     settings = Configuration_RNN()
     # Result discarded, exactly as in the original call sequence; the call
     # is kept in case Return() has side effects -- TODO confirm.
     settings.Return()
     common = settings.Return()
     (
         self.input_layer_input_w,
         self.input_layer_output_w,
         self.input_layer_output_b,
         self.core_layer_input_w,
         self.core_layer_output_w,
         self.core_layer_output_b,
         self.action_layer_input_w,
         self.action_layer_output_w,
         self.action_layer_output_b,
         self.batch_size,
         self.epoch,
         self.learning_rate,
         self.checkpoint_path,
     ) = common
     recurrent = settings.Return_RNN()
     self.repeat, self.transfer_layer_input_w = recurrent
     if isTest:
         self.epoch = 1
     # Fixed seeds for NumPy and TensorFlow.
     np.random.seed(1337)
     tf.set_random_seed(1337)
    def __init__(self, isTest=5):
        """Configure the 'FC' model from a ``Configuration_FC`` object.

        Args:
            isTest: When truthy, ``self.epoch`` is overridden to 1. The
                default value (5) is truthy, so the override applies unless
                a falsy value is passed.
        """
        model.__init__(self, 'FC', 'Fully connective network')
        settings = Configuration_FC()
        # Result discarded, exactly as in the original call sequence; the
        # call is kept in case Return() has side effects -- TODO confirm.
        settings.Return()
        common = settings.Return()
        (
            self.input_layer_input_w,
            self.input_layer_output_w,
            self.input_layer_output_b,
            self.core_layer_input_w,
            self.core_layer_output_w,
            self.core_layer_output_b,
            self.action_layer_input_w,
            self.action_layer_output_w,
            self.action_layer_output_b,
            self.batch_size,
            self.epoch,
            self.learning_rate,
            self.checkpoint_path,
        ) = common

        if isTest:
            self.epoch = 1
        # Fixed seeds for NumPy and TensorFlow.
        np.random.seed(1337)
        tf.set_random_seed(1337)
 def __init__(self, isTest=5):
     """Configure the 'RandGlimpse' model from ``Configuration_RandGlimpse``.

     Args:
         isTest: When truthy, ``self.epoch`` is overridden to 1. The
             default value (5) is truthy, so the override applies unless
             a falsy value is passed.
     """
     model.__init__(self, 'RandGlimpse', 'RandGlimpse network')
     settings = Configuration_RandGlimpse()
     # Result discarded, exactly as in the original call sequence; the call
     # is kept in case Return() has side effects -- TODO confirm.
     settings.Return()
     common = settings.Return()
     (
         self.input_layer_input_w,
         self.input_layer_output_w,
         self.input_layer_output_b,
         self.core_layer_input_w,
         self.core_layer_output_w,
         self.core_layer_output_b,
         self.action_layer_input_w,
         self.action_layer_output_w,
         self.action_layer_output_b,
         self.batch_size,
         self.epoch,
         self.learning_rate,
         self.checkpoint_path,
     ) = common
     recurrent = settings.Return_RNN()
     self.repeat, self.transfer_layer_input_w = recurrent
     glimpse = settings.Return_RandGlimpse()
     (
         self.sensorX,
         self.sensorY,
         self.depth,
         self.hg_size,
         self.hl_size,
         self.glimpse_size,
     ) = glimpse
     if isTest:
         self.epoch = 1
     # Fixed seeds for NumPy and TensorFlow.
     np.random.seed(1337)
     tf.set_random_seed(1337)
예제 #6
0
    def __init__(self, isTest=5):
        """Configure the 'CNN' model from a ``Configuration_CNN`` object.

        Args:
            isTest: When truthy, ``self.epoch`` is overridden to 1. The
                default value (5) is truthy, so the override applies unless
                a falsy value is passed.
        """
        model.__init__(self, 'CNN', 'CNN network')
        settings = Configuration_CNN()
        # Both results discarded, exactly as in the original call sequence;
        # the calls are kept in case they have side effects -- TODO confirm.
        settings.Return()
        settings.Return_CNN()
        common = settings.Return()
        (
            self.input_layer_input_w,
            self.input_layer_output_w,
            self.input_layer_output_b,
            self.core_layer_input_w,
            self.core_layer_output_w,
            self.core_layer_output_b,
            self.action_layer_input_w,
            self.action_layer_output_w,
            self.action_layer_output_b,
            self.batch_size,
            self.epoch,
            self.learning_rate,
            self.checkpoint_path,
        ) = common

        convolution = settings.Return_CNN()
        (
            self.filter_depth,
            self.filter_height,
            self.filter_width,
            self.in_channels,
            self.out_channels,
            self.stride,
        ) = convolution
        if isTest:
            self.epoch = 1
        # Fixed seeds for NumPy and TensorFlow.
        np.random.seed(1337)
        tf.set_random_seed(1337)
예제 #7
0
    def __init__(self, vpath):
        """Initialise the base ``model`` with ``vpath`` and set default settings.

        Every setting is stored as a plain instance attribute and is
        documented by the comment placed directly above it.

        Args:
            vpath: Passed unchanged to ``model.__init__``.
        """
        model.__init__(self, vpath)

        # ================================================================
        # General settings
        #
        #   General characteristics of the model
        # ================================================================

        # True if the model needs to be trained (built) before it can be
        # used for prediction.
        self.buildable = True

        # True for quantitative dependent variables, False for qualitative
        # (e.g. 1 or 0) endpoints.
        self.quantitative = True

        # True for building the model without storing any information about
        # the structures of the training series.
        self.confidential = True

        # If True, the structure of any query compound is compared with
        # those in the training series, returning the annotated value.
        self.identity = False

        # If True, and the input file contains a field with the label
        # described by 'SDFileExperimental', this value is returned.
        self.experimental = False

        # ================================================================
        # Input settings
        #
        #   Labels of SDFile fields recognized by the program in the input
        #   file. These have the following syntax. For example, if the
        #   SDFileName field is 'name', the input file will contain this
        #   string as shown:
        #
        #     > <name>
        #     aspirin
        # ================================================================

        # Label of the SDFile field containing the name of the molecule.
        self.SDFileName = 'name'

        # Label of the SDFile field containing the value of the dependent
        # variable. Leave blank for non-supervised models (PCA).
        self.SDFileActivity = 'activity'

        # Label of the SDFile field containing the value to be returned,
        # typically an experimentally determined value.
        self.SDFileExperimental = ''

        # Labels of any SDFile fields that must be passed through the
        # normalization filters. Typically they contain information that is
        # used by models.
        self.SDFileMetadata = []

        # ================================================================
        # Normalization settings
        #
        #   Define how the input structures are normalized and transformed
        #   prior to being processed.
        #
        #   These settings apply both to the structures of the training
        #   series and to query structures, which are submitted to exactly
        #   the same normalization workflow.
        #
        #   * Licenses: the program moka is third party software and
        #     requires a license activation.
        # ================================================================

        # If True the structures are normalized. When False the values of
        # all the following normalization settings are ignored.
        self.norm = False

        # If True the structures are submitted to the structural
        # normalization program 'standardizer'.
        self.normStand = True

        # If True the ionization of the structures is adjusted to the pH
        # given by 'normNeutr_pH' using the program defined below.
        self.normNeutr = True

        # Name of the program used to adjust the ionization of the
        # structures when 'normNeutr' is True.
        self.normNeutrMethod = 'moka'

        # pH used to adjust the ionization of the structures when
        # 'normNeutr' is True.
        self.normNeutr_pH = 7.4

        # If True the structures are converted to 3D using the program
        # 'corina', irrespective of whether the input structure is 2D or
        # already 3D.
        self.norm3D = True

        # ================================================================
        # Molecular descriptor settings
        #
        #   Define the program used to calculate the molecular descriptors
        #   (MD) from the input structures and diverse parameters defining
        #   how these programs will carry out the MD computation.
        #
        #   * Licenses: the program pentacle is third party software and
        #     requires a license activation.
        # ================================================================

        # Program used to calculate the MD. Must be one of 'padel',
        # 'pentacle', 'adriana' or 'external'.
        self.MD = 'padel'

        # -- padel relevant settings --

        # Type of padel MD to be calculated. Must be a subset of the
        # following values: '-2d', '-3d'.
        self.padelMD = ['-2d']

        # Timeout (in milliseconds) used by padel to compute a single
        # molecule. If the computation time exceeds this value the MD are
        # undefined.
        self.padelMaxRuntime = None

        # Name of an XML formatted file containing the names of the MD
        # computed by padel.
        self.padelDescriptor = None

        # -- pentacle relevant settings --

        # Names of the molecular probes used by the program pentacle to
        # compute the MD. Must be a subset of 'DRY', 'O', 'N1', 'TIP'.
        self.pentacleProbes = ['DRY', 'O', 'N1', 'TIP']

        # Any extra command line parameters to be sent to the program
        # pentacle.
        self.pentacleOthers = []

        # -- external relevant settings --

        # Name of a TSV file with MD. The first column must be a text
        # string used to associate the MD with a given compound.
        self.MDexternalFile = None

        # Name of the SD field containing the text string used in the TSV
        # file for associating the MD with the compound.
        self.MDexternalID = None

        # Name of an SD field with a tab separated line containing MD for
        # this molecule. This could be used to pass MD for a predicted
        # compound not present in the TSV file used at model building time.
        self.MDexternalField = None

        # ================================================================
        # Modeling settings
        #
        #   Define the modeling method used to build the predictive model
        #   and diverse parameters defining how this method will work.
        # ================================================================

        # Name of the modeling method used to build the model. Must be one
        # of 'pls', 'pca', 'RF'.
        self.model = 'pls'

        # -- pls relevant settings --

        # Number of latent variables used to build the model.
        self.modelLV = 2

        # If True, the molecular descriptors will be normalized to unit
        # variance by dividing each value by the variable variance.
        self.modelAutoscaling = True

        # Applicable only for PLS-DA ('quantitative' is False). The cutoff
        # value used to assign 1 or 0 to the predictions. When set to
        # 'auto' the value is assigned automatically as the one producing
        # the best compromise between sensitivity and specificity.
        self.modelCutoff = 'auto'

        # If True the program will run the variable selection method
        # defined by 'selVarMethod'. This setting can significantly extend
        # the time required to build a model (by many hours or even days).
        self.selVar = False

        # -- variable selection relevant settings --

        # Name of the variable selection method. Must be one of: 'golpe'
        # (to be extended in following versions).
        self.selVarMethod = 'golpe'

        # Number of latent variables used to build the reduced models used
        # by the variable selection. Could be different from 'modelLV'.
        self.selVarLV = 2

        # Name of the cross-validation method used during the variable
        # selection. Must be one of: 'LOO' (to be extended in following
        # versions).
        self.selVarCV = 'LOO'

        # Maximum number of sequential runs of the GOLPE algorithm before
        # stopping. The algorithm runs only until the model predictive
        # ability (assessed by cross-validation) no longer improves.
        self.selVarRun = 2

        # Name of a file containing a previously computed mask of selected
        # and non-selected variables.
        self.selVarMask = None

        # -- RF relevant settings --

        # Number of trees in the forest.
        self.RFestimators = 100

        # The number of features to consider when looking for the best
        # split. Acceptable values are 'sqrt', 'log2' and 'none':
        #   - 'sqrt': max_features=sqrt(n_features)
        #   - 'log2': max_features=log2(n_features)
        #   - 'none': max_features=n_features
        self.RFfeatures = 'sqrt'

        # None or 'balanced' (class_weight: dict, list of dicts,
        # "balanced"). The "balanced" mode uses the values of y to
        # automatically adjust weights inversely proportional to class
        # frequencies in the input data, as
        # n_samples / (n_classes * np.bincount(y)).
        self.RFclass_weight = 'balanced'

        # If True, optimizes the values of RFestimators and RFfeatures and
        # generates a diagnostic plot.
        self.RFtune = True

        # If True a random seed is used for bootstrapping; if False the
        # random seed is fixed to 1226.
        # Original notes: "RF: Qualitative and Quantitative" / "Only valid
        # for Qualitative Models"; the seed may be an int, a RandomState
        # instance, or None (default), and is the seed of the pseudo random
        # number generator used when shuffling the data for probability
        # estimation.
        self.RFrandom = False

        # -- Model validation settings --

        # Cross-validation scheme. One of: 'kfold', 'gkfold', 'stkfold',
        # 'logo', 'lpgo', 'loo', 'lpo', 'shufsplit', 'gshufplit',
        # 'stshufsplit', 'psplit', 'tsplit'.
        # NOTE(review): this list says 'tsplit' but the reference table
        # below says 'tssplit' -- confirm which spelling is accepted.
        self.ModelValidationCV = 'loo'
        # int, only for n_splits or n_groups.
        self.ModelValidationN = 2
        # int, only for n_samples, e.g. LeavePOut(p).
        self.ModelValidationP = 1

        # Reference table of the cross-validators named above:
        #   kfold       = KFold(n_splits=2, random_state=self.random_state, shuffle=False)
        #                 K-Folds cross-validator
        #   gkfold      = GroupKFold(n_splits=2)
        #                 K-fold iterator variant with non-overlapping groups
        #   stkfold     = StratifiedKFold(n_splits=2, random_state=self.random_state, shuffle=False)
        #                 Stratified K-Folds cross-validator
        #   logo        = LeaveOneGroupOut()
        #                 Leave One Group Out cross-validator
        #   lpgo        = LeavePGroupsOut(n_groups=2)
        #                 Leave P Group(s) Out cross-validator
        #   loo         = LeaveOneOut()
        #                 Leave-One-Out cross-validator
        #   lpo         = LeavePOut(2)
        #                 Leave-P-Out cross-validator
        #   shufsplit   = ShuffleSplit(n_splits=3, random_state=0, test_size=0.25, train_size=None)
        #                 Random permutation cross-validator
        #   gshufplit   = GroupShuffleSplit(test_size=10, n_splits=100)
        #                 Shuffle-Group(s)-Out cross-validation iterator
        #   stshufsplit = StratifiedShuffleSplit(n_splits=3, test_size=0.5, random_state=0)
        #                 Stratified ShuffleSplit cross-validator
        #   psplit      = PredefinedSplit(test_fold=array([ 0,  1, -1,  1]))
        #                 Predefined split cross-validator
        #   tssplit     = TimeSeriesSplit(n_splits=3)
        #                 Time Series cross-validator

        # Plot learning curve.
        self.ModelValidationLC = False

        # ================================================================
        # View settings
        #
        #   Define the type of visual representation to be generated as
        #   well as some properties of the generated graphics, like the
        #   color, shape and dimension of the markers.
        #
        #   Available graphic types:
        #
        #   'pca'       builds a PCA model with the training series of the
        #               selected model and the same MDs, then represents
        #               the scores of PC1 and PC2 on the horizontal and
        #               vertical axes
        #
        #   'property'  represents the training series of the selected
        #               model using the log P for the X axis and the
        #               molecular weight (MW) for the Y axis, as computed
        #               using RDKit
        #
        #   'project'   projects the training series of the selected model
        #               on a reference dataset for which a PCA model must
        #               have been generated. The markers of each compound
        #               can be assigned colors according to the distance
        #               to model (DModX)
        # ================================================================

        # Type of graphic to generate. Must be one of 'pca', 'property' or
        # 'project'.
        self.viewType = 'property'

        # If True, the selected model is represented together with another
        # dataset, shown as reference, defined by the two following
        # settings.
        self.viewBackground = False

        # Name of the reference dataset (the endpoint label).
        self.viewReferenceEndpoint = None

        # Version of the reference dataset.
        self.viewReferenceVersion = 0

        # For each plot family below: marker color, marker shape (see the
        # 'matplotlib.markers' documentation for a list), marker size and
        # thickness of the marker border.
        self.plotPCAColor = 'red'
        self.plotPCAMarkerShape = 'D'
        self.plotPCAMarkerSize = 40
        self.plotPCAMarkerLine = 0

        self.plotPRPColor = 'red'
        self.plotPRPMarkerShape = 'D'
        self.plotPRPMarkerSize = 40
        self.plotPRPMarkerLine = 0

        # Color of the marker, or 'DModX' to assign a color according to
        # the distance to model (DModX) of every compound.
        self.plotPRJColor = 'DModX'
        self.plotPRJMarkerShape = 'o'
        self.plotPRJMarkerSize = 50
        self.plotPRJMarkerLine = 1

        self.plotBGColor = '#aaaaaa'
        self.plotBGMarkerShape = 'o'
        self.plotBGMarkerSize = 20
        self.plotBGMarkerLine = 0

        # ================================================================
        # Paths to external programs
        #
        #   Define the absolute path to diverse external programs used by
        #   the program. If the software is updated, the correct path must
        #   be introduced here.
        # ================================================================
        self.mokaPath = '/opt/blabber/blabber4eTOX/'
        self.padelPath = '/opt/padel/padel218ws/'
        self.padelURL = 'http://localhost:9000/computedescriptors?params='
        self.pentaclePath = '/opt/pentacle/pentacle106eTOX/'
        self.adrianaPath = '/opt/AdrianaCode/AdrianaCode226/'
        self.corinaPath = '/opt/corina/corina3494/'
        self.javaPath = '/usr/java/jdk1.7.0_51/'
        self.RPath = '/opt/R/R-3.0.2/'
        self.standardiserPath = '/opt/standardiser/standardise20140206/'
예제 #8
0
 def __init__(self):
     """Initialise the instance by delegating to ``model.__init__``.

     No arguments are forwarded and no attributes are set here.
     """
     model.__init__(self)
예제 #9
0
    def __init__ (self, vpath):
        model.__init__(self, vpath)

        ##########################################################################################################
        ##
        ## General settings
        ##
        ##    General characteristics of the model
        ##
        ##########################################################################################################
        self.buildable    = True           # True if the model needs to be trained (built) before it can be used
                                           # for prediction
                                           
        self.quantitative = True           # True for quantitative dependent variables, False for qualitative
                                           # (e.g. 1 or 0) endpoints 
        
        self.confidential = False          # True for building the model without storing any information about
                                           # the training series stuctures
                                           
        self.identity     = False          # If set to True, the structure of any query compound is compared with 
                                           # those in the training series, returning the annotated value
                                           
        self.experimental = False          # If set to True, and the input file contains a field with the label
                                           # described by 'SDFileExperimental' this value is returned
                                           
        ##########################################################################################################
        ##
        ## Input setting
        ##                                       
        ##    Labels of SDFile fields recognized by the program in the input file. These have the following
        ##    synthax. For example, if the SDFilenName field is 'name', the input file will contain this string
        ##    as shown:
        ##
        ##      > <name>
        ##      aspirin
        ##
        ##########################################################################################################
        self.SDFileName = 'name'           # Label of the SDFile field in the input file containing the name of
                                           # the molecule
                                           
        self.SDFileActivity = 'activity'   # Label of the SDFile field in the input file containing the value of
                                           # the dependent variable. Leave blank for non-supervided models (PCA)
                                           
        self.SDFileExperimental = ''       # Label of the SDFile field in the input file containing the value to
                                           # be returned, typically an experimentally determined value

        self.SDFileMetadata = []           # Label of any SDFile field that must be passed throug the normalization
                                           # filters. Typically they contain information that is used by models

        ##########################################################################################################        
        ##
        ## Normalization settings
        ##
        ##    Define how the input structures are normalized and transformed prior to being processed
        ##
        ##    These settings apply both to the stucture of the training series and query structures, which are 
        ##    submitted to exactly the same normalization workflow
        ##
        ##    * Licenses: the program moka is third party software and requires a license activation
        ##
        ##########################################################################################################
        self.norm = False                  # If True the structures are normalized. When set to False the values
                                           # of all the following settings are ignored
                                           
        self.normStand = True              # If True the structures are submitted to the structural normalization
                                           # program 'standardizer'
                                           
        self.normNeutr = True              # If True the ionization of the structures is adjusted to the pH
                                           # detailed by 'normNeutr_pH' using the program defined below
                                           
        self.normNeutrMethod = 'moka'      # Name of the program used to adjust the ionization of the structures
                                           # when the value of 'normNeutr' is set to True
                                           
        self.normNeutr_pH = 7.4            # Value of the pH used to adjust the ionization of the structures
                                           # when the value of 'normNeutr' is set to True
                                           
        self.norm3D = True                 # If True the structures are converted to 3D using the program 'corina'
                                           # irrespectivelly if the input structure is 2D or already 3D

        ##########################################################################################################
        ##
        ## Molecular descriptor settings
        ##
        ##    Define the program used to calculate the molecular descriptors (MD) from the input structures and
        ##    diverse parameters defining how these programs will carry out the molecular descriptor computation
        ##
        ##    * Licenses: the program pentacle is third party software and requires a license activation
        ##
        ##########################################################################################################
        self.MD = 'padel'                   # Program used to calculate the MD. Must be one of the following
                                            # values: 'padel', 'pentacle', 'adriana' or 'external'

        # padel relevant settings
        
        self.padelMD = ['-2d']              # Type of padel MD to be calculared. Must be a subset of the values
                                            # in the following list, separated by a comma: '-2d' or '-3d'
                                            
        self.padelMaxRuntime = None         # Timeout (in miliseconds) used by padel to compute a single molecule.
                                            # If the computation time excedes this value the MD are undefined
                                            
        self.padelDescriptor = None         # Name of a xml formated filed containing the names of the MD computed
                                            # by padel

        # pentacle relevant settings
        
        self.pentacleProbes = ['DRY','O',
                               'N1','TIP']  # Name of the molecular probes used by program pentacle to compute
                                            # the molecular descriptors. Must be a subset of the values in the
                                            # following list, separated by a comma: 'DRY', 'O', 'N1' or 'TIP'
                                            
        self.pentacleOthers = []            # Include here any command line parameter to be sent to the program
                                            # pentacle 


        # external relevant settings

        self.MDexternalFile  = None         # Name of a TSV with MD. The first colum must be a text string
                                            # used to associate the MD with a given compound
                                            
        self.MDexternalID    = None         # Name of the SDField containing the text string used in the TSV
                                            # file for associating the MD with the compound 
        
        self.MDexternalField = None         # Name of a SDFiled with a tab separated line containing MD for this
                                            # molecule. This could be used to pass MD for predicted compound, not
                                            # present in the TSV file used at model building time
        
        ##########################################################################################################
        ##
        ## Modeling settings
        ##
        ##    Define the modeling method used to build the predictive model and diverse parameters defining 
        ##    how this method will work
        ##
        ##########################################################################################################
        self.model = 'pls'                  # Name of the modeling method used to build the model. Must be one of
                                            # the following values: 'pls', 'pca', 'RF'

        # pls relevant settings
        
        self.modelLV = 2                    # Number of latent variables used to build the model
        
        self.modelAutoscaling = True        # If true, the molecular descriptors will be normalized to unit 
                                            # variance by dividing each value by the variable variance

        self.modelCutoff = 'auto'           # Applicable only for PLS-DA ('quantitative' is False). The cutoff
                                            # value used to asign 1 or 0 to the predictions.
                                            # When set to 'auto' the value is assigned automatically as the one
                                            # producing the best compromise between sensitivity and specificity
                                            
        self.selVar = False                 # If True the program will run the variable selection method
                                            # defined by 'selVarMethod'. This setting can extend significantly 
                                            # the time required to build a model (by many hours or even days)

        # variable selection relevant settings
        
        self.selVarMethod = 'golpe'         # Name of the variable selection method. Must be one of the following
                                            # values: 'golpe' (to be extended in following versions)
                                            
        self.selVarLV = 2                   # Number of latent variables used to build the reduced models used
                                            # by the variable selection. Could be different from 'modelLV'
        
        self.selVarCV = 'LOO'               # Name of the cross-validation method used during the variable
                                            # selection. Must be one of the following values: 'LOO' (to be extended
                                            # in following versions)
        
        self.selVarRun = 2                  # Maximum number of sequential runs of the GOLPE algorithm to run
                                            # before stopping. The algorithm will run only until the model predictive
                                            # ability (assessed by cross-validation) no longer improves
        
        self.selVarMask = None              # Name of a file containing a previously computed mask of selected and
                                            # non selected variables

        # RF relevant settings

        self.RFestimators = 100              # number of trees in the forest
        
        self.RFfeatures = 'sqrt'             # The number of features to consider when looking for the best split.
                                             # Acceptable values are 'sqrt', 'log2' and 'none'
                                             # - 'sqrt': then max_features=sqrt(n_features)
                                             # - 'log2': then max_features=log2(n_features)
                                             # - 'none': then max_features=n_features

        self.RFclass_weight = 'balanced'     # None or Balanced
                                             # class_weight : dict, list of dicts, “balanced”
                                             # The “balanced” mode uses the values of y to automatically adjust weights inversely
                                             # proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y))

        self.RFtune = True                   # If true optimizes the values of RFestimators and RFfeatures and generates a
                                             # diagnostic plot

        self.RFrandom = False                # If True a random seed is used for bootstrapping
                                             # RF: Qualitative and Quantitative
                                             # Only valid for Qualitative Models #  If True a random seed is used as aleatory, if False, random seed is fixed to 1226.       
                                             # int seed, RandomState instance, or None (default)
                                             # The seed of the pseudo random number generator to use when shuffling the data for
                                             # probability estimation.

          
        ## Model Validation Settings
        
        self.ModelValidationCV = 'loo'      ##      ('kfold', 'gkfold', 'stkfold', 'logo', 'lpgo', 'loo', 'lpo', 'shufsplit', 'gshufplit', 'stshufsplit', 'psplit', 'tsplit')
        self.ModelValidationN = 2           ##       int, Only for n_splits or n_groups
        self.ModelValidationP = 1           ##       int, Only for n_samples e.g. LeavePOut(p)

                                            ##        kfold = KFold(n_splits=2, random_state=self.random_state, shuffle=False)              ### K-Folds cross-validator
                                            ##        gkfold = GroupKFold(n_splits=2)                                                       ### K-fold iterator variant with non-overlapping groups.
                                            ##        stkfold = StratifiedKFold(n_splits=2, random_state=self.random_state, shuffle=False)  ### Stratified K-Folds cross-validator
                                            ##        logo = LeaveOneGroupOut()                                                             ### Leave One Group Out cross-validator
                                            ##        lpgo = LeavePGroupsOut(n_groups=2)                                                    ### Leave P Group(s) Out cross-validator
                                            ##        loo = LeaveOneOut()                                                                   ### Leave-One-Out cross-validator
                                            ##        lpo = LeavePOut(2)                                                                    ### Leave-P-Out cross-validator
                                            ##        shufsplit = ShuffleSplit(n_splits=3, random_state=0, test_size=0.25, train_size=None) ### Random permutation cross-validator
                                            ##        gshufplit = GroupShuffleSplit(test_size=10, n_splits=100)                             ### Shuffle-Group(s)-Out cross-validation iterator
                                            ##        stshufsplit = StratifiedShuffleSplit(n_splits=3, test_size=0.5, random_state=0)       ### Stratified ShuffleSplit cross-validator
                                            ##        psplit = PredefinedSplit(test_fold=array([ 0,  1, -1,  1]))                           ### Predefined split cross-validator
                                            ##        tssplit = TimeSeriesSplit(n_splits=3)                                                 ### Time Series cross-validator

        self.ModelValidationLC = False       ##        Plot learning curve
        


        ##########################################################################################################
        ##
        ## View settings
        ##
        ##    Define the type of visual representation to be generated as well as some properties of the generated 
        ##    graphics, like the color, shape and dimension of the markers
        ##
        ##    Available graphic types:
        ##                                    
        ##    'pca'       builds a PCA model with the training series of the selected model and the same MDs. Then
        ##                represents the scores of the PC1 and PC2 in the horizontal and vertical axes
        ##
        ##    'property'  represents the training series of the selected model using the log P for the X axis and
        ##                the molecular weight (MW) for the Y axis, as computed using RDKit
        ##
        ##    'project'   projects the training series of the selected model on a reference dataset for which
        ##                it must have been generated a PCA model. The markers of each compound can be assigned
        ##                colors according to the distance to model (DModX)
        ##                
        ##
        ##########################################################################################################                                            
        self.viewType = 'property'          # Type of graphic to generate. Must be one of the following
                                            # values: 'pca', 'property' or 'project'
                                            
        self.viewBackground = False         # If True, the selected model is represented together with another
                                            # dataset, shown as reference, defined by the two following settings
                                            
        self.viewReferenceEndpoint = None   # Name of the reference dataset (the endpoint label)
        
        self.viewReferenceVersion = 0       # Version of the reference dataset

        self.plotPCAColor = 'red'           # Color of the marker 
        self.plotPCAMarkerShape = 'D'       # Shape of the marker (see 'matplotlib.markers' document for a list)
        self.plotPCAMarkerSize = 40         # Size of the marker
        self.plotPCAMarkerLine = 0          # Thickness of the marker border
        
        self.plotPRPColor = 'red'           # Color of the marker
        self.plotPRPMarkerShape = 'D'       # Shape of the marker (see 'matplotlib.markers' document for a list)
        self.plotPRPMarkerSize = 40         # Size of the marker
        self.plotPRPMarkerLine = 0          # Thickness of the marker border
        
        self.plotPRJColor = 'DModX'         # Color of the marker or 'DModX' to assign a color according to
                                            # the distance to model (DModX) of every compound
                                            
        self.plotPRJMarkerShape = 'o'       # Shape of the marker (see 'matplotlib.markers' document for a list)
        self.plotPRJMarkerSize = 50         # Size of the marker
        self.plotPRJMarkerLine = 1          # Thickness of the marker border
        
        self.plotBGColor = '#aaaaaa'        # Color of the marker
        self.plotBGMarkerShape = 'o'        # Shape of the marker (see 'matplotlib.markers' document for a list)
        self.plotBGMarkerSize = 20          # Size of the marker
        self.plotBGMarkerLine = 0           # Thickness of the marker border

        ##########################################################################################################
        ##
        ## Path to external programs
        ##
        ##    Define the absolute path to diverse external programs used by the program. If the software is
        ##    updated, the correct path must be introduced here
        ##
        ##########################################################################################################                                            
        self.mokaPath = '/opt/blabber/blabber4eTOX/'
        self.padelPath = '/opt/padel/padel218ws/'
        self.padelURL = 'http://localhost:9000/computedescriptors?params=' 
        self.pentaclePath = '/opt/pentacle/pentacle106eTOX/'
        self.adrianaPath = '/opt/AdrianaCode/AdrianaCode226/'
        self.corinaPath = '/opt/corina/corina3494/'
        self.javaPath = '/usr/java/jdk1.7.0_51/'
        self.RPath = '/opt/R/R-3.0.2/'
        self.standardiserPath = '/opt/standardiser/standardise20140206/'
예제 #10
0
    def __init__(self, parent, stype='dm15'):
        """Set up the dm15-parameterized model and its calibration tables.

        Parameters
        ----------
        parent
            Forwarded unchanged to ``model.__init__``.
        stype : str
            Name of the light-curve shape parameter. Only ``'dm15'`` is
            accepted by this model.

        Raises
        ------
        ValueError
            If ``stype`` is anything other than ``'dm15'``.
        """
        if stype != 'dm15':
            # Call form of raise: valid on both Python 2 and Python 3 (the
            # old ``raise E, "msg"`` statement is a SyntaxError on Python 3).
            raise ValueError("This model only supports the dm15 parameter")
        model.__init__(self, parent)

        # Filters handled by this model.
        self.rbs = [
            'u', 'B', 'V', 'g', 'r', 'i', 'Y', 'J', 'H', 'K', 'Bs', 'Vs', 'Rs',
            'Is', 'J_K', 'H_K'
        ]

        # Fit parameters start unset (None); their errors start at zero.
        self.parameters = {
            'DM': None,
            'dm15': None,
            'EBVhost': None,
            'Tmax': None
        }
        self.errors = {'DM': 0, 'dm15': 0, 'EBVhost': 0, 'Tmax': 0}
        self.template = NIR_ubertemp.template()

        # R_V as a function of the calibration fit number (see Folatelli et
        # al. (2009), table 9). The d-prefixed tables below carry the same
        # keys as their un-prefixed counterparts.
        self.Rv_host = {1: 0, 2: 3.10, 3: 1.50, 4: 1.46, 5: 1.46, 6: 1.01}
        self.dRv_host = {1: 0, 2: 0, 3: 0.11, 4: 0.10, 5: 0.33, 6: 0.31}
        self.M0 = {
            1: -19.07,
            2: -19.39,
            3: -19.15,
            4: -19.13,
            5: -19.16,
            6: -19.11
        }
        self.dM0 = {1: 0.01, 2: 0.02, 3: 0.02, 4: 0.01, 5: 0.03, 6: 0.02}
        self.b = {1: 1.03, 2: 0.98, 3: 0.94, 4: 1.05, 5: 0.94, 6: 1.08}
        self.db = {1: 0.25, 2: 0.41, 3: 0.11, 4: 0.11, 5: 0.12, 6: 0.11}

        # B-X pseudo-colors from Folatelli et al. (2009) table 3
        # NOTE(review): 'Bs', 'Vs', 'Rs' and 'Is' appear in self.rbs and in
        # self.dcolors but have no entry here -- confirm this is intentional.
        self.colors = {
            'u': -0.32,
            'B': 0.0,
            'V': -0.02,
            'g': 0.05,
            'r': -0.09,
            'i': -0.63,
            'Y': -0.69,
            'J': -0.65,
            'H': -0.79,
            'K': -0.61,
            'J_K': -0.65,
            'H_K': -0.79
        }
        self.dcolors = {
            'u': 0.04,
            'B': 0,
            'V': 0.01,
            'g': 0.02,
            'r': 0.02,
            'i': 0.02,
            'Y': 0.03,
            'J': 0.02,
            'H': 0.03,
            'K': 0.05,
            'J_K': 0.02,
            'H_K': 0.03,
            'Bs': 0,
            'Vs': 0,
            'Rs': 0,
            'Is': 0
        }

        # Per-filter color slopes and their uncertainties.
        # NOTE(review): as with self.colors, the 'Bs'/'Vs'/'Rs'/'Is' filters
        # only have entries in the uncertainty table -- confirm.
        self.color_slopes = {
            'u': -0.47,
            'B': 0.0,
            'V': 0.12,
            'g': 0.05,
            'r': 0.29,
            'i': 0.39,
            'Y': 0.63,
            'J': 0.67,
            'H': 0.66,
            'K': 0.26,
            'J_K': 0.67,
            'H_K': 0.66
        }
        self.dcolor_slopes = {
            'u': 0.25,
            'B': 0,
            'V': 0.05,
            'g': 0.06,
            'r': 0.07,
            'i': 0.08,
            'Y': 0.17,
            'J': 0.10,
            'H': 0.11,
            'K': 0.18,
            'J_K': 0.10,
            'H_K': 0.11,
            'Bs': 0,
            'Vs': 0,
            'Rs': 0,
            'Is': 0
        }

        # Robs handling starts disabled with an empty table.
        self.do_Robs = 0
        self.Robs = {}