Python seq_to_yield_modelの例、submodels_module.seq_to_yield_model Pythonの例

コード例 #1

0

ファイルを表示

    def seq_to_yield_simple(self):
        """Plot a bar comparison of the simple sequence-to-yield models.

        Disables ``compare_test``, points ``get_control`` at
        ``get_assay_control``, builds one ``seq_to_yield_model`` per
        architecture (second argument fixed at 1) and passes the resulting
        list to ``plot_bar`` under the name ``'seq_to_yield_simple'``.
        """
        self.compare_test = False
        self.get_control = self.get_assay_control

        architectures = ('ridge', 'forest', 'svm', 'fnn')
        # Models are built in the order listed above; that order is the order
        # handed to plot_bar.
        candidates = [
            modelbank.seq_to_yield_model(name, 1) for name in architectures
        ]

        self.plot_bar(candidates, 'seq_to_yield_simple')

コード例 #2

0

ファイルを表示

    def get_best_seq_to_yield_simple(self):
        """Return the best of the simple sequence-to-yield models.

        Disables ``compare_test``, points ``get_control`` at
        ``get_assay_control``, builds one ``seq_to_yield_model`` per
        architecture (second argument fixed at 1) and returns whatever
        ``get_best_model`` selects from that list.
        """
        self.compare_test = False
        self.get_control = self.get_assay_control

        architectures = ('ridge', 'forest', 'svm', 'fnn')
        candidates = [
            modelbank.seq_to_yield_model(name, 1) for name in architectures
        ]

        return self.get_best_model(candidates)

コード例 #3

0

ファイルを表示

ファイル: model_comparisons.py プロジェクト: sidlax2503/DevRep

    def get_best_seq_to_yield_simple(self):
        """Return the best of the simple sequence-to-yield models.

        The candidates are the four basic regression architectures, each
        wrapped in a ``seq_to_yield_model`` built with a second argument of 1
        (presumably the sample fraction, i.e. all of the data -- confirm
        against submodels_module).
        """
        ## Turn off test comparison and use the assay control as the control
        ## getter for this run.
        self.compare_test = False
        self.get_control = self.get_assay_control

        ## One seq_to_yield model per architecture, in this fixed order.
        architectures = ('ridge', 'forest', 'svm', 'fnn')
        candidates = [
            modelbank.seq_to_yield_model(name, 1) for name in architectures
        ]

        ## get_best_model() picks the winner; its result is returned unchanged.
        return self.get_best_model(candidates)

コード例 #4

0

ファイルを表示

def main():
    '''
    Compare test performances when reducing training sample size.

    This version is for the first paper, predicting yield from assays and
    one-hot encoded sequence.

    The single command line argument ``sys.argv[1]`` is an integer toggle in
    the range 0-12. It selects both the model family and the architecture:

        0-3   -> seqandassay_to_yield_model, architecture index 0-3
        4-7   -> assay_to_yield_model,       architecture index 0-3
        8-11  -> seq_to_yield_model,         architecture index 0-3
        12    -> control_to_yield_model,     architecture index 0

    For every training-set size in ``size_list`` the selected model is built
    and, for seeds 0-8, cross-validated and tested.

    A toggle outside 0-12 prints 'incorrect toggle number' and returns
    without building any model. Raises IndexError if the argument is missing
    and ValueError if it is not an integer.
    '''
    toggle = int(sys.argv[1])

    # Stop on an out-of-range toggle. Previously execution fell through and
    # crashed on the unbound `b`, and negative values silently wrapped around
    # via negative indexing into arch_list.
    if not 0 <= toggle <= 12:
        print('incorrect toggle number')
        return

    # Quotient picks the model family (b), remainder picks the architecture
    # (a); divmod(12, 4) == (3, 0) covers the single control toggle.
    b, a = divmod(toggle, 4)

    arch_list = ['ridge', 'svm', 'forest', 'fnn']

    # size_list=[0.055,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
    size_list = [0.7, 0.8, 0.9, 1]

    for size in size_list:
        if b == 0:
            mdl = modelbank.seqandassay_to_yield_model([1, 8, 10],
                                                       arch_list[a], size)
        elif b == 1:  #1,5,9,12
            mdl = modelbank.assay_to_yield_model([1, 8, 10], arch_list[a],
                                                 size)
        elif b == 2:
            mdl = modelbank.seq_to_yield_model(arch_list[a], size)
        else:  # b == 3
            mdl = modelbank.control_to_yield_model(arch_list[a], size)

        for seed in range(9):  #no seed is seed=42
            mdl.change_sample_seed(seed)
            mdl.cross_validate_model()
            mdl.limit_test_set([1, 8, 10])
            mdl.test_model()

コード例 #5

0

ファイルを表示

ファイル: main_weightedtraining.py プロジェクト: brycejoh16/DevRep

            mdl = modelbank.assay_to_yield_model([1, 8, 10], arch, 1)
        elif i == 1:
            mdl = modelbank.weighted_assay_to_yield_model([1, 8, 10], arch, 1)
        elif i == 2:
            mdl = modelbank.seqandassay_to_yield_model([1, 8, 10], arch, 1)
        else:
            mdl = modelbank.seqandweightedassay_to_yield_model([1, 8, 10],
                                                               arch, 1)
        if mdl.model_stats['cv_avg_loss'] < cv_loss:
            cv_loss = mdl.model_stats['cv_avg_loss']
            test_loss = mdl.model_stats['test_avg_loss']
            test_std = mdl.model_stats['test_std_loss']
    loss_per_model.append(test_loss)
    std_per_model.append(test_std)

# Sequence-only reference: a 'forest' seq_to_yield model built with a second
# argument of 1. Its stored test loss and test-loss standard deviation are
# read from model_stats.
seq_model = modelbank.seq_to_yield_model('forest', 1)
seq_loss = seq_model.model_stats['test_avg_loss']
seq_std = seq_model.model_stats['test_std_loss']
# Two x positions plus two-element lists holding loss +/- one std.
# NOTE(review): presumably used further down to draw a horizontal band across
# the axis -- the plotting code is outside this snippet, confirm there.
x = [-0.3, 0.8]
seq_plus = [seq_loss + seq_std] * 2
seq_min = [seq_loss - seq_std] * 2

# Control reference: a 'ridge' control_to_yield model. Note that control_loss
# is read from model_stats before limit_test_set() is called.
control_model = modelbank.control_to_yield_model('ridge', 1)
control_loss = control_model.model_stats['test_avg_loss']
control_model.limit_test_set([1, 8, 10])
# exp_var is the mean of the squared 'y_std' column of the exploded test
# dataframe. NOTE(review): presumably the experimental variance of the yield
# measurements -- confirm against load_format_data.explode_yield.
exploded_df, _, _ = load_format_data.explode_yield(control_model.testing_df)
exp_var = np.average(np.square(np.array(exploded_df['y_std'])))

# Single small, high-resolution axis for the figure.
fig, ax = plt.subplots(1, 1, figsize=[2, 2], dpi=300)

xloc = [0, 0.5]

コード例 #6

0

ファイルを表示

def main():
    '''
    Compare test performances when reducing training sample size.

    This version is for the first paper, predicting yield from assays and
    one-hot encoded sequence.

    A command line input is required when running this program: an integer
    toggle in the range 0-12, read from ``sys.argv[1]``. It selects both the
    model family and the regression architecture:

        0-3   -> seqandassay_to_yield_model, architecture index 0-3
        4-7   -> assay_to_yield_model,       architecture index 0-3
        8-11  -> seq_to_yield_model,         architecture index 0-3
        12    -> control_to_yield_model,     architecture index 0

    A toggle outside 0-12 prints 'incorrect toggle number' and returns
    without building any model. Raises IndexError if the argument is missing
    and ValueError if it is not an integer.
    '''
    toggle = int(sys.argv[1])

    ## Stop on an out-of-bounds toggle. Previously execution fell through
    ## and crashed on the unbound `b`, and negative values silently wrapped
    ## around via negative indexing into arch_list.
    if not 0 <= toggle <= 12:
        print('incorrect toggle number')
        return

    ## The quotient b selects the model family and the remainder a selects the
    ## architecture; divmod(12, 4) == (3, 0) covers the single control toggle.
    b, a = divmod(toggle, 4)

    ## Names of the different regression models.
    arch_list = ['ridge', 'svm', 'forest', 'fnn']
    # size_list=[0.055,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
    ## Sample fractions to sweep over.
    size_list = [0.7, 0.8, 0.9, 1]

    for size in size_list:
        ## b dictates which kind of model object is created; the assay-based
        ## families use the assay list [1, 8, 10].
        if b == 0:
            mdl = modelbank.seqandassay_to_yield_model([1, 8, 10], arch_list[a], size)
        elif b == 1:  #1,5,9,12
            mdl = modelbank.assay_to_yield_model([1, 8, 10], arch_list[a], size)
        elif b == 2:
            mdl = modelbank.seq_to_yield_model(arch_list[a], size)
        else:  ## b == 3
            mdl = modelbank.control_to_yield_model(arch_list[a], size)

        for seed in range(9):  #no seed is seed=42
            ## For each seed in [0, 9): switch the model to that sample seed,
            ## cross-validate to pick hyperparameters, restrict the test set
            ## to assays 1, 8 and 10, then train and evaluate on it.
            mdl.change_sample_seed(seed)
            mdl.cross_validate_model()
            mdl.limit_test_set([1, 8, 10])
            mdl.test_model()

コード例 #7

0

ファイルを表示

 for arch in arch_list:
     ## Then for each element in the arch_list
     ## Depending on which outermost iterative loop(i.e what value of b) we are in the model object we are going to create
     ## from the submodels_module.py program 
     ## if b = 0, then a seqandassay_to_yield_model object is created with an assay list of [1,8,10]
     ## a regression model a sample_fraction determined by the iteration of arch_llist and size_list respectively. 
     if b==0:
         mdl=modelbank.seqandassay_to_yield_model([1,8,10],arch,size)
     ## if b = 1, then a assay_to_yield_model object is created with an assay list of [1,8,10]
     ## a regression model a sample_fraction determined by the iteration of arch_llist and size_list respectively.
     elif b==1: #1,5,9,12
         mdl=modelbank.assay_to_yield_model([1,8,10],arch,size)
     ## if b = 2, then a seq_to_yield_model object is created with a regression model a sample_fraction
     ##  determined by the iteration of arch_llist and size_list respectively.
     elif b==2: 
         mdl=modelbank.seq_to_yield_model(arch,size)
     ## if b = 2, then a control_to_yield_model object is created with a ridge regression model and a sample_fraction
     ##  determined by the iteration of size_list.
     elif b==3:
         mdl=modelbank.control_to_yield_model('ridge',size)
 
     cur_cv_loss=[]
      cur_test_loss=[]
     cur_cv_loss.append(mdl.model_stats['cv_avg_loss'])
     cur_test_loss.append(mdl.model_stats['test_avg_loss'])
     ## Once the model object is created and stored on mdl, two new lists cur_cv_loss and cur_test_loss are created and the
     ## cv_avg_loss and test_avg_loss columns in the model_stats class dataframe are accessed and stored respectively.
     for seed in range(9):
         ## For each element in the int range of [0,9), the change_sample_seed() function of the x_to_yield_model parent class
         ## defined in the submodels_module.py program is run which changes the sample_seed class int to reflect the element and updates the
         ## trial data, model data and plots to reflect this change.