Example #1
def main():
    while True:
        word = input("Find a name in English dictionary: ").lower()
        if word == '/exit':
            break
        translation = functions.translate(word)
        if translation:
            print(' \n'.join(translation))
            continue
        # fall back to a fuzzy search for a similar entry
        searched = functions.similar_search(word)
        if searched:
            print('Did you mean?(Y/N):', searched)
            if input().lower() == 'y':
                print(' \n'.join(functions.translate(searched)))
                continue
        print("Unknown word... Try again or type '/exit' to quit.")
Example #2
def insert_codons(seq1, seq2, pad_left, pad_right, homology_length,
                  triplets_to_insert):
    seqlen = len(seq1)
    for codon_position in range(0, seqlen, 3):
        # note that seq2 is the wild type
        replaced_codon = seq2[codon_position:codon_position + 3]
        replaced_AA = f.translate(replaced_codon)
        left_padding_length = max(homology_length - codon_position, 0)
        if left_padding_length > 0:
            left_padding_seq = pad_left[-left_padding_length:].lower()
        else:
            left_padding_seq = ''

        right_padding_length = max(
            homology_length - (seqlen - codon_position), 0)
        if right_padding_length > 0:
            right_padding_seq = pad_right[:right_padding_length].lower()
        else:
            right_padding_seq = ''

        for i in triplets_to_insert:
            inserted_AA = f.translate(i)
            singleSNPmutation = inserted_AA in singleSNPcodons[replaced_codon]
            left_seq = seq1[max(0, codon_position -
                                homology_length):codon_position]
            right_seq = seq2[codon_position +
                             3:min(seqlen, codon_position + homology_length +
                                   3)]
            yield (' '.join([
                replaced_codon, "(" + replaced_AA + ")", "to", inserted_AA,
                str(singleSNPmutation), left_padding_seq + left_seq, i,
                right_seq + right_padding_seq
            ]))
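Here `f.translate` is a codon-to-amino-acid lookup and `singleSNPcodons` maps each codon to the amino acids reachable by a single-nucleotide change. The snippet does not show either, so the following is only a sketch of how they could be built, with Biopython's `Seq.translate` standing in for the project's own translation helper:

from itertools import product

from Bio.Seq import Seq

def translate(codon):
    """Translate one DNA codon to its one-letter amino acid code."""
    return str(Seq(codon).translate())

BASES = 'ACGT'

# For every codon, collect the amino acids reachable by changing one base.
singleSNPcodons = {}
for codon in map(''.join, product(BASES, repeat=3)):
    neighbours = set()
    for pos in range(3):
        for base in BASES:
            if base != codon[pos]:
                neighbours.add(translate(codon[:pos] + base + codon[pos + 1:]))
    singleSNPcodons[codon] = neighbours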
Example #3
def __init__(self, master=None):
    """Init method for the MainFrame class that contains every widget used"""
    Frame.__init__(self, master)
    # Defining attributes:
    self.host_ip = str()
    self.host_socket = sk.socket(sk.AF_INET, sk.SOCK_STREAM)
    self.port = StringVar()
    self.max_connections = StringVar()
    self.users = dict()
    self.language = "EN-us"
    self.window = master

    self.pack(fill=BOTH)
    self.init_tabs()
    self.init_menu_bar()
    self.load_options()
    self.translation = translate()
Example #4
def playGame(self):
    board = func.generateBoard()
    totalPieces = nn.BOARDSIZE - 4
    currPlayer = 1
    func.visualizeBoard(board)

    while totalPieces != 0:
        # Check that there are valid moves and switch players if necessary
        moves = func.possibleMoves(currPlayer, board)
        if not moves:
            currPlayer = -currPlayer
            moves = func.possibleMoves(currPlayer, board)
            if not moves:
                break

        choice = -1
        if currPlayer == 1:  # Human playing
            print("Your move! (You are X)")
            while choice not in moves:
                x = int(input("Input x (Upper left is (0,0)): "))
                y = int(input("Input y (Upper left is (0,0)): "))
                choice = func.translate(x, y)
        else:  # Follow the procedure to produce network output
            print("Opponent's move:")
            decisions = self.forwardpass(board, currPlayer)
            bestchoice = np.argmax(decisions)
            while bestchoice not in moves:
                decisions[bestchoice] = 0
                bestchoice = np.argmax(decisions)
            choice = bestchoice

        # Update board, current player, and number of pieces, then print
        # the board
        board = func.placePiece(currPlayer, board, choice, moves[choice])
        func.visualizeBoard(board)
        currPlayer = -currPlayer
        totalPieces -= 1

    return sum(board)
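`func.translate(x, y)` evidently converts the typed board coordinates into the flat move index that `possibleMoves` returns. Assuming a square, row-major board with (0,0) in the upper left (the board side of 8 is a guess), a minimal sketch:

def translate(x, y, width=8):
    """Map (x, y), with (0, 0) in the upper left, to a row-major index."""
    return y * width + x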
Example #5
allAminoAcids = list(putativeCodonsTable['AA'].unique())

# retrieve rows with maximum frequency, grouped by amino acid
abundantCodons = putativeCodonsTable.loc[putativeCodonsTable.groupby('AA')
                                         ['count'].idxmax()]

# f.sampleCodon(putativeCodonsTable, "F")

fasta_1 = f.read_fasta(args.fasta_1_filename)
fasta_2 = f.read_fasta(args.fasta_2_filename)

# Assert sequences are identical length
assert len(fasta_1) == len(fasta_2), "Sequences not same length!"

# Assert amino acid sequences are identical
aa_1 = f.translate(fasta_1)
aa_2 = f.translate(fasta_2)
assert aa_1 == aa_2, "Amino acid sequences not identical!"

# Load upstream and downstream seq

with open('upstream.dna', 'r') as infile:
    upstream = infile.read().strip()

with open('downstream.dna', 'r') as infile:
    downstream = infile.read().strip()

codons_1 = f.split_codons(fasta_1)
codons_2 = f.split_codons(fasta_2)

# create mutated codon dictionary
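The script leans on small FASTA helpers from `f` that the snippet does not show. Minimal sketches, under the assumption that each file holds a single sequence:

def read_fasta(filename):
    """Return the concatenated sequence, skipping '>' header lines."""
    with open(filename) as handle:
        return ''.join(line.strip() for line in handle
                       if not line.startswith('>'))

def split_codons(seq):
    """Split a nucleotide sequence into a list of triplets."""
    return [seq[i:i + 3] for i in range(0, len(seq), 3)]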
Example #6
def encaps_RF():
    args = parser.parse_args()

    data_size = args.size - 1


    if(args.step > 0 & args.step < 4):

        data = func.reading_data(args.diffuse,data_size)



    print("Finished with reading Data ... \n")

    if args.sv:

        # calculate the mean scaled width and length
        data = func.calc_scaled_width_and_length(data)
        data = data.drop(['scaled_width', 'scaled_length'], axis=1)



        print("Finished with calculating Mean Scaled Values ... \n")
        plt.plot(data["telescope_type_id"],data["mc_energy"],".")
        plt.xlabel("telescope_type")
        plt.ylabel("mc_energy")
        plt.savefig("plots/telescope_type.jpg")
        plt.close()


    if 0 < args.step < 4:
        data = shuffle(data)
        # drop unimportant data
        data, dropped_data = func.drop_data(data)

        #data['weight'] = dropped_data['telescope_type_name']
        truth = data[['mc_energy', 'array_event_id', 'run_id']]
        data = data.drop('mc_energy', axis=1)

        # fit and predict
        RFr = RandomForestRegressor(max_depth=10, n_jobs=-1, n_estimators=100,
                                    oob_score=True, max_features='sqrt')
        train_i, test_i = train_test_split(data[['array_event_id', 'run_id']],
                                           test_size=0.66)

        X_train = data.loc[data[['array_event_id', 'run_id']].isin(train_i)[
            data[['array_event_id', 'run_id']].isin(train_i) == True].dropna().index]
        X_test = data.loc[data[['array_event_id', 'run_id']].isin(test_i)[
            data[['array_event_id', 'run_id']].isin(test_i) == True].dropna().index]
        y_train = truth.loc[truth[['array_event_id', 'run_id']].isin(train_i)[
            truth[['array_event_id', 'run_id']].isin(train_i) == True].dropna().index]
        y_test = truth.loc[truth[['array_event_id', 'run_id']].isin(test_i)[
            truth[['array_event_id', 'run_id']].isin(test_i) == True].dropna().index]

        if args.sv:

            # calculate the mean scaled width and length
            X_train = func.calc_scaled_width_and_length(X_train)
            X_test = func.calc_scaled_width_and_length(X_test)


        X1 = X_train.drop(['array_event_id','run_id'],axis=1).values
        X2 = X_test.drop(['array_event_id','run_id'],axis=1).values
        y1 = y_train.drop(['array_event_id','run_id'],axis=1).values
        y2 = y_test.drop(['array_event_id','run_id'],axis=1).values
        print("We use these attributes for the first RF: \n ",list(X_train.drop(['array_event_id','run_id'],axis=1)))
        RFr.fit(X1, y1)

        ############### overfitting ####################
        print("The oob_score is: ", RFr.oob_score_)

        ############# feature importance ################
        feature = RFr.feature_importances_
        indices = np.argsort(feature)[::-1]
        names = list(X_train.drop(['array_event_id', 'run_id'], axis=1))
        names = func.translate(names, 1)
        # Print the feature ranking
        print("Feature ranking:")

        for f in range(X1.shape[1]):
            print("%d. feature %s (%f)" % (f + 1, names[indices[f]], feature[indices[f]]))


        data1 = np.array([tree.feature_importances_ for tree in RFr.estimators_])
        data1 = data1[:, indices]
        position_ticks = np.arange(0, X1.shape[1]) + 1
        plt.boxplot(data1, notch=False)
        plt.xticks(position_ticks, [names[i] for i in indices], rotation=90)
        plt.ylabel('Importance')
        plt.tight_layout()
        plt.savefig("plots/featureimportance_boxplot_firstForest.pdf")
        plt.close()

        ################# prediction ###############
        predictions = RFr.predict(X2)
        print("Trained with:", y_train.shape[0], " \t Tested with: ", y_test.shape[0])

        z = np.array([predictions, y2[:, 0]])
        np.savetxt("data/encaps_pred_data.txt", z.T)
        pred = pd.DataFrame({'prediction': predictions,
                             'mc_energy': y_test['mc_energy'],
                             'array_event_id': y_test['array_event_id'],
                             'run_id': y_test['run_id']})

        print('RandomForestRegressor:\n\t Coefficient of determination: %.2f \n' % r2_score(predictions, y2[:, 0]),
              '\texplained_variance score: %.2f \n' % explained_variance_score(predictions, y2[:, 0]),
              '\tmean squared error: %.2f \n' % mean_squared_error(predictions, y2[:, 0]),
              "Finished with the first prediction ... \n")

        '''
        ############## Treeinterpreter #################
        test_dat = X2[:2, :]
        prediction, bias, contributions = ti.predict(RFr, test_dat)

        for i in range(len(test_dat)):
            print("Instance: ", i)
            print("Bias: ", bias[i])
            print("Feature contributions: ")
            for c, feature in sorted(zip(contributions[i], names), key=lambda x: -abs(x[0])):
                print(feature, round(c, 2))
            print("-" * 20)
        '''
    if args.step > 1:

        ######### weighted and unweighted mean ######################
        X_test_w = X_test.set_index(['run_id','array_event_id'])
        pred_w = pred.set_index(['run_id','array_event_id'])
        data_w = pd.concat([X_test_w,pred_w],axis=1).reset_index()
        truth_grouped = y_test.drop_duplicates().set_index(['run_id','array_event_id'])


        ################ intensity as a weight? #################
        rel_err = (data_w["prediction"] - data_w["mc_energy"]) / data_w["mc_energy"]
        plt.plot(rel_err, data_w["intensity"], 'b.')
        plt.xlabel("relative error")
        plt.ylabel("intensity")
        #plt.xscale("log")
        plt.yscale("log")
        plt.tight_layout()
        plt.savefig("plots/intensity.jpg")
        plt.close()
        """
Example #7
def test_translate():
    print("Testing translate function...")
    assert isinstance(translate(test_rna_strand), str)
    assert translate(test_rna_strand) == 'MAMAPRTEINSTRING'
    print("All tests passed!\n")
Example #8
def RF_trafo():
    args = parser.parse_args()

    data_size = args.size - 1

    data = func.reading_data(args.diffuse, data_size)

    print("Finished with reading Data ... \n")

    if args.sv:

        # calculate the mean scaled width and length
        data = func.calc_scaled_width_and_length(data)
        data = data.drop(['scaled_width', 'scaled_length'], axis=1)

        print("Finished with calculating Mean Scaled Values ... \n")

    data = shuffle(data)
    # drop unimportant data
    data, dropped_data = func.drop_data(data)
    #data['weight'] = dropped_data['telescope_type_name']
    truth = data[['mc_energy', 'array_event_id', 'run_id']]
    data = data.drop('mc_energy', axis=1)
    #fit and predict
    RFr = RandomForestRegressor(max_depth=10,
                                n_jobs=-1,
                                n_estimators=100,
                                oob_score=True,
                                max_features='sqrt')
    train_i, test_i = train_test_split(data[['array_event_id', 'run_id']],
                                       test_size=0.66)
    X_train = data.loc[data[['array_event_id', 'run_id']].isin(train_i)[data[
        ['array_event_id', 'run_id']].isin(train_i) == True].dropna().index]
    X_test = data.loc[data[['array_event_id', 'run_id']].isin(test_i)[data[
        ['array_event_id', 'run_id']].isin(test_i) == True].dropna().index]
    y_train = truth.loc[truth[[
        'array_event_id', 'run_id'
    ]].isin(train_i)[truth[['array_event_id', 'run_id']].isin(train_i) ==
                     True].dropna().index]
    y_test = truth.loc[truth[['array_event_id', 'run_id']].isin(test_i)[truth[
        ['array_event_id', 'run_id']].isin(test_i) == True].dropna().index]

    if args.sv:
        # calculate the mean scaled width and length
        X_train = func.calc_scaled_width_and_length(X_train)
        X_test = func.calc_scaled_width_and_length(X_test)

    X1 = X_train.drop(['array_event_id', 'run_id'], axis=1).values
    X2 = X_test.drop(['array_event_id', 'run_id'], axis=1).values
    y1 = y_train.drop(['array_event_id', 'run_id'], axis=1).values
    y2 = y_test.drop(['array_event_id', 'run_id'], axis=1).values
    print("We use these attributes for the first RF: \n ",
          list(X_train.drop(['array_event_id', 'run_id'], axis=1)))

    ##################### transformation #############################
    y1 = np.log(y1 + 3)
    RFr.fit(X1, y1)

    ############### overfitting ####################
    print("The oob_score is: ", RFr.oob_score_)

    ############# feature importance ################
    feature = RFr.feature_importances_
    indices = np.argsort(feature)[::-1]
    names = list(X_train.drop(['array_event_id', 'run_id'], axis=1))
    names = func.translate(names, 1)
    # Print the feature ranking
    print("Feature ranking:")
    for f in range(X1.shape[1]):
        print("%d. feature %s (%f)" %
              (f + 1, names[indices[f]], feature[indices[f]]))
    data1 = np.array([tree.feature_importances_ for tree in RFr.estimators_])
    data1 = data1[:, indices]
    position_ticks = np.arange(0, X1.shape[1]) + 1
    plt.boxplot(data1, notch=False)
    plt.xticks(position_ticks, [names[i] for i in indices], rotation=90)
    plt.ylabel('Importance')
    plt.tight_layout()
    plt.savefig("plots/featureimportance_boxplot_trafo_firstForest.pdf")
    plt.close()

    ################# prediction ###############
    predictions = RFr.predict(X2)
    print("Trained with:", y_train.shape[0], " \t Tested with: ",
          y_test.shape[0])
    pred = pd.DataFrame({
        'prediction': predictions,
        'mc_energy': y_test['mc_energy'],
        'array_event_id': y_test['array_event_id'],
        'run_id': y_test['run_id']
    })

    ######## inverse transformation ###########
    predictions_rück = np.exp(predictions) - 3

    z = np.array([predictions_rück, y2[:, 0]])
    np.savetxt("data/trafo_pred_data.txt", z.T)

    print(
        'RandomForestRegressor:\n\t Coefficient of determination: %.2f \n' %
        r2_score(predictions_rück, y2[:, 0]),
        '\texplained_variance score: %.2f \n' %
        explained_variance_score(predictions_rück, y2[:, 0]),
        '\tmean squared error: %.2f \n' %
        mean_squared_error(predictions_rück, y2[:, 0]),
        "Finished with the first prediction ... \n")

    ######### weighted and unweighted mean ######################
    X_test_w = X_test.set_index(['run_id', 'array_event_id'])
    pred_w = pred.set_index(['run_id', 'array_event_id'])
    data_w = pd.concat([X_test_w, pred_w], axis=1).reset_index()
    truth_grouped = y_test.drop_duplicates().set_index(
        ['run_id', 'array_event_id'])
    ################ Mean and Median ##################
    x_grouped = data_w[['prediction', 'array_event_id',
                        'run_id']].groupby(by=['run_id', 'array_event_id'])
    pred_mean = x_grouped.mean()
    pred_mean.columns = ['mean_prediction']
    pred_mean = pd.concat([pred_mean, truth_grouped], axis=1)

    #### inverse transformation ######
    pred_mean_rück = np.exp(pred_mean['mean_prediction']) - 3
    pred_mean_rück = pd.concat([pred_mean_rück, truth_grouped], axis=1)
    z = np.array([
        pred_mean_rück['mean_prediction'].values,
        pred_mean_rück['mc_energy'].values
    ])
    np.savetxt("data/trafo_pred_mean_data.txt", z.T)

    print(
        'RF with mean:\n\t Coefficient of determination: %.2f \n' %
        r2_score(pred_mean_rück['mean_prediction'].values,
                 pred_mean_rück['mc_energy'].values),
        '\texplained_variance score: %.2f \n' %
        explained_variance_score(pred_mean_rück['mean_prediction'].values,
                                 pred_mean_rück['mc_energy'].values),
        '\tmean squared error: %.2f \n' %
        mean_squared_error(pred_mean_rück['mean_prediction'].values,
                           pred_mean_rück['mc_energy'].values))

    # use the prediction_median for another RF
    encaps_info = [
        'width', 'length', 'num_triggered_telescopes', 'num_triggered_lst',
        'num_triggered_mst', 'num_triggered_sst', 'total_intensity',
        'array_event_id', 'run_id'
    ]
    data_encaps = X_test[encaps_info]
    pred_mean = pred_mean.reset_index()
    data_encaps = data_encaps.merge(pred_mean, on=['run_id', 'array_event_id'])
    train_i2, test_i2 = train_test_split(
        data_encaps[['array_event_id', 'run_id']], test_size=0.5)
    X2_train = data_encaps.loc[data_encaps[[
        'array_event_id', 'run_id'
    ]].isin(train_i2)[data_encaps[['array_event_id', 'run_id']].isin(train_i2)
                      == True].dropna().index]
    X2_test = data_encaps.loc[data_encaps[[
        'array_event_id', 'run_id'
    ]].isin(test_i2)[data_encaps[['array_event_id', 'run_id']].isin(test_i2) ==
                     True].dropna().index]
    if args.sv:
        X2_train = func.calc_mean_scaled_width_and_length(X2_train)
        X2_test = func.calc_mean_scaled_width_and_length(X2_test)
    ######## compute new attributes #########
    ######### mean of the energies for the LSTs only ###########
    pred = data_w[[
        'prediction', 'array_event_id', 'run_id', 'telescope_type_id'
    ]]
    telescope_type = pred['telescope_type_id'].copy(deep=True)
    pred = pred.drop('telescope_type_id', axis=1)

    # per-telescope-type aggregates (mean, std, min, max) of the predictions
    prediction_frames = []
    for type_id, type_name in [(1, 'lst'), (2, 'mst'), (3, 'sst')]:
        pred_type = pred[telescope_type == type_id]
        grouped = pred_type.groupby(by=['run_id', 'array_event_id'])
        for agg_name, aggregated in [('mean', grouped.mean()),
                                     ('std', grouped.std()),
                                     ('min', grouped.min()),
                                     ('max', grouped.max())]:
            column = '%s_%s_pred' % (agg_name, type_name)
            prediction_frames.append(aggregated.reset_index().rename(
                columns={'prediction': column}))

    X2_train = X2_train.reset_index()
    X2_test = X2_test.reset_index()
    for frame in prediction_frames:
        X2_train = X2_train.merge(frame, how='left',
                                  on=['run_id', 'array_event_id'])
        X2_test = X2_test.merge(frame, how='left',
                                on=['run_id', 'array_event_id'])
    # if no LST, MST, or SST saw the event, set its mean and std to 0;
    # the std is also NaN when there is only a single prediction
    X2_test = X2_test.fillna(0)
    X2_test = shuffle(X2_test)
    y2_test = X2_test[['mc_energy']].copy(deep=True)
    X2_test = X2_test.drop(['mc_energy', 'array_event_id', 'run_id'], axis=1)
    X2_train = X2_train.fillna(0)
    X2_train = shuffle(X2_train)
    y2_train = X2_train[['mc_energy']].copy(deep=True)
    X2_train = X2_train.drop(['mc_energy', 'array_event_id', 'run_id'], axis=1)
    # fit and predict
    RFr2 = RandomForestRegressor(max_depth=10,
                                 n_jobs=-1,
                                 n_estimators=100,
                                 oob_score=True,
                                 max_features='sqrt')
    print("We use these attributes for the second RF: \n ", list(X2_train))
    ########### transformation ############
    y2_train = np.log(y2_train + 3)
    RFr2.fit(X2_train.values, y2_train.values)
    ############### overfitting ####################
    print("The oob_score is: ", RFr2.oob_score_)
    ############# feature importance ################
    feature = RFr2.feature_importances_
    std = np.std([tree.feature_importances_ for tree in RFr2.estimators_],
                 axis=0)
    indices = np.argsort(feature)[::-1]
    names = list(X2_train)
    names = func.translate(names, 2)
    # Print the feature ranking
    print("Feature ranking:")
    for f in range(X2_train.shape[1]):
        print("%d. feature %s (%f)" %
              (f + 1, names[indices[f]], feature[indices[f]]))
    data2 = np.array([tree.feature_importances_ for tree in RFr2.estimators_])
    data2 = data2[:, indices]
    position_ticks = np.arange(0, X2_train.shape[1]) + 1
    plt.boxplot(data2, notch=False)
    plt.xticks(position_ticks, [names[i] for i in indices], rotation=90)
    plt.ylabel('Importance')
    plt.tight_layout()
    plt.savefig("plots/featureimportance_boxplot_trafo_secondForest.pdf")
    plt.close()
    ####### predictions ################
    prediction_encaps = RFr2.predict(X2_test.values)
    print("Trained with:", y2_train.shape[0], " \t Tested with: ",
          y2_test.shape[0])
    ############ inverse transformation ###########
    prediction_encaps = np.exp(prediction_encaps) - 3

    z = np.array([prediction_encaps, y2_test['mc_energy'].values])
    np.savetxt("data/trafo_encaps_pred_data.txt", z.T)

    print(
        'encapsulated with median_prediction RF:\n\t Coefficient of determination: %.2f\n'
        % r2_score(prediction_encaps, y2_test.values),
        '\texplained_variance score: %.2f \n' %
        explained_variance_score(prediction_encaps, y2_test.values),
        '\tmean squared error: %.2f \n' %
        mean_squared_error(prediction_encaps, y2_test.values),
        "Finished with the encapsulated prediction \n")
Example #9
import os

import functions

FILE_AFNOR = "181017151004_2119332 - Anonyme.AFN.txt"
FILE_AFNOR_2 = "181017151004_2119332 - Anonyme_666.AFN.txt"

functions.recuperation(FILE_AFNOR, "temp1.csv")
functions.translate("temp1.csv", "temp2.csv")
functions.analysis("temp2.csv", "prescription.obj")

functions.recuperation(FILE_AFNOR_2, "temp3.csv")
functions.translate("temp3.csv", "temp4.csv")
functions.analysis("temp4.csv", "prescription_666.obj")

try:
    os.remove("temp1.csv")
    os.remove("temp2.csv")
    os.remove("temp3.csv")
    os.remove("temp4.csv")
except FileNotFoundError:
    pass

OBJ = functions.load_object("prescription.obj")
OBJ2 = functions.load_object("prescription_666.obj")

functions.compare_prescription(OBJ, OBJ2)
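`functions.translate` here is a file-to-file step between extraction (`recuperation`) and `analysis`. A hedged sketch of the shape such a step could take; the CSV handling and the code table are assumptions, not the project's actual logic:

import csv

# Hypothetical lookup table; the real mapping lives in the project.
CODES = {'MED001': 'paracetamol'}

def translate(infile, outfile):
    """Rewrite a CSV, replacing known codes cell by cell."""
    with open(infile, newline='') as src, open(outfile, 'w', newline='') as dst:
        writer = csv.writer(dst)
        for row in csv.reader(src):
            writer.writerow([CODES.get(cell, cell) for cell in row])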