Ejemplo n.º 1
0
def predictFluSeq(seqs):
    """Fit a random forest that predicts each sequence from its predecessor.

    Consecutive records are paired: ``seqs[n]`` is the input and
    ``seqs[n + 1]`` is the target, for every ``n`` up to ``len(seqs) - 2``.

    Args:
        seqs: A sized, integer-indexable collection of records exposing a
            ``.seq`` attribute. NOTE(review): presumably parsed FASTA
            records (e.g. Biopython ``SeqRecord`` objects), not a file
            path -- confirm against the caller.

    Returns:
        A 3-tuple ``(cv_score, avg_cv_score, mse_score)`` where

        * ``cv_score`` is ``(label, scores)`` with the per-fold scores
          returned by 2-fold ``cross_val_score``;
        * ``avg_cv_score`` is a formatted string with the mean and standard
          deviation of those scores, each multiplied by 100;
        * ``mse_score`` is ``(label, mse)`` for a 50/50 train/test split.
    """
    from Encoding_v2 import encoding
    from sklearn.model_selection import cross_val_score, train_test_split
    from sklearn import ensemble, metrics

    # Encode letters into numbers. Inputs are records 0..n-2 and targets are
    # records 1..n-1, so X[n] is paired with its immediate successor y[n].
    X = [encoding(seqs[i].seq) for i in range(len(seqs) - 1)]
    y = [encoding(seqs[j].seq) for j in range(1, len(seqs))]

    # Cross-validation. No random_state is given to the forest, so the
    # scores can differ from run to run.
    rfr = ensemble.RandomForestRegressor()
    rfrscores = cross_val_score(rfr, X, y, cv=2)

    cv_score = ("Random Forests cross-validation score", rfrscores)
    avg_cv_score = ("Average Cross-Val Accuracy: %0.2f (+/- %0.2f)" %
                    (rfrscores.mean() * 100, rfrscores.std() * 100))

    # Mean squared error on a fixed (random_state=50) half/half split.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.5,
                                                        random_state=50)

    rfr.fit(X_train, y_train)
    y_predicted = rfr.predict(X_test)
    mse_score = ('Random Forests MSE:',
                 metrics.mean_squared_error(y_test, y_predicted))

    return cv_score, avg_cv_score, mse_score
Ejemplo n.º 2
0
def predictFluSeq(seqs):
    """Fit a random forest that predicts each sequence from its predecessor.

    Consecutive records are paired: ``seqs[n]`` is the input and
    ``seqs[n + 1]`` is the target, for every ``n`` up to ``len(seqs) - 2``.

    Args:
        seqs: A sized, integer-indexable collection of records exposing a
            ``.seq`` attribute. NOTE(review): presumably parsed FASTA
            records (e.g. Biopython ``SeqRecord`` objects), not a file
            path -- confirm against the caller.

    Returns:
        A 3-tuple ``(cv_score, avg_cv_score, mse_score)`` where

        * ``cv_score`` is ``(label, scores)`` with the per-fold scores
          returned by 2-fold ``cross_val_score``;
        * ``avg_cv_score`` is a formatted string with the mean and standard
          deviation of those scores, each multiplied by 100;
        * ``mse_score`` is ``(label, mse)`` for a 50/50 train/test split.
    """
    from Encoding_v2 import encoding
    from sklearn import ensemble, metrics
    try:
        from sklearn.model_selection import cross_val_score, train_test_split
    except ImportError:
        # Older scikit-learn releases only ship the legacy module; newer
        # ones removed it in favour of model_selection.
        from sklearn.cross_validation import cross_val_score, train_test_split

    # Encode letters into numbers. Inputs are records 0..n-2 and targets are
    # records 1..n-1, so X[n] is paired with its immediate successor y[n].
    X = [encoding(seqs[i].seq) for i in range(len(seqs) - 1)]
    y = [encoding(seqs[j].seq) for j in range(1, len(seqs))]

    # Cross-validation. No random_state is given to the forest, so the
    # scores can differ from run to run.
    rfr = ensemble.RandomForestRegressor()
    rfrscores = cross_val_score(rfr, X, y, cv=2)

    cv_score = ("Random Forests cross-validation score", rfrscores)
    avg_cv_score = ("Average Cross-Val Accuracy: %0.2f (+/- %0.2f)" % (rfrscores.mean()*100, rfrscores.std() *100))

    # Mean squared error on a fixed (random_state=50) half/half split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=50)

    rfr.fit(X_train, y_train)
    y_predicted = rfr.predict(X_test)
    mse_score = ('Random Forests MSE:', metrics.mean_squared_error(y_test, y_predicted))

    return cv_score, avg_cv_score, mse_score
Ejemplo n.º 3
0
# Build (input, target) pairs from consecutive records: X0 receives
# new[0..n-2] and y0 receives new[1..n-1], so X0[n] is paired with the record
# that immediately follows it.
# NOTE: `new` and `X0` are expected to be defined before this fragment.

for i in range(0, len(new) - 1):
    X0.append(new[i].seq)

y0 = []
for j in range(1, len(new)):
    # Index with j, not the leftover i from the loop above; using i would
    # make every target the same record.
    y0.append(new[j].seq)

from Encoding_v2 import encoding

# Encode the raw input sequences (letters -> numbers).
X = []
for k in range(len(X0)):
    encoded_X = encoding(X0[k])
    X.append(encoded_X)

# Encode the raw target sequences the same way.
y = []
for l in range(len(y0)):
    encoded_y = encoding(y0[l])
    y.append(encoded_y)
'''
print len(X[0])
print len(y[298])

a = [1,2,3,4,5]
print len(a)

from Compare_Strains import test_length
Ejemplo n.º 4
0
# Build (input, target) pairs from consecutive records: X0 receives
# new[0..n-2] and y0 receives new[1..n-1], so X0[n] is paired with the record
# that immediately follows it.
# NOTE: `new` and `X0` are expected to be defined before this fragment.

for i in range(0,len(new)-1):
    X0.append(new[i].seq)


y0 = []
for j in range(1,len(new)):
    # Index with j, not the leftover i from the loop above; using i would
    # make every target the same record.
    y0.append(new[j].seq)
    
from Encoding_v2 import encoding
# Changing A,C,T,G into 1,2,3,4

X = []
for k in range(len(X0)):
    encoded_X = encoding(X0[k])
    X.append(encoded_X)
    
y = []
for l in range(len(y0)):
    encoded_y = encoding(y0[l])
    y.append(encoded_y)


# Using sklearn models for prediction
from sklearn import tree
dtr = tree.DecisionTreeRegressor()
dtr.fit(X,y)

# Prefer sklearn.model_selection; the legacy sklearn.cross_validation module
# no longer exists in newer scikit-learn releases. The alias keeps the name
# `cross_validation` bound for any code that follows this fragment.
try:
    from sklearn import model_selection as cross_validation
except ImportError:
    from sklearn import cross_validation
dtrscores = cross_validation.cross_val_score(dtr,X,y,cv=2)
Ejemplo n.º 5
0
# Build (input, target) pairs from consecutive records: X0 receives
# new[0..n-2] and y0 receives new[1..n-1], so X0[n] is paired with the record
# that immediately follows it.
# NOTE: `new` and `X0` are expected to be defined before this fragment.

for i in range(0, len(new) - 1):
    X0.append(new[i].seq)

y0 = []
for j in range(1, len(new)):
    # Index with j, not the leftover i from the loop above; using i would
    # make every target the same record.
    y0.append(new[j].seq)

from Encoding_v2 import encoding, decoding, compare_sequences
# Encoding

X = []
for k in range(len(X0)):
    encoded_X = encoding(X0[k])
    X.append(encoded_X)

y = []
for l in range(len(y0)):
    encoded_y = encoding(y0[l])
    y.append(encoded_y)

# ML and accuracy
from sklearn import tree
dtr = tree.DecisionTreeRegressor()
dtr.fit(X, y)

# 2-fold cross-validation of the decision tree on the paired sequences.
from sklearn.model_selection import cross_val_score, train_test_split
dtrscores = cross_val_score(dtr, X, y, cv=2)
print('Decision Trees', dtrscores)