Beispiel #1
0
# Seed the global random generator with a fixed value before the Keras imports
# below (`np` is presumably NumPy, imported outside this excerpt).
np.random.seed(123)  # for reproducibility

# In[5]:

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
#############################################

# In[6]:

# 1. Load the promoter data set and encode its sequences
#    (the train/test split itself only starts further down, at X_train).
X, y = load_data("../data/promoters.data.txt")  # sequences, labels
X = get_rep_mats(X)  # convert to array of representation matrices
############

# In[ ]:

# Replace every entry of each element of X with that entry's first item,
# overwriting the containers held in X in place.
for rep_mat in X:
    for pos, entry in enumerate(rep_mat):
        rep_mat[pos] = entry[0]
############

# In[ ]:

# Convert the labels to integers ("promoter" is handed on to conv_labels).
y = conv_labels(y, "promoter")
# From here on both the inputs and the labels are NumPy arrays.
X, y = np.asarray(X), np.asarray(y)
# The first 90 samples become the training inputs.
X_train = X[:90]
Beispiel #2
0
        seq = seq.upper()    # b/c rep matrix built on uppercase
        seq = seq.replace("\t","")      # present in promoter 
        seq = seq.replace("N","A")  # undetermined nucleotides in splice
        seq = seq.replace("D","G")
        seq = seq.replace("S","C")
        seq = seq.replace("R","G")
        #####
        labels.append(label)
        seqs.append(seq)
    f.close()
    return seqs, labels


# In[11]:


if __name__ == "__main__":
    # Read the splice junction input data and convert it to the required
    # format: labels via conv_labels, sequences via get_rep_mats.
    seqs, labels = load_data("../data/splice.data.txt")
    lbls_mod = conv_labels(labels)
    seqs_mod = get_rep_mats(seqs)
    # Print the size of each converted collection, sequences first.
    for converted in (seqs_mod, lbls_mod):
        print(len(converted))


# In[ ]: