Example 1
0
def getEncoded(dataLocation = "fakefasta.fasta"):
    """Read the fasta data set and build one encoded entry per sequence.

    Each entry is a 3-element list: [id, one-hot encoding, label values],
    where the id is the sample's position in the data set.
    """
    dataobj = readdata.readData(dataLocation,["host","fasta"])
    dataobj.cleanHost()
    xfull,yfull = dataobj.sepDataLabels()

    encoded = []
    for idx, (sample, labeldict) in enumerate(zip(xfull, yfull)):
        # id, one-hot encoding, then every label value for this sample
        entry = [idx, encode1(sample), list(labeldict.values())]
        encoded.append(entry)
    return encoded
Esempio n. 2
0
    [0, 0, 1, 0, 0, 1]
])

# NOTE: this comment is leftover from the XOR tutorial this script was adapted from; the array below is not XOR output.

# In[4]:

#output data
#bpm of the track — placeholder array; it is overwritten by readData() below

y = np.array([[0], [1], [0.5], [0.33], [0.66], [0.66], [0.33]])

print("ici")
import readdata

# Replace the placeholder with the real data set.
# NOTE(review): here readData() returns (X, y) directly, while other scripts
# in this project treat its return value as an object — confirm which
# version of the local readdata module this expects.
X, y = readdata.readData("who cares")
print("ici", X.shape, y.shape)

# The seed for the random generator is set so that it will return the same random numbers each time, which is sometimes useful for debugging.

# In[5]:

#Number_Repete = 1
# Number of columns in X (features per example, judging by the X[1:, :] row slice).
Number_Sample = len(X[1, :])
#Number_Sample_Boucle = int(Number_Sample/Number_Repete)

# Number of labelled examples (rows of y).
Number_Example = len(y)

# Presumably the hidden-layer size of the network built later — confirm
# against the code that consumes it (outside this excerpt).
Number_Neurons = 2

#print("Nb sample", Number_Repete, Number_Sample, Number_Sample_Boucle)
Esempio n. 3
0
import numpy as np
from keras.utils import to_categorical
import readdata

def oneHot(c):
    """Return a 26-element one-hot list for the uppercase letter *c*.

    Position ``ord(c) - ord('A')`` is 1, every other position is 0.

    Raises:
        ValueError: if *c* is not a single character in 'A'..'Z'.
            (The original silently one-hotted the wrong slot for characters
            just below 'A' — e.g. '@' mapped to index -1, flagging 'Z'.)
    """
    index = ord(c) - ord("A")
    if not 0 <= index < 26:
        raise ValueError("expected an uppercase letter A-Z, got %r" % (c,))
    onehot = [0] * 26
    onehot[index] = 1
    return onehot

# Maximum PHOC level to export.
plvlmax = 15

# Input fasta file and output file-name prefix.
inp = "HA_t_complete_some.fasta"
outp = "phocs/phoc"

dataobj = readdata.readData(inp)
x, y = dataobj.sepDataLabels()

# For each PHOC level write one file with one line per sample:
# the PHOC vector (space-separated ints) followed by the label values.
for lvl in range(11,plvlmax+1):
    print("level = " + str(lvl))
    phocs = dataobj.dataToPhoc(x,lvl)
    # `with` guarantees the file is closed even if a write raises
    # (the original open()/close() pair leaked the handle on error).
    with open(outp + str(lvl) + ".txt","w") as goal:
        for phoc, labels in zip(phocs, y):
            # str(list) with commas stripped and brackets sliced off
            # yields the space-separated vector, same as the original.
            fields = [str(phoc).replace(",","")[1:-1]]
            fields.extend(str(labels[key]) for key in labels)
            goal.write(" ".join(fields) + "\n")
Esempio n. 4
0
# 0/1 classification problem is hard for us if we adopt linear regression, so we use logistic regression to
# accomplish this, with use of sigmoid function

import numpy as np
import readdata
import plotdata
from computeCost import computeCost
from gradientDescent import logisticDerivative
from gradientDescent import gradientDescent
from scipy.optimize import fmin_bfgs

if __name__ == "__main__":
    # x: raw features with shape (numOfFeatures, num); y: labels;
    # num: number of examples.
    (x, y, num) = readdata.readData()
    plotdata.plotPoints(x, y)
    shape = x.shape
    numOfFeatures = shape[0]
    # Prepend a row of ones so theta[0] acts as the intercept term.
    X = np.ones([numOfFeatures + 1, num])
    X[1:, :] = x[:, :]
    theta = np.zeros(numOfFeatures + 1)
    # Initial cost at theta = 0.  The original used a Python 2 print
    # statement, a SyntaxError under Python 3; print() works on both.
    print(computeCost(X, y, theta))
    flag = 1
    # way to control which algorithm to choose 0 -> iteration way, 1 -> optimized way (bfgs algorithm)
    if flag == 0:
        iterations = 100000
        alpha = 0.001
        plotdata.plotTheta(x, y, gradientDescent(X, y, theta, alpha,
                                                 iterations))
    else:
        # Let BFGS minimize computeCost over theta directly.
        theta = fmin_bfgs(computeCost, np.transpose(theta), args=(X, y))
        plotdata.plotTheta(x, y, theta)
            if np.random.uniform(
            ) < split:  #chance of adding it to training or testing set (randomly)
                x.append(data[i])
                e = dict(host=labelts[i])
                y.append(e)
            else:
                xt.append(data[i])
                e = dict(host=labelts[i])
                yt.append(e)

    return (x, y, xt, yt)


# Load the human-host fasta data set and convert it to PHOC vectors.
dataLocation = "HA_t_complete_human.fasta"
print("reading data")
dataobj = readdata.readData(dataLocation, ["host", "fasta"])
print("Cleaning labels")
dataobj.cleanHost()
print("separating data and labels")
xfull, yfull = dataobj.sepDataLabels()
print("translating data to phoc")
# NOTE(review): `phoclevel` is not defined anywhere in this excerpt —
# presumably set earlier in the script; confirm before running standalone.
xfull = dataobj.dataToPhoc(xfull, phoclevel, False)
'''
#x and y are training data and training labels, xt and yt are test data and test labels, respectively
x = []
y = []
xt = []
yt = []
#keep track of the indexes for debugging purposes
trainIndexes = []
testIndexes = []