def createModel():
    maxlen = 150
    # Pickled objects must be opened in binary mode
    index_dict = pickle.load(open('./w2v_index.pkl', 'rb'))
    vec_dict = pickle.load(open('./w2v_vec.pkl', 'rb'))
    n_words = len(index_dict.keys())
    print(n_words)
    vec_matrix = np.zeros((n_words + 1, 100))
    for k, i in index_dict.items():  # map each word index to its word vector
        try:
            vec_matrix[i, :] = vec_dict[k]
        except KeyError:
            # the word has an index but no word vector was found for it
            print(k, i)
            exit(1)
    labels = getLabels()
    sentences = loadData('./sen_cut.txt')
    X_train, X_test, y_train, y_test = train_test_split(sentences, labels, test_size=0.2)
    X_train = text2index(index_dict, X_train)
    X_test = text2index(index_dict, X_test)
    print("training set shape: ", np.shape(X_train))
    print("test set shape: ", np.shape(X_test))
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    print('Pad sequences (samples x time)')
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)  # pad sequences shorter than maxlen with zeros
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print("training set shape: ", np.shape(X_train))
    print("test set shape: ", np.shape(X_test))
    train_lstm(n_words + 1, vec_matrix, X_train, y_train, X_test, y_test)
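# A minimal sketch of the text2index helper called in createModel(); its real
# implementation is not shown here, so this assumes each sentence is a list of
# tokens and maps words missing from index_dict to index 0 (the padding slot).
def text2index(index_dict, sentences):
    return [[index_dict.get(w, 0) for w in sen] for sen in sentences]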
def getSVMModel(verbose=0):
    train_X, test_X, train_Y, test_Y = loadData(
        '/root/Desktop/hack/CHD/New_Model/dataset.csv')
    # keep only 1% of the training data (integer division for valid slice indices)
    train_X = train_X[:len(train_X) // 100]
    train_Y = train_Y[:len(train_Y) // 100]
    model = svm.SVC(kernel="rbf", C=100, gamma=0.1)
    # train model
    model.fit(train_X, train_Y)
    predicted_labels = model.predict(test_X)
    if verbose == 1:
        print("FINISHED classifying. accuracy score : ")
        print(accuracy_score(test_Y, predicted_labels))
    return model
def load_data(fileobj, batch_size):
    """ Loads the dataset

    :type fileobj: file object, typically created by `open(filename, 'rb')`
    :param fileobj: handle to the dataset file

    :type batch_size: int
    :param batch_size: Maximum number of examples to load.
    """
    # Load the dataset
    data_set = loadData(fileobj, batch_size)

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats,
        # therefore we store the labels as ``floatX`` as well (``shared_y``
        # does exactly that). But during our computations we need them as
        # ints (we use the labels as indices, and floats don't make sense
        # there), so instead of returning ``shared_y`` we cast it to int.
        # This little hack lets us get around this issue.
        return shared_x, T.cast(shared_y, 'int32')

    data_set_x, data_set_y = shared_dataset(data_set)
    return (data_set_x, data_set_y)
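# Example usage of load_data (the pickle file name is illustrative): load one
# batch of 500 examples into Theano shared variables and inspect them.
if __name__ == '__main__':
    with open('training-data.pkl', 'rb') as f:
        batch_x, batch_y = load_data(f, 500)
    print(batch_x.get_value(borrow=True).shape)  # features live in a shared variable
    print(batch_y.dtype)                         # 'int32' after the cast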
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Activation
import numpy as np
import keras
import math
from LoadData import loadData

# Load data
train_X, test_X, train_Y, test_Y = loadData('dataset4.csv')


def KerasModel():
    model = Sequential()
    model.add(Dense(12, input_dim=9, activation='relu'))
    # model.add(Dense(12, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    adamm = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                  epsilon=math.pow(10, -8), decay=0.0)
    model.compile(optimizer=adamm, loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(x=np.array(train_X), y=np.array(train_Y),
                        validation_data=(np.array(test_X), np.array(test_Y)),
                        nb_epoch=50, batch_size=32)
    plt.plot(history.history['acc'])
    return model


'''Define the necessary parameters'''
fs = 64                  # sampling rate of processed EEG and Audio signals
n_hidden = 2
dropout = 0.25
trainPredWin = 60 * fs   # training prediction window
numBlocks = 48           # EEG signal divided into 48 blocks, 60 s each

'''############## MAIN CODE ################'''
for Subject in range(1, 2):  # choose which subject to process
    workingDir = r'E:\dolhopia\DNN\EasyModel'  # provide your own working path here
    # Load dataset. a and b indicate which stream the subject was attending to.
    a, b, chans, eeg = loadData(Subject)
    numChans = len(chans)
    numSides = 2
    Audio = io.loadmat(workingDir + '/Data/wav.mat')
    LAGS15 = [250]  # define the lag
    eegData = np.zeros((numBlocks, trainPredWin, numChans), dtype=np.float32)
    targetAudio = np.zeros((numBlocks, trainPredWin, numSides), dtype=np.float32)

    '''Split the dataset into 48 blocks'''
    for block in range(numBlocks):
        eegData[block, :, :] = eeg['eegNH' + str(Subject)][block * trainPredWin:(block + 1) * trainPredWin, :]
        # Load the envelope of the attended signal here
        targetAudio[block, :, a] = Audio["EnvA"][block * trainPredWin:(block + 1) * trainPredWin, 0]
        # Load the envelope of the unattended signal here
        targetAudio[block, :, b] = Audio["EnvU"][block * trainPredWin:(block + 1) * trainPredWin, 0]
import pandas as pd
import random as rand
# Module for importing our own database
from LoadData import loadData

# Import the dataset from our own database:
############################
'''Specify the folder name of each database:'''
DatabaseName0 = "FinalDatabase30"
DatabaseName01 = "FinalDatabase301"
DatabaseName02 = "FinalDatabase302"
DatabaseName03 = "FinalDatabase303"
DatabaseName04 = "FinalDatabase304"
DatabaseName05 = "FinalDatabase305"

''' In this variation we use only DTt '''

''' ================================================================ '''
[data0a, data0b, data0c] = loadData(DatabaseName0)
train_experiments0 = []
train_solutions0 = []
test_experiments0 = []
test_solutions0 = []
validation_experiments0 = []
validation_solutions0 = []
''' ================================================================ '''
[data01a, data01b, data01c] = loadData(DatabaseName01)
train_experiments01 = []
train_solutions01 = []
test_experiments01 = []
test_solutions01 = []
validation_experiments01 = []
validation_solutions01 = []
''' ================================================================ '''
def cleanUp():
    df = loadData()
    df = df.drop_duplicates(subset=['TimeSt', 'Latitude', 'Longitude'], keep='first')
    return df
from sklearn.cross_validation import KFold
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Activation
import numpy as np
import keras
import math
from LoadData import loadData

# Load data
train_X, test_X, train_Y, test_Y = loadData('diabetes.csv')


def KerasModel():
    model = Sequential()
    model.add(Dense(8, input_dim=8, activation='relu'))
    model.add(Dense(12, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    adamm = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                  epsilon=math.pow(10, -8), decay=0.0)
    model.compile(optimizer=adamm, loss='binary_crossentropy',
                  metrics=['accuracy'])
network = makeNetwork(in_layer_size, 2000, out_layer_size)
dataset = SupervisedDataSet(in_layer_size, out_layer_size)
trainer = BackpropTrainer(network, dataset, learningrate=0.1 / 200)

for epoch in range(3):
    print("Epoch " + str(epoch + 1))
    training_file = open("training-data.pkl", "rb")
    validation_file = open("validation-data.pkl", "rb")
    batch_iter = 0
    while True:
        print("batch_iter: " + str(batch_iter))
        training_set_x, training_set_y = loadData(training_file, 1)
        print(len(training_set_x))
        if len(training_set_x) == 0:
            break
        print("there")
        for i in range(len(training_set_x)):
            dataset.addSample(training_set_x[i], training_set_y[i])
        print("here")
        trainer.train()
        print("now")
        dataset.clear()
        batch_iter += 1
    # Clear references to these so the garbage collector can clean them
    # once the garbage collector chooses to.
print(line, file=output)
handle.write(line + '\n')
print("*********************************************\n", file=output)

# load data
dataset = args.dataset.lower()
if dataset == 'synthetic':
    load_path = args.load_path + dataset + '/' + str(args.ncliq) + '/'
else:
    load_path = args.load_path + dataset + '/'
features = args.features.lower()
labels = args.labels.lower()
transmode = args.transfer_mode
B_from_A = args.b_from_a
visualize = args.visualize
n_nodes, n_features, n_labels, A, Afeatures, Alabels, Atrain, Atest, B, Bfeatures, Blabels = loadData(
    load_path, features, labels, transmode, B_from_A, visualize)
if labels == 'pagerank':
    n_layers = max(10, args.depth)
else:
    n_layers = max(1, args.depth)
n_embedding = args.nembedding
topology_similarity = args.topology_similarity
embedding_type = args.embedding_type
embedding_similarity = args.embedding_similarity
if embedding_type == 'skipgram':
    embedding_similarity = 'softmax'
if embedding_similarity == 'softmax':
    n_negative = args.nnegative
    scale_negative = args.scale_negative
from LoadData import loadData

X, Y = loadData('./Train', spectrogram_step=15)
X_test, Y_test = loadData('./Test', spectrogram_step=15)

import tflearn
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Building Residual Network
# | Adam | epoch: 040 | loss: 0.03133 - acc: 0.9901 | val_loss: 0.06419 - val_acc: 0.9819 -- iter: 5206/5206
net = tflearn.input_data(shape=[None, 121, 35])
net = tflearn.max_pool_1d(net, 4)
net = tflearn.conv_1d(net, 128, 6, activation='relu')
net = tflearn.max_pool_1d(net, 2)
net = tflearn.conv_1d(net, 128, 3, activation='relu')
net = tflearn.avg_pool_1d(net, 2)
net = tflearn.fully_connected(net, 128, activation='relu')
net = tflearn.dropout(net, 0.7)
net = tflearn.fully_connected(net, 14, activation='softmax')
net = tflearn.regression(net, optimizer='adam', loss='categorical_crossentropy',
                         learning_rate=0.005)
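# The snippet above only defines the graph; a training step with tflearn's DNN
# wrapper might look like this (hyperparameters are illustrative, chosen to
# match the 40-epoch run quoted in the comment above).
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(X, Y, n_epoch=40, validation_set=(X_test, Y_test),
          shuffle=True, batch_size=64, show_metric=True)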
from LoadData import loadData
import os

X, _, P = loadData("./Train")

i = 0
for elem in X:
    value = abs(sum(elem))
    if value < 1:
        os.remove(P[i])
        print("Very weak file: " + P[i])
    elif value < 3:
        print("Weak file: " + P[i])
    i = i + 1
def main():
    id_ens = dict_idEns["maths"]
    X, y = loadData(id_ens)  # df_year1, df_year2
    menuOptions(X, y)
    print("final")
    'AD': np.array([0, 0, 1])
}
group = {0: 'CN', 1: 'MCI', 2: 'AD'}
num_labels = 3

# file
data_path = '/home/share/data/ADNI1_Annual_2_Yr_3T'
summary_path = '/home/zhengjiaqi/zhengjiaqi/summary/'

# device
device = '/gpu:1'
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4"

trainNum = 270
Xshape = (111, 111, 111)
X, Y = loadData(data_path, groupMap, Xshape)
X = X.reshape((-1, 111, 111, 111, 1))
Y = Y.reshape((-1, 3))
X_train = X[:270]
X_test = X[270:]
Y_train = Y[:270]
Y_test = Y[270:]


# Accuracy function
def get_accuracy(predictions, labels):
    return 100 * tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(predictions, 1), tf.argmax(labels, 1)),
                tf.float32))
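# Quick sanity check of get_accuracy with dummy one-hot tensors; TensorFlow 1.x
# graph mode is assumed here, consistent with the tf.* calls above.
preds = tf.constant([[0.9, 0.05, 0.05], [0.1, 0.8, 0.1]])
labs = tf.constant([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
with tf.Session() as sess:
    print(sess.run(get_accuracy(preds, labs)))  # 50.0: one of two predictions correct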