def getContInputOutput(trainData):
    sequence_length = 1440  # entire day of data
    # create a dictionary to map pitches to integers
    network_input = []
    network_output = []
    # create input sequences and the corresponding outputs
    for i in range(0, len(trainData) - sequence_length - PRED_TIME):
        curr, fut_ = trainData[i + sequence_length], trainData[i + sequence_length + PRED_TIME]
        sequence_in = trainData[i:i + sequence_length]
        sequence_out = (fut_ - curr) / curr
        # print type(sequence_out)
        network_input.append([price for price in sequence_in])
        network_output.append(sequence_out)
    print "cont", network_output[0]
    n_patterns = len(network_input)
    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    network_output = np.asarray(network_output)
    # normalize input
    network_input = np_utils.normalize(network_input)
    network_output = np_utils.normalize(network_output)
    # network_output = np_utils.to_categorical(network_output)
    return network_input, network_output
def feature_preprocess(feat):
    """
    Input feature is extracted according to Section 4.2 in the paper
    """
    # subject classeme + object classeme
    # feat[:, 0: 70]

    # subject TrajectoryShape + HoG + HoF + MBH motion feature
    # (since this feature is Bag-of-Word type, we l1-normalize it so that
    # each element represents the fraction instead of count)
    feat[:, 70: 1070] = np_utils.normalize(feat[:, 70: 1070], -1, 1)
    feat[:, 1070: 2070] = np_utils.normalize(feat[:, 1070: 2070], -1, 1)
    feat[:, 2070: 3070] = np_utils.normalize(feat[:, 2070: 3070], -1, 1)
    feat[:, 3070: 4070] = np_utils.normalize(feat[:, 3070: 4070], -1, 1)
    # object TrajectoryShape + HoG + HoF + MBH motion feature
    feat[:, 4070: 5070] = np_utils.normalize(feat[:, 4070: 5070], -1, 1)
    feat[:, 5070: 6070] = np_utils.normalize(feat[:, 5070: 6070], -1, 1)
    feat[:, 6070: 7070] = np_utils.normalize(feat[:, 6070: 7070], -1, 1)
    feat[:, 7070: 8070] = np_utils.normalize(feat[:, 7070: 8070], -1, 1)
    # relative position + size + motion feature
    # feat[:, 8070: 9070]
    # feat[:, 9070: 10070]
    # feat[:, 10070: 11070]
    return feat
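# Note on the call above (a minimal aside, not part of the original repo):
# np_utils.normalize(x, axis=-1, order=2) takes axis and order positionally,
# so normalize(block, -1, 1) is L1 normalization along the last axis, which is
# exactly the "l1-normalize" behaviour the docstring describes. Toy check with
# a hypothetical 2x3 block:
import numpy as np
from keras.utils import np_utils

block = np.array([[1.0, 3.0, 6.0],
                  [2.0, 2.0, 0.0]])
l1 = np_utils.normalize(block, -1, 1)                       # axis=-1, order=1
manual = block / np.abs(block).sum(axis=-1, keepdims=True)  # plain-NumPy equivalent
print(np.allclose(l1, manual))   # expected: True
print(l1.sum(axis=-1))           # each row sums to ~1.0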
def getBinInputOutput(trainData):
    sequence_length = 900  # 15 hours
    # create a dictionary to map pitches to integers
    network_input = []
    network_output = []
    # create input sequences and the corresponding outputs
    for i in range(0, len(trainData) - sequence_length - PRED_TIME):
        curr, fut_ = trainData[i + sequence_length], trainData[i + sequence_length + PRED_TIME]
        sequence_in = trainData[i:i + sequence_length]
        sequence_out = (fut_ - curr) / curr
        if (fut_ - curr) > 0:
            sequence_out = 1
        else:
            sequence_out = 0
        network_input.append([price for price in sequence_in])
        network_output.append(sequence_out)
    n_patterns = len(network_input)
    print "###### done with sequencing 1"
    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    network_output = np.asarray(network_output)
    # normalize input
    network_input = np_utils.normalize(network_input)
    # network_output = np_utils.normalize(network_output)
    network_outputCat = np_utils.to_categorical(network_output, 2)
    # for ind in range(len(network_output)):
    #     print network_outputCat[ind], network_output[ind]
    print "###### done with sequencing 2"
    return network_input, network_outputCat
def isGameOver(self, rawPixels):
    signalEncode = self.signalEncoder(rawPixels)
    cut = signalEncode[:, 40:80, :]
    flatten = normalize(cut.reshape((1, 1200)))
    if self.model.predict_classes(flatten, verbose=0) == 0:
        print('game over!')
        return True
    return False
def sum_word_embeddings(self, text):
    tokens = self.tokenize_text([text])
    X = self.transform_texts(tokens)[0]
    embed = numpy.zeros(self.EMBED_DIM)
    embeddings = self.model.layers[1].get_weights()[0]
    for (i, word) in enumerate(X):
        embed += embeddings[word]
    embed = np_utils.normalize(embed)[0]
    return embed
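# Hedged aside (assumes the Keras 2.x implementation of np_utils.normalize):
# for a 1-D input the norm is taken over the last axis and broadcast back,
# so the result has shape (1, dim); that is why the function above indexes
# the result with [0] to recover a flat unit-norm vector. Toy check:
import numpy as np
from keras.utils import np_utils

v = np.array([3.0, 4.0])
out = np_utils.normalize(v)      # default axis=-1, order=2
print(out.shape)                 # (1, 2) under Keras 2.x
print(out[0])                    # [0.6 0.8]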
def feature_preprocess(feat):
    # subject classeme + object classeme
    feat[:, 70:1070] = np_utils.normalize(feat[:, 70:1070], -1, 1)
    # subject HoG + HoF + MBH motion feature
    feat[:, 1070:2070] = np_utils.normalize(feat[:, 1070:2070], -1, 1)
    feat[:, 2070:3070] = np_utils.normalize(feat[:, 2070:3070], -1, 1)
    feat[:, 3070:4070] = np_utils.normalize(feat[:, 3070:4070], -1, 1)
    # object HoG + HoF + MBH motion feature
    feat[:, 4070:5070] = np_utils.normalize(feat[:, 4070:5070], -1, 1)
    feat[:, 5070:6070] = np_utils.normalize(feat[:, 5070:6070], -1, 1)
    feat[:, 6070:7070] = np_utils.normalize(feat[:, 6070:7070], -1, 1)
    # relativity feature
    feat[:, 7070:8070] = np_utils.normalize(feat[:, 7070:8070], -1, 1)
    return feat
def gen_model(dest_model, batch_size, epochs, nb_classes, num_model, path_data, path_dataset=None):
    img_data, labels = iter_images(batch_size, batch_size, path_data, path_dataset)
    data = np.asarray(img_data)
    data = data.astype('float32') / 255.0
    labels = np.asarray(labels)

    # Split the data
    x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.30, shuffle=True)
    np_utils.normalize(x_train)
    np_utils.normalize(x_test)
    y_train_binary = to_categorical(y_train, num_classes=nb_classes)
    y_test_binary = to_categorical(y_test, num_classes=nb_classes)

    if os.path.exists(dest_model):
        model = load_model(dest_model)
    else:
        model, history = train_model(x_train, y_train_binary, nb_classes, batch_size, epochs, num_model)
        save_model(model, history, dest_model)
        del history

    save_scores(dest_model, model, x_test, y_test_binary)
    del model
    keras.backend.clear_session()
    gc.collect()
def prepareInputData(data):
    dataset = np.array(data)
    # dataset = np.random.shuffle(dataset)
    rows, cols = dataset.shape

    ### Extracting X in shape (:,1:cols) i.e. except 1st column
    X = dataset[:, 1:cols]
    ### Normalizing X
    X = normalize(X, axis=1)
    # X = X/255

    # Extracting Y as the first column
    Y = dataset[:, 0]
    ### Resizing Y in the form (len(Y),1)
    Y = Y.reshape(Y.shape[0], 1)
    return X, Y
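# Hypothetical usage sketch (the demo array below is invented, not project data):
# prepareInputData expects the label in column 0 and features in the remaining
# columns; normalize(X, axis=1) then gives every sample (row) unit L2 norm.
import numpy as np

demo = np.array([[0.0, 3.0, 4.0],    # label 0, features (3, 4)
                 [1.0, 6.0, 8.0]])   # label 1, features (6, 8)
X, Y = prepareInputData(demo)
print(X)                             # both feature rows become [0.6, 0.8]
print(np.linalg.norm(X, axis=1))     # expected: [1. 1.]
print(Y.shape)                       # (2, 1)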
def keras_model(cleaned, train_test, test, used_data, save_id):
    model = Sequential()
    X_train = cleaned[used_data].as_matrix()
    Y_train = np_utils.to_categorical(cleaned['is_delayed'].as_matrix())
    input_dim = X_train.shape[1]
    X_test = np_utils.normalize(train_test[used_data].as_matrix(), axis=0)

    model.add(Dense(2 * input_dim, input_dim=input_dim, activation='relu'))
    model.add(Dense(input_dim, activation='relu'))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train, Y_train, epochs=50, batch_size=200)

    predictions = model.predict_proba(X_test)
    predictions0 = predictions[:, 0]
    predictions1 = predictions[:, 1]
    print('keras')
    print(roc_auc_score(train_test['is_delayed'], predictions0))
    print(roc_auc_score(train_test['is_delayed'], predictions1))
for i in range(sample_sent_len):
    for j, index in enumerate(sentence[-step_size:]):
        x[0, j] = index
    preds = model.predict(x)[0][-1]
    next_index = sample(preds)
    sentence.append(next_index)
print_sentence(sentence)

# sample some random sentences
sample_sentences(num_sentences=20, sample_sent_len=15)

# normalize the embedding weights
norm_weights = np_utils.normalize(model.get_weights()[0])

# helper that prints the words closest in meaning to a given word
def print_closest_words(word, nb_closest=10):
    index = word_index[word]
    distances = np.dot(norm_weights, norm_weights[index])
    c_indexes = np.argsort(np.squeeze(distances))[-nb_closest:][::-1]
    for c_index in c_indexes:
        print(index_word[c_index], distances[c_index])

# words with close meanings
words = [
    "3",
    "two",
    Tanh(16, 10),
])

# train
net.train(x_train, y_train, learning_rate=0.2, epochs=500)

# test
print('Accuracy in test set: ', net.get_accuracy(x_test, y_test))

# saving model
net.save_model('model_tanh8.json')

# loading MNIST data
(x_train, y_train), (x_test, y_test) = load_data()
x_train = normalize(x_train)
x_test = normalize(x_test)
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
x_train = x_train.reshape(x_train.shape[0], 28 * 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28 * 28, 1)
y_train = y_train.reshape(y_train.shape[0], 10, 1)
y_test = y_test.reshape(y_test.shape[0], 10, 1)

# neural network build
net = NeuralNetwork([
    Sigmoid(28 * 28, 32),
    Sigmoid(32, 32),
    Sigmoid(32, 10),
])
def main():
    MAX_VOCAB = 6000
    WINDOW_SIZE = 4
    LEVEL = 'char'
    EMBED_DIM = 100
    MAX_TOKEN_LEN = 15
    NB_LAYERS = 1
    NB_EPOCHS = 3

    cutoff = 10000000
    words = codecs.open('../data/Austen_Sense.txt', 'r', encoding='utf8') \
        .read().lower().split()[:cutoff]
    print('Loaded', len(words), 'words')

    cnt = Counter(words)
    most_comm = [k for k, v in cnt.most_common(500)]
    print('Most frequent:', most_comm[:50])

    word_to_int = {'UNK': 0}
    for w, c in cnt.most_common(MAX_VOCAB):
        word_to_int[w] = len(word_to_int)
    int_to_word = [None] * len(word_to_int)
    for k, v in word_to_int.items():
        int_to_word[v] = k

    if LEVEL == 'char':
        char_vector_dict, char_idx = index_characters(int_to_word)
        print(char_vector_dict.keys())
        model = build_model(vocab_size=len(word_to_int), embed_dim=EMBED_DIM, level=LEVEL,
                            token_len=MAX_TOKEN_LEN, token_char_vector_dict=char_vector_dict,
                            nb_recurrent_layers=NB_LAYERS)
        most_comm_X = vectorize_tokens(tokens=most_comm, char_vector_dict=char_vector_dict,
                                       max_len=MAX_TOKEN_LEN)
        print(most_comm_X.shape, '!!!')
    elif LEVEL == 'word':
        model = build_model(vocab_size=len(word_to_int), embed_dim=50, level=LEVEL,
                            token_len=None, token_char_vector_dict=None,
                            nb_recurrent_layers=None)

    model.summary()

    sampling_table = make_sampling_table(size=len(word_to_int))

    for e in range(NB_EPOCHS):
        idx = 0
        losses = []
        for idx in range(WINDOW_SIZE, len(words) - WINDOW_SIZE):
            seq = []
            for w in words[(idx - WINDOW_SIZE): (idx + WINDOW_SIZE)]:
                try:
                    seq.append(word_to_int[w])
                except KeyError:
                    seq.append(0)

            couples, labels = skipgrams(seq, len(word_to_int), window_size=4, negative_samples=1.,
                                        shuffle=True, categorical=False, sampling_table=sampling_table)
            if len(couples) > 1:
                couples = np.array(couples, dtype='int32')
                c_inp = couples[:, 1]
                c_inp = c_inp[:, np.newaxis]

                if LEVEL == 'word':
                    p_inp = couples[:, 0]
                    p_inp = p_inp[:, np.newaxis]
                elif LEVEL == 'char':
                    tokens = [int_to_word[i] for i in couples[:, 0]]
                    p_inp = vectorize_tokens(tokens=tokens, char_vector_dict=char_vector_dict,
                                             max_len=MAX_TOKEN_LEN)
                else:
                    raise ValueError('Wrong level param: word or char')

                labels = np.array(labels, dtype='int32')
                loss = model.train_on_batch({'pivot': p_inp, 'context': c_inp}, {'label': labels})
                losses.append(loss)

            if idx % 5000 == 0:
                print(np.mean(losses))

            if idx % 10000 == 0:
                print(np.mean(losses))
                print('Compiling repr func')
                get_activations = K.function([model.layers[0].input, K.learning_phase()],
                                             [model.layers[6].output, ])
                activations = get_activations([most_comm_X, 0])[0]
                activations = np.array(activations, dtype='float32')
                print(activations.shape, '-----')
                norm_weights = np_utils.normalize(activations)

                # dimension reduction:
                tsne = TSNE(n_components=2)
                coor = tsne.fit_transform(norm_weights)

                plt.clf()
                sns.set_style('dark')
                sns.plt.rcParams['axes.linewidth'] = 0.4
                fig, ax1 = sns.plt.subplots()

                labels = most_comm

                # first plot slices:
                x1, x2 = coor[:, 0], coor[:, 1]
                ax1.scatter(x1, x2, 100, edgecolors='none', facecolors='none')

                # clustering on top (add some colouring):
                clustering = AgglomerativeClustering(linkage='ward', affinity='euclidean', n_clusters=10)
                clustering.fit(coor)

                # add names:
                axes = zip(x1, x2, most_comm, clustering.labels_)
                for x, y, name, cluster_label in axes:
                    ax1.text(x, y, name, ha='center', va="center",
                             color=plt.cm.spectral(cluster_label / 10.),
                             fontdict={'family': 'Arial', 'size': 8})

                # control aesthetics:
                ax1.set_xlabel('')
                ax1.set_ylabel('')
                ax1.set_xticklabels([])
                ax1.set_xticks([])
                ax1.set_yticklabels([])
                ax1.set_yticks([])
                sns.plt.savefig('embeddings.pdf', bbox_inches=0)
    'DDOS attack-HOIC': 'DDoS-Attack'
}

train_csv = './dataset/idsX_train_clean.csv'
df = pd.read_csv(train_csv)
df = df.dropna()
df = shuffle(df)
df['Label'].replace(mask_label, inplace=True)
y = df.pop('Label')
X = df.drop(columns=dropped_cols, axis=1)
del [df]
X[X < 0] = 0

encoder = LabelEncoder()
y = encoder.fit_transform(y)
data_y = to_categorical(y)
data_x = normalize(X.to_numpy())
del [X, y]

inputDim = len(data_x[0])
outputDim = data_y.shape[1]
print(data_y.shape)

model = get_model(inputDim, outputDim)
model.summary()
model_json = model.to_json()
with open(PATH + "dnn-model.json", "w") as json_file:
    json_file.write(model_json)
plot_model(model, to_file=PATH + 'model-dnn.png', show_layer_names=True, show_shapes=True)

train_x, val_x, train_y, val_y = train_test_split(data_x,
def process(args):
    print "Loading graph..."
    if args.format == "adjlist":
        G = graph.load_adjacencylist(args.input, undirected=args.undirected)
    elif args.format == "edgelist":
        G = graph.load_edgelist(args.input, undirected=args.undirected)
    elif args.format == "mat":
        G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
    else:
        raise Exception("Unknown file format: '%s'. Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)

    print("Number of nodes: {}".format(len(G.nodes())))

    num_walks = len(G.nodes()) * args.number_walks
    print("Number of walks: {}".format(num_walks))

    data_size = num_walks * args.walk_length
    print("Data size (walks*length): {}".format(data_size))

    if data_size < args.max_memory_data_size:
        #print("Walking...")
        #walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
        #                                    path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))
        print("Training...")

        max_features = len(G.nodes())        # vocabulary size
        dim_proj = args.representation_size  # embedding space dimension
        nb_epoch = 1                         # number of training epochs

        # Neural network (in Keras)
        model = Sequential()
        model.add(WordContextProduct(max_features, proj_dim=dim_proj, init="uniform"))
        model.compile(loss='mse', optimizer='rmsprop')
        sampling_table = sequence.make_sampling_table(max_features)

        print("Fitting tokenizer on walks...")
        tokenizer = text.Tokenizer(nb_words=max_features)

        print "Epochs: %d" % nb_epoch
        #tokenizer.fit_on_texts( build_deepwalk_corpus_minibatch_iter(G, args.number_walks, args.walk_length))

        for e in range(nb_epoch):
            print('-' * 40)
            print('Epoch', e)
            print('-' * 40)

            #progbar = generic_utils.Progbar(tokenizer.document_count)
            samples_seen = 0
            losses = []

            # for i, seq in enumerate(tokenizer.texts_to_sequences_generator( build_deepwalk_corpus_minibatch_iter(G, args.number_walks, args.walk_length) )):
            for i, seq in enumerate(build_deepwalk_corpus_minibatch_iter(G, args.number_walks, args.walk_length)):
                # get skipgram couples for one text in the dataset
                couples, labels = sequence.skipgrams(seq, max_features, window_size=5, negative_samples=1.,
                                                     sampling_table=sampling_table)
                if couples:
                    # one gradient update per sentence (one sentence = a few 1000s of word couples)
                    X = np.array(couples, dtype="int32")
                    print "Started fitting..."
                    loss = model.fit(X, labels)

                    print "Dumping..."

                    # Dump weights to a temp file
                    weights = model.layers[0].get_weights()[0]
                    norm_weights = np_utils.normalize(weights)

                    # TODO: save weights with indices
                    np.savetxt(args.output, norm_weights)

                    losses.append(loss)
                    if len(losses) % 100 == 0:
                        # progbar.update(i, values=[("loss", np.mean(losses))])
                        losses = []
                    samples_seen += len(labels)
            print('Samples seen:', samples_seen)
        print("Training completed!")

    else:
        print("Data size {} is larger than limit (max-memory-data-size: {}). Dumping walks to disk."
              .format(data_size, args.max_memory_data_size))
        print("Walking...")
        #TODO: IMPLEMENT THAT
        print "Not implemented yet..."
        sys.exit(1)

    print "Optimization done. Saving..."
    # recover the embedding weights trained with skipgram:
    weights = model.layers[0].get_weights()[0]

    # we no longer need this
    del model

    norm_weights = np_utils.normalize(weights)

    # TODO: save weights with indices
    np.savetxt(args.output, norm_weights)
    print "Saved!"
def read_img(image_path_list):
    x_dataset = np.array([image_process.image_process(x) for x in image_path_list])
    x_dataset = np_utils.normalize(x_dataset)
    return x_dataset
n_FC = n_FCls[(param_ind // 54) % 3]

param_ind2 = 249
batchSizels = [32, 64, 128, 256]
learningRatels = [1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4, 1e-3]
DropoutRatels1 = [0.0, 0.2, 0.3]
DropoutRatels2 = [0.3, 0.5, 0.6]
batchSize = batchSizels[param_ind2 % 4]
learningRate = learningRatels[(param_ind2 // 4) % 7]
DropoutRate1 = DropoutRatels1[(param_ind2 // 28) % 3]
DropoutRate2 = DropoutRatels2[(param_ind2 // 84) % 3]
# Batch Size: 64 | Learning Rate: 0.001 | DropoutRate1: 0.3 | DropoutRate2: 0.6

X_enhancers = np.load(file_path + cell_line + '/K562enhancer_50_10.npy')
X_promoters = np.load(file_path + cell_line + '/K562promoter_50_10.npy')
labels = np.load(file_path + cell_line + '/K562_labels.npy')
X_enhancers = np_utils.normalize(X_enhancers, axis=0)
X_promoters = np_utils.normalize(X_promoters, axis=0)


def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.
print("Fold: " + str(fold)); C = Cs[fold]; gamma = gammas[fold]; BM = Best_model print "Loading Features" curSupervecPath = os.path.join(supervecPath, "trainset_" + str(fold), str(nMixtures)); V_feat = utl.readfeatures(curSupervecPath, V) O_feat = utl.readfeatures(curSupervecPath, O) T_feat = utl.readfeatures(curSupervecPath, T) E_feat = utl.readfeatures(curSupervecPath, E) X_t = np.concatenate((T_feat,E_feat),axis=0) X_train = normalize(X_t) input_shape = X_train.shape[1] dropout_rate = 0.25 opt = Adam(lr=1e-4) #Generator optimizer dopt = Adam(lr=1e-3) #Discriminator optimizer # Build Generative model ... g_input = Input(shape=(input_shape,)) x = g_input for i in range(len(args.gen_layers_shape)): x = Dense(args.gen_layers_shape[i], init=args.init, activation=args.gen_activation, bias=args.bias)(x)
def experiment(dataFile, optimizer='adam', epochs=10, batch_size=10):
    # Creating data for analysis
    time_gen = int(time.time())
    global model_name
    model_name = f"{dataFile}_{time_gen}"
    # $ tensorboard --logdir=logs/
    tensorboard = TensorBoard(log_dir='logs/{}'.format(model_name))

    seed = 7
    np.random.seed(seed)
    cvscores = []
    print('optimizer: {} epochs: {} batch_size: {}'.format(optimizer, epochs, batch_size))

    data = loadData(dataFile)
    data_y = data.pop('Label')

    # transform named labels into numerical values
    encoder = LabelEncoder()
    encoder.fit(data_y)
    data_y = encoder.transform(data_y)
    dummy_y = to_categorical(data_y)
    data_x = normalize(data.values)

    # define 5-fold cross validation test harness
    inputDim = len(data_x[0])
    print('inputdim = ', inputDim)

    # Separate out data
    # X_train, X_test, y_train, y_test = train_test_split(data_x, dummy_y, test_size=0.2)
    num = 0
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=7)
    start = timer()
    for train_index, test_index in sss.split(X=np.zeros(data_x.shape[0]), y=dummy_y):
        X_train, X_test = data_x[train_index], data_x[test_index]
        y_train, y_test = dummy_y[train_index], dummy_y[test_index]

        # create model
        model = baseline_model(inputDim, y_train.shape)

        # train
        print("Training " + dataFile + " on split " + str(num))
        model.fit(x=X_train, y=y_train, epochs=epochs, batch_size=batch_size, verbose=2,
                  callbacks=[tensorboard], validation_data=(X_test, y_test))

        # save model
        model.save(f"{resultPath}/models/{model_name}.model")
        num += 1

    elapsed = timer() - start
    scores = model.evaluate(X_test, y_test, verbose=1)
    print(model.metrics_names)
    acc, loss = scores[1] * 100, scores[0] * 100
    print('Baseline: accuracy: {:.2f}%: loss: {:.2f}'.format(acc, loss))

    resultFile = os.path.join(resultPath, dataFile)
    with open('{}.result'.format(resultFile), 'a') as fout:
        fout.write('{} results...'.format(model_name))
        fout.write('\taccuracy: {:.2f} loss: {:.2f}'.format(acc, loss))
        fout.write('\telapsed time: {:.2f} sec\n'.format(elapsed))
def train_pixelnet(dataset, batchsize, npix, max_epochs, validation_steps, run_id, bottleneck):
    datadir = 'data'
    datafile = os.path.join(datadir, '{}.h5'.format(dataset))
    validation_set_path = os.path.join(datadir, '{}-validation-sets.json'.format(dataset))
    validation_set = data.load_validation_set(validation_set_path, run_id)

    if dataset == 'uhcs':
        nclasses = 4
        cropbar = 38
    elif dataset == 'spheroidite':
        nclasses = 2
        cropbar = None

    model_dir = os.path.join('models', 'crossval', dataset, 'run{:02d}'.format(run_id))
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    images, labels, names = data.load_dataset(datafile, cropbar=cropbar)
    images = data.preprocess_images(images, equalize=True, tf=False)

    # add a channel axis (of size 1 since these are grayscale inputs)
    images = images[:, :, :, np.newaxis]
    images = np.repeat(images, 3, axis=-1)
    images = applications.vgg16.preprocess_input(images)

    # train/validation split
    train_idx, val_idx = data.validation_split(validation_set, names)
    ntrain = len(train_idx)
    X_train, y_train, names_train = images[train_idx], labels[train_idx], names[train_idx]
    X_val, y_val, names_val = images[val_idx], labels[val_idx], names[val_idx]

    inv_freq = y_train.size / np.bincount(y_train.flat)
    class_weights = np.squeeze(normalize(np.sqrt(inv_freq), order=1))
    # don't use alpha-balanced version of focal loss...
    # class_weights = None
    focus_param = 2.0

    # write the validation set to the model directory as well...
    with open(os.path.join(model_dir, 'validation_set.txt'), 'w') as vf:
        for name in names_val:
            print(name, file=vf)

    N, h, w, _ = images.shape

    steps_per_epoch = int(ntrain / batchsize)
    print('steps_per_epoch: {}'.format(steps_per_epoch))
    max_epochs = 25
    validation_steps = 10

    base_model = vgg.fully_conv_model()
    layernames = ['block1_conv2_relu', 'block2_conv2_relu', 'block3_conv3_relu',
                  'block4_conv3_relu', 'block5_conv3_relu', 'fc2_relu']
    hc = hypercolumn.build_model(base_model, layernames, batchnorm=True, mode='sparse', relu=False)
    model = pixelnet.build_model(hc, nclasses=nclasses, width=1024, mode='sparse', dropout_rate=0.1, l2_reg=0.0)

    opt = adamw.AdamW(lr=1e-3, weight_decay=5e-4, amsgrad=True)
    for layer in base_model.layers:
        layer.trainable = False

    # model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])
    model.compile(
        loss=losses.focal_crossentropy_loss(focus_param=focus_param, class_weights=class_weights),
        optimizer=opt,
        metrics=['acc']
    )

    csv_logger = callbacks.CSVLogger(os.path.join(model_dir, 'training-1.log'))
    checkpoint = callbacks.ModelCheckpoint(
        os.path.join(model_dir, 'weights.{epoch:03d}-{val_loss:.4f}.hdf5'),
        save_best_only=False, save_weights_only=True, period=25
    )

    training_data = px_utils.random_pixel_samples(
        X_train, y_train, nclasses=nclasses, replace_samples=False,
        horizontal_flip=True, vertical_flip=True, rotation_range=360,
        zoom_range=0.5, intensity_shift=0.05
    )

    f = model.fit_generator(
        training_data,
        steps_per_epoch,
        epochs=max_epochs,
        callbacks=[csv_logger, checkpoint],
        validation_data=px_utils.random_pixel_samples(X_val, y_val, nclasses=nclasses, replace_samples=False),
        validation_steps=validation_steps,
    )

    for layer in base_model.layers:
        layer.trainable = True

    # fine-tune the whole network
    opt = adamw.AdamW(lr=1e-5, weight_decay=5e-4, amsgrad=True)
    model.compile(
        loss=losses.focal_crossentropy_loss(focus_param=focus_param, class_weights=class_weights),
        optimizer=opt,
        metrics=['acc']
    )

    csv_logger = callbacks.CSVLogger(os.path.join(model_dir, 'finetune-1.log'))
    checkpoint = callbacks.ModelCheckpoint(
        os.path.join(model_dir, 'weights-finetune.{epoch:03d}-{val_loss:.4f}.hdf5'),
        save_best_only=False, save_weights_only=True, period=25
    )

    f = model.fit_generator(
        training_data,
        steps_per_epoch,
        epochs=max_epochs,
        callbacks=[csv_logger, checkpoint],
        validation_data=px_utils.random_pixel_samples(X_val, y_val, nclasses=nclasses, replace_samples=False),
        validation_steps=validation_steps,
    )
def evaluate_tf(model, X_enhancers, X_promoters, labels):
    X_enhancers = np_utils.normalize(X_enhancers, axis=0)
    X_promoters = np_utils.normalize(X_promoters, axis=0)
    evaluate(model, (X_enhancers, X_promoters), labels)
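# Brief aside (toy values, not from the original code): axis=0 normalizes each
# column to unit L2 norm across samples, i.e. per-feature scaling rather than
# the per-example scaling that the default axis=-1 would give.
import numpy as np
from keras.utils import np_utils

X = np.array([[3.0, 1.0],
              [4.0, 2.0]])
X_col = np_utils.normalize(X, axis=0)
print(np.linalg.norm(X_col, axis=0))   # expected: [1. 1.]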
model.save_weights(os.path.join(save_dir, model_save_fname), overwrite=True)

if test_model:
    print("It's test time!")
    print('Load model...')
    model.load_weights(os.path.join(save_dir, model_load_fname))

    # recover the embedding weights trained with skipgram:
    weights = model.layers[0].get_weights()[0]

    # we no longer need this
    del model

    weights[:skip_top] = np.zeros((skip_top, dim_proj))
    norm_weights = np_utils.normalize(weights)

    word_index = tokenizer.word_index
    reverse_word_index = dict([(v, k) for k, v in list(word_index.items())])
    word_index = tokenizer.word_index

    def embed_word(w):
        i = word_index.get(w)
        if (not i) or (i < skip_top) or (i >= max_features):
            return None
        return norm_weights[i]

    def closest_to_point(point, nb_closest=10):
        proximities = np.dot(norm_weights, point)
        tups = list(zip(list(range(len(proximities))), proximities))
        tups.sort(key=lambda x: x[1], reverse=True)
printArrayInfo(trainImages, trainLabels)
#showImageAndLabel(trainImages,trainLabels,144)

# Reshaping the array to 4-dims so that it can work with the Keras API (not necessary)
#trainImages = trainImages.reshape(trainImages.shape[0],28,28,1)
#testImages = testImages.reshape(testImages.shape[0],28,28,1)

# Making sure that the values are float
trainImages = trainImages.astype('float32')
testImages = testImages.astype('float32')

'''
Normalizing the RGB codes by dividing it to the max RGB value.
trainImages /= 255
testImages /= 255
'''
trainImages = normalize(trainImages)
testImages = normalize(testImages)

#Categorize the labels (not required here...)
#trainLabels = np_utils.to_categorical(trainLabels,10)
#testLabels = np_utils.to_categorical(testLabels,10)

#Building the Convolutional Neural Network (CNN)
model = Sequential([
    # transforms the format of the images from a 2d-array (of 28 by 28 pixels), to a 1d-array of 28 * 28 = 784 pixels
    Flatten(input_shape=(28, 28)),
    # Densely-connected, or fully-connected, neural layers.
    Dense(128, activation=tf.nn.relu),
    Dense(128, activation=tf.nn.relu),
    Dense(10, activation=tf.nn.softmax)
])
print 'Epoch:', e
progbar = generic_utils.Progbar(tokenizer.document_count)
samples_seen, losses = 0, []

for i, seq in enumerate(tokenizer.texts_to_sequences_generator(text_generator())):
    couples, labels = sequence.skipgrams(seq, max_features, window_size=4, negative_samples=1.,
                                         sampling_table=sampling_table)
    if couples:
        X = np.array(couples, dtype="int32")
        loss = model.train_on_batch(X, labels)
        losses.append(loss)
        if len(losses) % 100 == 0:
            progbar.update(i, values=[("loss", np.mean(losses))])
            losses = []
        samples_seen += len(labels)

weights = model.layers[0].get_weights()[0]
weights[:skip_top] = np.zeros((skip_top, dim_proj))
norm_weights = np_utils.normalize(weights)
del model

word_index = tokenizer.word_index
reverse_word_index = dict([(v, k) for k, v in list(word_index.items())])

# ----- test -----
words = ["我"]
for w in words:
    print '=' * 4, w, '=' * 4
    for r in closest_to_word(w):
        print r[0], r[1]
print(keras.__version__)

# 4. Load pre-shuffled MNIST data into train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# 5. Preprocess input data
#X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)
#X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)
#X_train = X_train.astype('float32')
#X_test = X_test.astype('float32')
#X_train /= 255
#X_test /= 255

# 6. Preprocess class labels
X_train = np_utils.normalize(X_train, axis=1)
X_test = np_utils.normalize(X_test, axis=1)
Y_train = np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)

# 7. Define model architecture
model = Sequential()
#model.add(Convolution2D(32, 3, 3, activation='relu', input_shape=(1,28,28)))
#model.add(Convolution2D(32, 3, 3, activation='relu'))
#model.add(MaxPooling2D(pool_size=(2,2)))
#model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))