import numpy as np
import tensorflow as tf

# prepareData is this project's loader, defined elsewhere in the repo.
def prepareTrainData(path=['./data/content/objnet/airplane/test'], ratio=0,
                     shape='3D', lean=True, fetch_faces=False):
    # Load the raw samples and, optionally, the face data.
    if fetch_faces:
        X, Y, faces = prepareData(path, lean=lean, fetch_faces=True)
    else:
        X, Y = prepareData(path, lean=lean, fetch_faces=False)

    # Optionally keep only a random fraction of the samples.
    if 0 < ratio < 1:
        outX, outY, out_faces = [], [], []
        for i in np.random.choice(len(X), size=int(ratio * len(X)), replace=False):
            outX.append(X[i])
            outY.append(Y[i])
            if fetch_faces:
                out_faces.append(faces[i])
        X, Y = outX, outY
        if fetch_faces:
            faces = out_faces

    # "Lean" samples carry a single channel; full samples carry six.
    n_channels = 1 if lean else 6

    # Pack into a 5D tensor for 3D convolutions, or a 4D tensor otherwise.
    if shape == '3D':
        X = tf.constant(X, shape=[len(X), n_channels, 400, 400, 1])
    else:
        X = tf.constant(X, shape=[len(X), n_channels, 400, 400])

    if fetch_faces:
        return X, Y, faces
    return X, Y
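# A minimal usage sketch of the function above (the path is the default;
# the 0.5 subsampling ratio is assumed for illustration):
#
#   X, Y, faces = prepareTrainData(path=['./data/content/objnet/airplane/test'],
#                                  ratio=0.5, shape='3D', lean=True,
#                                  fetch_faces=True)
#   print(X.shape)  # (len(X), 1, 400, 400, 1) because lean=True and shape='3D'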
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

def mnb():
    # Train a multinomial naive Bayes classifier on the bag-of-words data
    # produced by the project's prepareData loader.
    train_X, train_Y, test_X, test_Y = prepareData(word_frequency=5)
    clf = MultinomialNB()
    clf.fit(train_X, train_Y)

    # Report accuracy on both splits to expose over- or underfitting.
    train_pred = clf.predict(train_X)
    test_pred = clf.predict(test_X)
    train_acc = accuracy_score(train_Y, train_pred)
    test_acc = accuracy_score(test_Y, test_pred)
    print('Train Acc: ', train_acc)
    print('Test Acc: ', test_acc)
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def svm(word_frequency, C, kernel, poly_degree):
    # Train a support vector classifier on the same data; poly_degree
    # only takes effect when kernel='poly'.
    train_X, train_Y, test_X, test_Y = prepareData(word_frequency)
    clf = SVC(C=C, kernel=kernel, degree=poly_degree)
    clf.fit(train_X, train_Y)

    train_pred = clf.predict(train_X)
    test_pred = clf.predict(test_X)
    train_acc = accuracy_score(train_Y, train_pred)
    test_acc = accuracy_score(test_Y, test_pred)
    print('Train Acc: ', train_acc)
    print('Test Acc: ', test_acc)
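# A minimal sketch comparing the two classifiers (the hyperparameter values
# are assumed for illustration; prepareData is the project's own loader):
#
#   mnb()
#   svm(word_frequency=5, C=1.0, kernel='poly', poly_degree=2)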
def prepareTrainData(path=['./data/content/objnet/airplane/test'], ratio=0):
    # Lean variant: always loads single-channel samples together with faces.
    X, Y, faces = prepareData(path, lean=True, fetch_faces=True)

    # Optionally keep only a random fraction of the samples.
    if 0 < ratio < 1:
        outX, outY, out_faces = [], [], []
        for i in np.random.choice(len(X), size=int(ratio * len(X)), replace=False):
            outX.append(X[i])
            outY.append(Y[i])
            out_faces.append(faces[i])
        X, Y, faces = outX, outY, out_faces

    X = tf.constant(X, shape=[len(X), 1, 400, 400])
    return X, Y, faces
import random

import torch

use_cuda = torch.cuda.is_available()
use_cuda = False  # force CPU even when CUDA is available
if use_cuda:
    torch.cuda.manual_seed(0)
    torch.cuda.manual_seed_all(0)

from prepare_data import prepareData, SOS_token, EOS_token, MAX_LENGTH
from network import EncoderRNN, DecoderRNN, AttnDecoderRNN
from bleu import get_bleu_score

######################################################################
# Prepare Data
# ------------
#
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
random.shuffle(pairs)
train_pairs = pairs[:8000]
dev_pairs = pairs[8000:]

######################################################################
# Training
# ========
#
# Preparing Training Data
# -----------------------
#
# To train, for each pair we will need an input tensor (indexes of the
# words in the input sentence) and a target tensor (indexes of the words
# in the target sentence). While creating these vectors we will append
# the EOS token to both sequences.
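# A minimal sketch of the helpers the comment above describes, following
# the standard PyTorch seq2seq tutorial (the word2index attribute on the
# language objects is assumed):

def indexesFromSentence(lang, sentence):
    # Map each word of the sentence to its vocabulary index.
    return [lang.word2index[word] for word in sentence.split(' ')]

def tensorFromSentence(lang, sentence):
    # Append EOS so the decoder learns where sequences end.
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long).view(-1, 1)

def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)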
import pandas as pd

from prepare_data import prepareData
from train_ann import ann

if __name__ == '__main__':
    # A single patient record, in the same column order used for training.
    X = [[56, 1, 1, 120, 236, 0, 1, 178, 0, 0.8, 2, 0, 2]]
    X_df = pd.DataFrame(X, columns=[
        'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
        'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal'
    ])

    # Train the network, then reuse the scaler fitted on the training
    # data to normalize the new record before prediction.
    X_train, X_test, y_train, y_test, scaler = prepareData()
    model = ann(X_train, y_train, X_test, y_test)

    X_scaled = scaler.transform(X_df)
    result = model.predict(X_scaled)
    if result == 1:
        print("heart attack")
    elif result == 0:
        print("no heart attack")
def main(arg):
    use_context = arg.use_context == 1
    if arg.use_qualified_name == 1:
        use_full_path = arg.use_full_path == 1
        input_lang, output_lang, train_data = prepareDataWithFileName(
            arg.train_data_qualified, use_full_path, use_context=use_context)
        _, _, dev_data = prepareDataWithFileName(
            arg.dev_data_qualified, use_full_path, use_context=use_context)
        _, _, test_data = prepareDataWithFileName(
            arg.test_data_qualified, use_full_path, use_context=use_context)
    else:
        input_lang, output_lang, train_data = prepareData(
            arg.train_data, use_context=use_context)
        _, _, dev_data = prepareData(arg.dev_data, use_context=use_context)
        _, _, test_data = prepareData(arg.test_data, use_context=use_context)

    train_data = [process_data(d, input_lang, output_lang) for d in train_data]

    '''
    weight = [10 * math.sqrt(1.0 / output_lang.token_to_count[output_lang.idx_to_token[x]])
              for x in output_lang.idx_to_token if x > arrow_token]
    weight = [0] * (arrow_token + 1) + weight
    loss_weight = torch.FloatTensor(weight)
    '''

    model = Model(input_lang.n_word, output_lang.n_word, arg.embed_size,
                  arg.hidden_size, output_lang.kind_dict, dropout_p=arg.dropout,
                  topo_loss_factor=arg.topo_loss_factor, rec_depth=arg.rec_depth,
                  weight=None)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=arg.lr)

    best_accuracy = 0
    best_model = model.state_dict()
    epoch_start = 0
    checkpoint_num = 0

    print("Start training...")
    if arg.resume is not None:
        # Restore model, optimizer, and bookkeeping state from a checkpoint.
        print("loading from {}".format(arg.resume))
        checkpoint = torch.load(arg.checkpoint_dir + arg.resume)
        model.load_state_dict(checkpoint["model_state"])
        optimizer.load_state_dict(checkpoint["optimizer_state"])
        best_accuracy = checkpoint["best_acc"]
        epoch_start = checkpoint["epoch"]
        checkpoint_num = checkpoint["checkpoint"]

    for epoch in range(epoch_start, arg.num_epoch):
        try:
            print("epoch {}/{}".format(epoch + 1, arg.num_epoch))
            # Skip any batches already covered by the resumed checkpoint.
            epoch_loss = train(train_data[checkpoint_num * 10000:], model,
                               optimizer, epoch=epoch,
                               checkpoint_base=checkpoint_num,
                               best_accuracy=best_accuracy, dev_data=dev_data,
                               input_lang=input_lang, output_lang=output_lang)
            checkpoint_num = 0
            print("train loss: {:.4f}".format(epoch_loss))
            dev_loss, accuracy, structural_acc = eval(dev_data, input_lang,
                                                      output_lang, model)
            print("dev loss: {:.4f} accuracy: {:.4f} structural accuracy: {:.4f}"
                  .format(dev_loss, accuracy, structural_acc))
            randomEval(dev_data, model, input_lang, output_lang)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model = model.state_dict()
                torch.save(best_model, arg.model_state_file)
        except KeyboardInterrupt:
            print("Keyboard Interruption.")
            break

    print("best accuracy: {:.4f}".format(best_accuracy))
    print("Start testing...")
    model.load_state_dict(torch.load(arg.model_state_file))
    accuracy, structural_acc = eval_test(test_data, input_lang, output_lang, model)
    print("test accuracy: {:.4f} structural accuracy: {:.4f}".format(
        accuracy, structural_acc))
def main(arg):
    use_context = arg.use_qualified_name == 1
    if use_context:
        input_lang, output_lang, train_data = prepareDataWithFileName(
            arg.train_data_qualified, use_context=True)
        _, _, dev_data = prepareDataWithFileName(arg.dev_data_qualified,
                                                 use_context=True)
        _, _, test_data = prepareDataWithFileName(arg.test_data_qualified,
                                                  use_context=True)
    else:
        input_lang, output_lang, train_data = prepareData(arg.train_data)
        _, _, dev_data = prepareData(arg.dev_data)
        _, _, test_data = prepareData(arg.test_data)

    # output_lang.trim_tokens(threshold=2)
    print("Input vocab size: {}".format(input_lang.n_word))
    print("Target vocab size: {}".format(output_lang.n_word))

    batch_object = Batch(arg.batch_size, input_lang, output_lang,
                         use_context=use_context)
    # Materialize the batches as a list so they can be iterated every epoch
    # (a bare map object would be exhausted after the first epoch).
    train_data = [batch_object.variableFromBatch(p)
                  for p in batch_object.batchify(train_data)]

    encoder = Encoder(input_lang.n_word, arg.embed_size, arg.hidden_size)
    context_encoder = ContextEncoder(output_lang.n_word, arg.embed_size,
                                     arg.hidden_size)
    decoder = ContextAttnDecoder(output_lang.n_word, arg.embed_size,
                                 arg.hidden_size)
    if use_cuda:
        encoder = encoder.cuda()
        context_encoder = context_encoder.cuda()
        decoder = decoder.cuda()

    learning_rate = 3e-4
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    context_optimizer = optim.Adam(context_encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    # Halve each learning rate when the dev loss plateaus.
    encoder_optimizer_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        encoder_optimizer, patience=3, verbose=True, factor=0.5)
    context_optimizer_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        context_optimizer, patience=3, verbose=True, factor=0.5)
    decoder_optimizer_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        decoder_optimizer, patience=3, verbose=True, factor=0.5)

    # Per-token losses; reduction='none' replaces the deprecated reduce=False.
    criterion = nn.NLLLoss(reduction='none')

    best_accuracy = 0
    best_loss = float('inf')
    best_model = (encoder.state_dict(), context_encoder.state_dict(),
                  decoder.state_dict())

    print("Start training...")
    for epoch in range(arg.num_epoch):
        try:
            print("epoch {}/{}".format(epoch + 1, arg.num_epoch))
            epoch_loss = train(train_data, batch_object, encoder,
                               context_encoder, decoder, encoder_optimizer,
                               context_optimizer, decoder_optimizer, criterion)
            print("train loss: {:.4f}".format(epoch_loss))
            dev_loss, accuracy = eval(dev_data, batch_object, encoder,
                                      context_encoder, decoder, criterion)
            print("dev loss: {:.4f} accuracy: {:.4f}".format(dev_loss, accuracy))
            encoder_optimizer_scheduler.step(dev_loss)
            context_optimizer_scheduler.step(dev_loss)
            decoder_optimizer_scheduler.step(dev_loss)
            randomEval(dev_data, batch_object, encoder, context_encoder, decoder)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model = (encoder.state_dict(), context_encoder.state_dict(),
                              decoder.state_dict())
                torch.save(best_model[0], arg.encoder_state_file)
                torch.save(best_model[1], arg.context_encoder_state_file)
                torch.save(best_model[2], arg.decoder_state_file)
        except KeyboardInterrupt:
            print("Keyboard Interruption.")
            break

    print("best accuracy: {:.4f}".format(best_accuracy))
    print("Start testing...")
    encoder.load_state_dict(torch.load(arg.encoder_state_file))
    context_encoder.load_state_dict(torch.load(arg.context_encoder_state_file))
    decoder.load_state_dict(torch.load(arg.decoder_state_file))
    test_accuracy = eval_test(test_data, batch_object, encoder,
                              context_encoder, decoder)
    print("test accuracy: {:.4f}".format(test_accuracy))
loadedModelJson = jsonFile.read()
jsonFile.close()
loadedModel = model_from_json(loadedModelJson)

# Load the weights from the file that was saved in the training stage
loadedModel.load_weights("./model.h5")
print("Model is loaded")

# Compile the model
loadedModel.compile(optimizer='adam', loss='binary_crossentropy',
                    metrics=['accuracy'])
print("Model is compiled")

# Prepare the data of the input image
tagsCoordsList = prepare_data.prepareData(inputPath, outputPath, csvFileName)

# Read each image, display it, and classify all vehicles it contains
listOfFile = os.listdir(inputPath)
for imageFileName in listOfFile:
    # Clear the previous list of classified tags
    classifiedVehiclesInImg.clear()

    # Read the image and display it
    fullFileName = inputPath + imageFileName
    print("Input image: " + fullFileName)

    # Get the image id (the file name without its extension)
    imageFileNameNoPrefix = imageFileName.split(".")
    imageId = imageFileNameNoPrefix[0]
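# For reference, a minimal sketch of the save side this loader assumes,
# using the standard Keras API (the "./model.json" path is an assumption;
# only the weights file name appears in the loading code above):
#
#   model_json = model.to_json()
#   with open("./model.json", "w") as json_file:
#       json_file.write(model_json)
#   model.save_weights("./model.h5")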
#!/usr/bin/python
import mapping
import prepare_data

dataFolderPath = "../voxforge/"
storagePlace = "../output"
dialect = "American English|British English|European English|Canadian English"

# Map the raw VoxForge recordings, then prepare the resulting file list.
# int(len(fileList) * 1) keeps every file; lower the factor to subsample.
fileList = mapping.mapData(dataFolderPath, storagePlace, dialect)
prepare_data.prepareData(dataFolderPath, storagePlace, fileList,
                         int(len(fileList) * 1))