def main(args):
    torch.manual_seed(args.seed)
    train_loader, test_loader = get_train_data(args.data_root_path, args.batch_size, args.test_batch_size)
    model = MnistCNNModel()
    train_and_test(model, train_loader, test_loader, args.epochs, args.lr, args.momentum, args.log_interval)
    torch.save(model.state_dict(), args.model_file)
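MnistCNNModel, train_and_test, and get_train_data come from elsewhere in the project. A minimal sketch of get_train_data built on torchvision, assuming it returns a train and a test DataLoader:

# Hypothetical sketch of get_train_data, assuming it wraps torchvision's
# MNIST dataset into a train and a test DataLoader.
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

def get_train_data(data_root_path, batch_size, test_batch_size):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),  # common MNIST mean/std
    ])
    train_set = datasets.MNIST(data_root_path, train=True, download=True, transform=transform)
    test_set = datasets.MNIST(data_root_path, train=False, transform=transform)
    return (DataLoader(train_set, batch_size=batch_size, shuffle=True),
            DataLoader(test_set, batch_size=test_batch_size, shuffle=False))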
def train_evaluate():
    print_dict('Experiment params:', params)
    classifier = get_classifier()

    print('Training ({})...'.format(param('classifier_type')))
    train_start = time.time()
    classifier.train(get_input_fn(data.get_train_data()))
    train_end = time.time()
    print('Training completed in {} seconds'.format(train_end - train_start))

    print('Evaluating...')
    result = classifier.evaluate(get_input_fn(data.get_eval_data()))
    print_dict('Evaluation result:', result)
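get_input_fn is not shown. The train/evaluate call pattern suggests a tf.estimator-style classifier, in which case get_input_fn would wrap a (features, labels) pair into the zero-argument callable the Estimator API expects. A sketch under that assumption:

import tensorflow as tf

def get_input_fn(dataset, batch_size=128, shuffle=False):
    # dataset is assumed to be a (features, labels) pair of arrays
    features, labels = dataset
    def input_fn():
        ds = tf.data.Dataset.from_tensor_slices((features, labels))
        if shuffle:
            ds = ds.shuffle(buffer_size=len(labels))
        return ds.batch(batch_size)
    return input_fn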
def load_data(use_cached=True):
    if use_cached and path.isfile(CACHE_FILENAME):
        # joblib accepts a filename directly; no need to leave a file handle open
        data = joblib.load(CACHE_FILENAME)
        X_train = data['X_train']
        y_train = data['y_train']
        vectorizer = data['vectorizer']
    else:
        X_train, y_train, vectorizer = get_train_data()
        data = {
            'X_train': X_train,
            'y_train': y_train,
            'vectorizer': vectorizer
        }
        joblib.dump(data, CACHE_FILENAME, compress=True)
    return X_train, y_train, vectorizer
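Typical call pattern: the first run pays the vectorization cost and writes the cache; later runs load it.

# First run (or use_cached=False): build features and write CACHE_FILENAME.
X_train, y_train, vectorizer = load_data(use_cached=False)
# Subsequent runs: load the arrays and the fitted vectorizer from the cache.
X_train, y_train, vectorizer = load_data()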
def main():
    print('=====> Loading Dataset')
    train_set, val_set = get_train_data()
    train_loader = DataLoader(dataset=train_set, batch_size=Config.BATCH_TRAIN, shuffle=True)
    val_loader = DataLoader(dataset=val_set, batch_size=Config.BATCH_VAL, shuffle=True)
    data_loader = {'train': train_loader, 'val': val_loader}

    print('=====> Building Model')
    model = resnet18()
    criterion = nn.BCELoss()

    if Config.PHASE.lower() == 'train':
        print('=====> start training')
        _ = train(data_loader, model, criterion)
        print('=====finished train=====')
    elif Config.PHASE.lower() == 'predict':
        print('predict')
        pass
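The train helper is defined elsewhere; a minimal sketch of a compatible loop follows. Note that nn.BCELoss expects probabilities in [0, 1], so the sketch applies a sigmoid to the raw resnet18 outputs; the project's own train may instead use a model with a sigmoid head, or nn.BCEWithLogitsLoss.

import torch

def train(data_loader, model, criterion, epochs=10, lr=1e-3):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        for inputs, targets in data_loader['train']:
            optimizer.zero_grad()
            # BCELoss needs values in [0, 1], hence the sigmoid
            outputs = torch.sigmoid(model(inputs))
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
    return model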
def train_line(model, args):
    model.to(args.device)
    model.train()

    bce_loss = torch.nn.BCEWithLogitsLoss()
    mse_loss = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 100, gamma=0.99)

    avg_loss = 0
    loss_history = numpy.zeros(args.max_epoch)
    loss_history_file = "loss_history_line/{}.txt".format(datetime.now().strftime("%Y%m%d-%H%M"))
    saves_folder = "saves_line/MODEL-{}".format(datetime.now().strftime("%Y%m%d-%H%M"))
    os.makedirs("loss_history_line/", exist_ok=True)
    os.makedirs("saves_line/", exist_ok=True)

    for epoch in range(1, args.max_epoch + 1):
        start = time.time()
        optimizer.zero_grad()
        data, truth = get_train_data(args.batch_size, args.device)
        mask = truth[:, :, 0].bool()  # boolean mask; indexing with .byte() is deprecated
        preds = model(data)
        loss_c = bce_loss(preds[:, :, 0], truth[:, :, 0])
        loss_o = mse_loss(preds[:, :, 1][mask], truth[:, :, 1][mask])
        loss_s = mse_loss(preds[:, :, 2][mask], truth[:, :, 2][mask])
        loss = loss_c + loss_o + loss_s
        loss.backward()
        optimizer.step()
        scheduler.step()

        # exponential moving average of the total loss
        avg_loss = 0.9 * avg_loss + 0.1 * loss.item()
        loss_history[epoch - 1] = avg_loss
        print(
            "#{:04d} ".format(epoch),
            "| LR: {:.1e}".format(scheduler.get_last_lr()[0]),  # get_lr() is deprecated
            "| Loss: {:.2e} ({:.2e}, {:.2e}, {:.2e})".format(avg_loss, loss_c, loss_o, loss_s),
            "| Range: ({:.2e}, {:.2e})".format(
                torch.sigmoid(preds[:, :, 0].min()).item(),
                torch.sigmoid(preds[:, :, 0].max()).item(),
            ),
            "| T: {:4.2f}s".format(time.time() - start),
        )

        if args.checkpoint != 0 and epoch % args.checkpoint == 0:
            numpy.savetxt(loss_history_file, loss_history[0:epoch])
            print("NOTE: Loss history available at {}".format(loss_history_file))

        if args.eval_per != 0 and epoch % args.eval_per == 0:
            save_file = saves_folder + "-{:04d}.pth".format(epoch)
            torch.save(model.state_dict(), save_file)
            print("NOTE: Model state dict available at {}".format(save_file))
            with torch.no_grad():
                dirname = "results_line/eval_{}/".format(epoch)
                os.makedirs(dirname, exist_ok=True)
                model.eval()
                eval_data, eval_images = get_eval_data(args.batch_size, args.device)
                eval_preds = model(eval_data)
                eval_preds = eval_preds.cpu().numpy()  # .cpu() is a no-op for CPU tensors
                for i, (pred, image) in enumerate(zip(eval_preds, eval_images)):
                    draw_pred_line(image, pred)
                    image.save(dirname + "{}.png".format(i))
                print("NOTE: Eval result available at {}".format(dirname))
                model.train()
def main():
    data = get_train_data()

    # Train data
    train, val = train_test_split(data, test_size=200, random_state=1)
    train += train
    train += train
    train += train  # 8x oversampling of the training set

    # Valid data
    val_a = np.zeros((len(val),) + config.img_shape, dtype=K.floatx())  # preprocessed validation images
    val_b = np.zeros((len(val), 4), dtype=K.floatx())  # preprocessed bounding boxes
    for i, (p, coords) in enumerate(tqdm(val)):
        img, trans = read_for_validation(p)
        coords = coord_transform(coords, mat_inv(trans))
        x0, y0, x1, y1 = bounding_rectangle(coords, img.shape)
        val_a[i, :, :, :] = img
        val_b[i, 0] = x0
        val_b[i, 1] = y0
        val_b[i, 2] = x1
        val_b[i, 3] = y1

    # Train using cyclic learning rate
    for num in range(1, 4):
        model_name = 'cropping-%01d.h5' % num
        model = build_model()
        print(model_name)
        model.compile(Adam(lr=0.032), loss='mean_squared_error')
        model.fit_generator(
            TrainingData(train),
            epochs=50, max_queue_size=12, workers=4, verbose=1,
            validation_data=(val_a, val_b),
            callbacks=[
                EarlyStopping(monitor='val_loss', patience=9, min_delta=0.1, verbose=1),
                ReduceLROnPlateau(monitor='val_loss', patience=3, min_delta=0.1,
                                  factor=0.25, min_lr=0.002, verbose=1),
                ModelCheckpoint(model_name, save_best_only=True, save_weights_only=True),
            ])
        model.load_weights(model_name)
        model.evaluate(val_a, val_b, verbose=0)

    # Now choose which model to use
    model.load_weights('cropping-1.h5')
    loss1 = model.evaluate(val_a, val_b, verbose=0)
    model.load_weights('cropping-2.h5')
    loss2 = model.evaluate(val_a, val_b, verbose=0)
    model.load_weights('cropping-3.h5')
    loss3 = model.evaluate(val_a, val_b, verbose=0)
    model_name = 'cropping-1.h5'
    if loss2 <= loss1 and loss2 < loss3:
        model_name = 'cropping-2.h5'
    if loss3 <= loss1 and loss3 <= loss2:
        model_name = 'cropping-3.h5'
    model.load_weights(model_name)

    # Variance normalization
    model2 = build_model(with_dropout=False)
    model2.load_weights(model_name)
    model2.compile(Adam(lr=0.002), loss='mean_squared_error')
    model2.evaluate(val_a, val_b, verbose=0)

    # Recompute the mean and variance running average without dropout
    for layer in model2.layers:
        if not isinstance(layer, BatchNormalization):
            layer.trainable = False
    model2.compile(Adam(lr=0.002), loss='mean_squared_error')
    model2.fit_generator(TrainingData(), epochs=1, max_queue_size=12, workers=6,
                         verbose=1, validation_data=(val_a, val_b))
    for layer in model2.layers:
        if not isinstance(layer, BatchNormalization):
            layer.trainable = True
    model2.compile(Adam(lr=0.002), loss='mean_squared_error')
    model2.save('cropping.model')

    # Generate bounding boxes
    tagged = [p for _, p, _ in pd.read_csv(config.train_csv).to_records()]
    submit = [p for _, p, _ in pd.read_csv(config.sample_submission).to_records()]
    join = tagged + submit

    # If the picture is part of the bounding box dataset, use the golden value.
    p2bb = {}
    for p, coords in data:
        p2bb[p] = bounding_rectangle(coords, read_raw_image(p).size)

    # For other pictures, evaluate the model.
    # (do NOT reset p2bb here, or the golden values above are lost)
    for p in tqdm(join):
        if p not in p2bb:
            img, trans = read_for_validation(p)
            a = np.expand_dims(img, axis=0)
            x0, y0, x1, y1 = model2.predict(a).squeeze()
            (u0, v0), (u1, v1) = coord_transform([(x0, y0), (x1, y1)], trans)
            img = read_raw_image(p)
            u0 = max(u0, 0)
            v0 = max(v0, 0)
            u1 = min(u1, img.size[0])
            v1 = min(v1, img.size[1])
            p2bb[p] = (u0, v0, u1, v1)

    with open('./input/cropping.csv', 'w') as f:
        f.write('Image, x0, y0, x1, y1\n')
        for p in p2bb:
            u0, v0, u1, v1 = p2bb[p]
            f.write('{},{},{},{},{}\n'.format(str(p), str(u0), str(v0), str(u1), str(v1)))
import cv2
import data
import hog
import svm as SupportVectorMachine
import objectdetector as od
import evaluate
import pickle
import math

train_data, train_data_mirrored, train_labels = data.get_train_data()

svm = SupportVectorMachine.SVM("trained_svm.data")
if not svm.isTrained:
    svm.train(train_data, train_labels)

mirrored_svm = SupportVectorMachine.SVM("trained_mirrored_svm.data")
if not mirrored_svm.isTrained:
    mirrored_svm.train(train_data_mirrored, train_labels)

hog = hog.HOGDescriptor()
objectDetector = od.ObjectDetector(scales=10, scaling=0.8, stride=(6, 5),
                                   detection_threshold=0.6, overlap_threshold=0.5)

while True:
    command = input("\nType a command\n")
    arguments = command.split()
def get_performance(labels_per_class):
    clear_session()

    model = Sequential()
    model.add(Conv2D(num_filters_1, (patch_size, patch_size), activation='relu', padding="valid",
                     input_shape=(image_size, image_size, num_channels), kernel_initializer='he_uniform'))
    model.add(Conv2D(num_filters_2, (patch_size, patch_size), activation='relu',
                     kernel_initializer='he_uniform'))
    model.add(MaxPooling2D(pool_size=(patch_size, patch_size)))
    model.add(Conv2D(num_filters_3, (patch_size, patch_size), activation='relu',
                     kernel_initializer='he_uniform'))
    model.add(Flatten())
    model.add(Dropout(dropout_rate_1))
    model.add(Dense(num_hidden_1, activation='relu', kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate_2))
    model.add(Dense(num_hidden_2, activation='relu', kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate_3))
    model.add(Dense(num_labels, kernel_initializer='he_uniform', activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=learning_rate),
                  metrics=['accuracy'])

    x_train, y_train = get_train_data(labels_per_class)
    x_test, y_test = get_test_data()

    start = time()
    history = model.fit(
        x_train, y_train,
        batch_size=batch_size,
        epochs=1000,  # will be aborted by early stopping when necessary
        callbacks=[
            # should use validation data instead, but, well, I just don't
            EarlyStopping(monitor='loss', patience=10, verbose=False, mode='min'),
            ReduceLROnPlateau(monitor='loss', factor=0.3, patience=5, verbose=False, mode='min'),
        ],
        verbose=False)
    train_acc = history.history['acc'][-1]  # note: the key is 'accuracy' on newer Keras
    train_time = time() - start

    start = time()
    test_acc = model.evaluate(x_test, y_test, verbose=False)[1]
    test_time = time() - start

    return train_acc, test_acc, train_time, 1000 * test_time / len(y_test)
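A typical sweep over the function; the label budgets here are illustrative, not from the source.

for n in (10, 100, 1000):  # hypothetical labels-per-class budgets
    train_acc, test_acc, train_time, ms_per_sample = get_performance(n)
    print(f"{n}: train_acc={train_acc:.3f} test_acc={test_acc:.3f} "
          f"train_time={train_time:.1f}s inference={ms_per_sample:.2f}ms/sample")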
import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import Adam

from model import get_model
from data import get_train_data
from metrics import dice_coef, dice_coef_loss
from all_params import *

X_train, Y_train = get_train_data()
X_train = X_train.astype('float32')
Y_train = Y_train.astype('float32')
X_train /= 255.0
Y_train /= 255.0

model = get_model()
model.summary()
model.compile(optimizer=Adam(lr=BASE_LR), loss=dice_coef_loss, metrics=[dice_coef])

callbacks = [ModelCheckpoint(MODEL_CHECKPOINT_DIR + '{epoch:02d}_{loss:.06f}.hdf5',
                             monitor='loss', save_best_only=True),
             ReduceLROnPlateau(monitor='loss', factor=0.1, patience=PATIENCE,
                               min_lr=1e-07, verbose=1)]

model.fit(X_train, Y_train, batch_size=BATCH_SIZE, epochs=1, callbacks=callbacks)
model.save_weights(WEIGHTS)
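dice_coef and dice_coef_loss are imported from metrics; a sketch in the usual smoothed form used with Keras backends (the project's exact smoothing constant may differ):

from keras import backend as K

def dice_coef(y_true, y_pred, smooth=1.0):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2.0 * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def dice_coef_loss(y_true, y_pred):
    # minimizing the negative coefficient maximizes overlap
    return -dice_coef(y_true, y_pred)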
# (fragment: tail of a batching helper that pads questions and builds masks)
        questions_mask.append(np.concatenate(
            (np.ones(len(question_ids)),
             np.zeros(max_question_length - len(question_ids)))))
        answers.append(answer)
    return passages_ids, questions_ids, passages_length, questions_length, passages_mask, questions_mask, answers


vocab = {}
with open(data_cfg['vocab_file'], 'rb') as handle:
    vocab = pickle.load(handle)
    # no explicit handle.close() needed: the with-block closes the file

train_data = get_train_data(
    data_cfg['data_path'], data_cfg['train_data'], vocab,
    [preprocess_cfg['pa_max_sent_len'], preprocess_cfg['qu_max_sent_len']])
dev_data = get_dev_data(
    data_cfg['data_path'], data_cfg['dev_data'], vocab,
    [preprocess_cfg['pa_max_sent_len'], preprocess_cfg['qu_max_sent_len']])

model = RecurrentModel(preprocess_cfg['vocab_size'],
                       model_cfg['embedding_dim'],
                       model_cfg['hidden_size']).to(device)
model.train()
print(model)

optimizer = Adam(model.parameters(), train_cfg["lr"], weight_decay=train_cfg["weight_decay"])
lr_now = train_cfg["lr"]
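A worked example of the mask construction above: a 3-token question padded to max length 5 gets mask [1, 1, 1, 0, 0].

import numpy as np

question_ids = [4, 17, 2]   # toy example
max_question_length = 5
mask = np.concatenate((np.ones(len(question_ids)),
                       np.zeros(max_question_length - len(question_ids))))
print(mask)  # [1. 1. 1. 0. 0.]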
import numpy as np
import pandas as pd  # needed for pd.read_csv below
from tensorflow import keras

import data
import network as nt
import show

if __name__ == '__main__':
    FILENAME_NT = 'model.h5'
    FILENAME_TRAIN_DATA = 'data/training.csv'
    FILENAME_TEST_DATA = 'data/test.csv'

    df_train = pd.read_csv(FILENAME_TRAIN_DATA)
    df_test = pd.read_csv(FILENAME_TEST_DATA)
    X_train, y_train = data.get_train_data(df_train)
    X_test = data.get_test_data(df_test)

    model = nt.get_model()
    for i in range(10):
        image = X_test[i, :, :, 0]
        show.show_predicted_image_with_keypoints(model, data.PIPELINE, image)

    # images = np.array([X_train[i,:,:,0] for i in range(10)]).reshape(10, 96, 96, 1)
    # show.show_predicted_images_with_keypoints(model, data.PIPELINE, images)

    # -------------------------------------------------------------------
    # history = nt.fit_model(model, FILENAME_NT, X_train, y_train)
    # model.save(FILENAME_NT)
    # -------------------------------------------------------------------
# (fragment: inside model_selection, for each candidate model m = (name, estimator, ...))
    clf.fit(x_train, y_train)
    # result = pd.DataFrame.from_dict(clf.cv_results_)
    # with open(m[0]+'.csv','w') as f:
    #     result.to_csv(f)
    print('The parameters of the best ' + m[0] + ' are: ')
    print(clf.best_params_)
    y_pred = clf.predict(x_train)
    print(classification_report(y_true=y_train, y_pred=y_pred))
    y_test_pred = clf.predict(x_test)
    # print(classification_report(y_true=y_test, y_pred=y_test_pred))
    # df_test_y = pd.DataFrame(y_test_pred, columns=['Survived'])
    df = pd.DataFrame(data.get_test_PassengerId()).join(pd.DataFrame(y_test_pred, columns=['Survived']))
    print(df.head())
    df.to_csv('./titanic_test_result_' + m[0] + '.csv', index=False)


import data
import preprocess

if __name__ == '__main__':
    train_data = data.get_train_data()
    train_data = preprocess.fill_missing_data(train_data, sex_cat=True, embarked_one_hot=True)
    # train_data = preprocess.feature_selection(train_data)
    # train_data = preprocess.detect_outlier(train_data, drop=True)
    print(train_data.head())
    x_train, y_train = data.split(train_data)
    # print(y_train.values)
    x_test, y_test = data.get_test_x(), data.get_test_y()
    x_test = preprocess.fill_missing_data(x_test, is_train=False, sex_cat=True, embarked_one_hot=True)
    # poly = PolynomialFeatures(2, interaction_only=True)
    # x_train = poly.fit_transform(x_train.values)
    # x_test = poly.fit_transform(x_test.values)
    model_selection(x_train.values, y_train.values, x_test.values, y_test.values)
def test_on_dataset(dataset='sonar', kernel='rbf', n_samples=208, n_features=60):
    l = 1
    fig = plt.figure(figsize=(8, 3))
    for C in [1, 10, 100, 1000]:
        ax = fig.add_axes([0.25 * l - 0.20, 0.3, 0.18, 0.4])
        acc_1 = []
        acc_2 = []
        acc_3 = []
        # L = number of labels to flip: 0%, 10%, 20%, 30%, 40% of the samples
        for L in [0, n_samples // 10, n_samples // 5, int(n_samples // 3.3333), int(n_samples // 2.5)]:
            X_, y_ = data.get_train_data(dataset, n_samples, n_features)
            X_c, y_c = data.split_train_test(X_, y_)
            # trainer = svm.SVMTrainer(kernel, C)
            # predictor = trainer.train(X, y, remove_zero=True)
            # print(predictor.error(X_val, y_val))
            # print(acc_1, acc_2, acc_3)
            acc_1.append(0)
            acc_2.append(0)
            acc_3.append(0)
            # 5-fold cross-validation
            for i in range(5):
                X, y = [], []
                for j in range(5):
                    if j != i:
                        X.extend(X_c[j])
                        y.extend(y_c[j])
                X_val, y_val = np.array(X_c[i]), np.array(y_c[i])
                X, y = np.array(X), np.array(y)
                X, y, flip_pnts = data.apply_rand_flip(X, y, L)

                trainer = svm.SVMTrainer(kernel, C)
                predictor = trainer.train(X, y, remove_zero=True)
                acc_1[-1] += (1 - predictor.error(X_val, y_val))
                # print(acc_1[-1])

                trainer = svm.SVMTrainer(kernel, C, ln_robust=True, mu=0.1)
                predictor = trainer.train(X, y, remove_zero=True)
                acc_2[-1] += (1 - predictor.error(X_val, y_val))
                # print(acc_2[-1])

                trainer = svm.SVMTrainer(kernel, C, ln_robust=True, mu=0.5 - 1e-4)
                predictor = trainer.train(X, y, remove_zero=True)
                acc_3[-1] += (1 - predictor.error(X_val, y_val))
                # print(acc_3[-1])
            acc_1[-1] /= 5
            acc_2[-1] /= 5
            acc_3[-1] /= 5
        acc_1 = np.array(acc_1)
        acc_2 = np.array(acc_2)
        acc_3 = np.array(acc_3)
        flip_ratio = np.linspace(0, 40, acc_1.shape[0])
        plt.ylim((0.3, 1))
        print(acc_1)
        ax.plot(flip_ratio, acc_1, color="blue", label='mu=0')
        ax.plot(flip_ratio, acc_2, color="red", label='mu=0.1')
        ax.plot(flip_ratio, acc_3, color="black", label='mu=0.5')
        ax.set_xlabel('% flipped labels')
        ax.set_ylabel('test acc')
        ax.set_title("C = %d" % C)
        for side in ('right', 'top', 'left', 'bottom'):
            ax.spines[side].set_color('black')
        ax.patch.set_facecolor("white")
        ax.grid(color='r', linestyle='--', linewidth=1, alpha=0.3)
        # if l == 1:
        #     ax.legend(facecolor='white')
        l += 1
    fig.suptitle(dataset, fontsize=12)
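A typical invocation, using the defaults from the signature:

import matplotlib.pyplot as plt

test_on_dataset(dataset='sonar', kernel='rbf', n_samples=208, n_features=60)
plt.show()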
def main():
    args = parse_arguments(sys.argv[1:])
    set_seed(args['random_seed'])

    df = get_train_data()
    test_df = get_test_data()
    NUM_CLASSES = df['label'].nunique()

    train_texts, val_texts, train_labels, val_labels = train_test_split(
        df['sentence'], df['label_int'],
        random_state=args['random_seed'], test_size=.2)
    print(train_texts.shape, val_texts.shape, train_labels.shape, val_labels.shape)

    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    train_encodings = tokenizer(train_texts.to_list(), truncation=True, padding=True)
    val_encodings = tokenizer(val_texts.to_list(), truncation=True, padding=True)
    test_encodings = tokenizer(test_df['sentence'].to_list(), truncation=True, padding=True)

    train_dataset = HINTDataset(train_encodings, train_labels.values)
    val_dataset = HINTDataset(val_encodings, val_labels.values)
    test_dataset = HINTDataset(test_encodings, test_df['label_int'].values)

    model = HINTModel(num_classes=NUM_CLASSES)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.ffn.train()

    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
    optim = AdamW(model.parameters(), lr=args['learning_rate'])
    loss_fn = nn.CrossEntropyLoss()

    step = 0
    best_acc = 0
    Path(args['model_dir']).mkdir(parents=True, exist_ok=True)
    for epoch in range(args['epochs']):
        train_loss, train_acc, train_f1 = train_fn(model, train_loader, loss_fn, optim, device)
        val_loss, val_acc, val_f1 = val_fn(model, val_loader, loss_fn, device)
        print(f"{epoch+1}: train: [{train_loss:.3f}, {train_acc:.3f}, {train_f1:.3f}], "
              f"val: [{val_loss:.3f}, {val_acc:.3f}, {val_f1:.3f}]")
        if val_acc > best_acc:
            best_acc = val_acc
            step = 0
            torch.save(model.state_dict(), f"{args['model_dir']}/{args['model_path']}")
        else:
            # early stopping: quit after max_steps epochs without improvement
            step += 1
            if step >= args['max_steps']:
                break

    model.load_state_dict(torch.load(f"{args['model_dir']}/{args['model_path']}", map_location=device))
    print("model successfully loaded!")

    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
    preds, probs = inference_fn(model, test_loader, device)
    test_df['preds'] = preds
    test_df['probs'] = probs
    test_df['label_int'] = test_df['label_int'].fillna(NUM_CLASSES + 1)
    # low-confidence predictions are mapped to the extra "unknown" class id
    test_df['updated_preds'] = test_df['preds']
    test_df.loc[test_df['probs'] <= args['min_prob'], 'updated_preds'] = NUM_CLASSES + 1

    Path(args['output_dir']).mkdir(parents=True, exist_ok=True)
    test_df.to_csv(f"{args['output_dir']}/{args['test_file_name']}", index=False)

    acc1 = accuracy_score(test_df['label_int'], test_df['preds'])
    acc2 = accuracy_score(test_df['label_int'], test_df['updated_preds'])
    f11 = f1_score(test_df['label_int'], test_df['preds'], average='weighted')
    f12 = f1_score(test_df['label_int'], test_df['updated_preds'], average='weighted')
    print(f"Default: acc: {acc1}, f1_score: {f11}")
    print(f"Updated with Min Prob: acc: {acc2}, f1_score: {f12}")
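HINTDataset is defined elsewhere; a sketch following the standard transformers fine-tuning recipe, assuming it is a thin torch Dataset over the tokenizer encodings:

import torch
from torch.utils.data import Dataset

class HINTDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # one dict per sample: input_ids, attention_mask, plus the label
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)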
def test_on_bidirectional_lstm(forward_model, backward_model, classifier=MLPClassifier()):
    # Generate sentence embeddings with the forward model
    sent1_train_indices, sent2_train_indices, word_to_index, index_to_word, label_train = \
        data.get_train_data(VOCABULARY_SIZE)
    first_train_sentences = generate_sentence_embeddings(forward_model, sent1_train_indices)
    second_train_sentences = generate_sentence_embeddings(forward_model, sent2_train_indices)

    # Generate sentence embeddings with the backward model
    first_train_sentences_r = generate_sentence_embeddings(backward_model, sent1_train_indices)
    second_train_sentences_r = generate_sentence_embeddings(backward_model, sent2_train_indices)

    # Combine first train sentence (forward and backward embedding)
    first_train_sentences_combined = combine_forward_and_backward_vectors(
        first_train_sentences, first_train_sentences_r)
    # Combine second train sentence (forward and backward embedding)
    second_train_sentences_combined = combine_forward_and_backward_vectors(
        second_train_sentences, second_train_sentences_r)
    assert len(first_train_sentences_combined) == len(second_train_sentences_combined)

    # Generate feature vector by all-pair comparison, then pooling
    feature_vector_train = generate_feature_vector(
        first_train_sentences_combined, second_train_sentences_combined)
    print("Train data Shape : ", feature_vector_train.shape)

    # Generate test data embeddings
    sent1_test_indices, sent2_test_indices, word_to_index, index_to_word, label_test = \
        data.get_test_data(VOCABULARY_SIZE)
    first_test_sentences = generate_sentence_embeddings(forward_model, sent1_test_indices)
    second_test_sentences = generate_sentence_embeddings(forward_model, sent2_test_indices)

    # Generate test data embeddings, backward
    first_test_sentences_r = generate_sentence_embeddings(backward_model, sent1_test_indices)
    second_test_sentences_r = generate_sentence_embeddings(backward_model, sent2_test_indices)

    # Combine first sentence test embedding (forward and backward)
    first_test_sentences_combined = combine_forward_and_backward_vectors(
        first_test_sentences, first_test_sentences_r)
    # Combine second sentence test embedding (forward and backward)
    second_test_sentences_combined = combine_forward_and_backward_vectors(
        second_test_sentences, second_test_sentences_r)
    assert len(first_test_sentences_combined) == len(second_test_sentences_combined)

    # Generate feature vector for test: all-pair comparison, then pooling
    feature_vector_test = generate_feature_vector(
        first_test_sentences_combined, second_test_sentences_combined)
    print("Test data Shape : ", feature_vector_test.shape)

    # Build and evaluate the fully connected classifier
    build_classifier_and_test(feature_vector_train, label_train, feature_vector_test,
                              label_test, classifier, print_train_result=False)
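combine_forward_and_backward_vectors is not shown; the natural reading is a per-sentence concatenation of the two embeddings. A sketch under that assumption:

import numpy as np

def combine_forward_and_backward_vectors(forward_vecs, backward_vecs):
    # concatenate each forward embedding with its backward counterpart
    return [np.concatenate((f, b), axis=-1)
            for f, b in zip(forward_vecs, backward_vecs)]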
def test_on_forward_LSTM(model, classifier=MLPClassifier()):
    # Generate sentence embeddings with the forward model
    sent1_train_indices, sent2_train_indices, word_to_index, index_to_word, label_train = \
        data.get_train_data(VOCABULARY_SIZE)
    first_train_sentences = generate_sentence_embeddings(model, sent1_train_indices)
    second_train_sentences = generate_sentence_embeddings(model, sent2_train_indices)
    assert len(first_train_sentences) == len(second_train_sentences)

    # Generate feature vector by all-pair comparison, then pooling
    feature_vector_train = generate_feature_vector(first_train_sentences, second_train_sentences)
    print("Train data Shape : ", feature_vector_train.shape)

    # Generate test data embeddings
    sent1_test_indices, sent2_test_indices, word_to_index, index_to_word, label_test = \
        data.get_test_data(VOCABULARY_SIZE)
    first_test_sentences = generate_sentence_embeddings(model, sent1_test_indices)
    second_test_sentences = generate_sentence_embeddings(model, sent2_test_indices)
    assert len(first_test_sentences) == len(second_test_sentences)

    # Generate feature vector for test: all-pair comparison, then pooling
    feature_vector_test = generate_feature_vector(first_test_sentences, second_test_sentences)
    # print(feature_vector_test[0])
    print("Test data Shape : ", feature_vector_test.shape)

    # Build and evaluate the fully connected classifier
    build_classifier_and_test(feature_vector_train, label_train, feature_vector_test,
                              label_test, classifier, print_train_result=False)
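generate_feature_vector is described only as "all-pair comparison, then pooling". One plausible, purely hypothetical reading treats each sentence embedding as a matrix of per-token vectors, compares all token pairs by dot product, and pools the comparison matrix down to fixed-size statistics; the source's actual comparison and pooling may differ.

import numpy as np

def generate_feature_vector(first_sentences, second_sentences):
    features = []
    for a, b in zip(first_sentences, second_sentences):
        a, b = np.atleast_2d(a), np.atleast_2d(b)
        sim = a @ b.T                                        # all-pair comparison
        features.append([sim.max(), sim.min(), sim.mean()])  # pooling
    return np.asarray(features)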
    Returns:
        Tree: the learned tree (the Tree class is defined in tree.py).
    """
    if max_levels <= 0:
        # the maximum level depth is reached
        return make_leaf(data)
    feature, threshold = find_best_split(data)
    if threshold is None:
        # there is no split that gains information
        return make_leaf(data)
    new_tree = Tree()
    new_tree.leaf = False
    new_tree.feature, new_tree.threshold = feature, threshold
    data_left, data_right = split_data(data, new_tree.feature, new_tree.threshold)
    new_tree.left = c45(data_left, max_levels - 1)
    new_tree.right = c45(data_right, max_levels - 1)
    return new_tree


if __name__ == "__main__":
    # load dummy data
    from data import get_train_data, get_test_data
    train = get_train_data()
    test = get_test_data()

    TREE_DEPTH = 9
    tree = c45(train, TREE_DEPTH)
    predictions = [predict(tree, point) for point in test]
    acc = accuracy(test, predictions)
    print(acc)
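predict and accuracy live alongside Tree in tree.py; a sketch of the traversal predict presumably performs (the name of the leaf's stored value is assumed):

def predict(tree, point):
    node = tree
    while not node.leaf:
        # descend left if the feature value is at or below the threshold
        if point[node.feature] <= node.threshold:
            node = node.left
        else:
            node = node.right
    return node.prediction  # attribute name assumed; set by make_leaf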
# (fragment: tail of the command-line argument parser)
                        type=str, default='log.txt', help='log file path')
    return parser.parse_args()


args = parse_args()
writer = SummaryWriter(log_dir='logs_board/resnext')
devs = [int(x) for x in args.gpus.split(',')]
lr_step_epochs = [int(x) for x in args.lr_step_epochs.split(',')]
if args.log_dir:
    utils.create_dir(args.log_dir)
logger = utils.Logger(os.path.join(args.log_dir, args.log_file))

train_data = get_train_data()
train_loader = DataLoader(train_data, batch_size=args.batch_size,
                          num_workers=args.num_workers, shuffle=True)

model = models.resnext50_32x4d(pretrained=True)
num_fc = model.fc.in_features
model.fc = nn.Linear(num_fc, args.num_classes)

if args.net_params:
    print('=> Loading checkpoint... ')
    resume_model = torch.load(args.net_params)
    model_dict = resume_model['model']
    args.begin_epoch = resume_model['epoch']
    pred_dict = {}
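The snippet cuts off at pred_dict = {}. A common continuation for this pattern (hypothetical here, not the source's code) filters the checkpoint down to keys whose shapes still match after the fc layer was replaced:

# Hypothetical continuation: keep only checkpoint weights that still fit
# the modified model, so the new fc layer keeps its fresh initialization.
model_state = model.state_dict()
pred_dict = {k: v for k, v in model_dict.items()
             if k in model_state and v.shape == model_state[k].shape}
model_state.update(pred_dict)
model.load_state_dict(model_state)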