def main():
    """CLI entry point: build a task-specific trainer from the model config
    and run training with an optional checkpoint resume."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--model', help='model name to save/load checkpoints')
    parser.add_argument('-c', '--checkpoint')
    args = parser.parse_args()

    model, misc = init(args.model, args.checkpoint)
    voc = misc['voc']
    checkpoint = misc['checkpoint']
    ckpt_mng = misc['ckpt_mng']
    model_config = misc['model_config']

    kargs = config_to_kargs(model_config)

    # Pick the trainer class and the extra keyword arguments it needs.
    task = model_config.TRAINING_TASK
    if task == 'rate':
        Trainer = RankerTrainer
        kargs.update(
            rank_loss_type=model_config.RANK_LOSS_TYPE,
            loss_lambda=model_config.LOSS_LAMBDA,
        )
    elif task == 'review':
        Trainer = ReviewTrainer
        kargs['voc'] = voc
    elif task == 'nrt':
        Trainer = NRTTaskTrainer
        kargs['loss_lambda'] = model_config.LOSS_LAMBDA
    else:
        Trainer = MultiTaskTrainer
        kargs.update(
            voc=voc,
            loss_lambda=model_config.LOSS_LAMBDA,
            rank_loss_type=model_config.RANK_LOSS_TYPE,
        )

    # Ranking losses need their grouping configuration looked up.
    if kargs.get('rank_loss_type'):
        kargs['grp_config'] = config.LOSS_TYPE_GRP_CONFIG[kargs['rank_loss_type']]

    print('Training config:', {k: v for k, v in kargs.items() if k in KARGS_LOG_KEYS})

    trainer = Trainer(model, ckpt_mng, **kargs)
    if checkpoint:
        trainer.resume(checkpoint)

    train_dataset = load_dataset('train')
    dev_dataset = load_dataset('dev')

    # Ensure dropout layers are in train mode
    model.train()
    trainer.train(train_dataset, dev_dataset)
def best(experiment):
    """Retrain using the best arm found by an Ax experiment and report test MSE.

    Picks the arm whose observed mean objective is lowest, trains a network on
    the train split with that arm's hyperparameters, and prints the MSE on the
    held-out test split.

    Args:
        experiment: an Ax experiment whose data has arm names and mean values.
    """
    df = experiment.fetch_data().df
    # Lowest mean objective == best arm (objective is minimized).
    best_arm_name = df.arm_name[df['mean'] == df['mean'].min()].values[0]
    best_arm = experiment.arms_by_name[best_arm_name]
    print(best_arm)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = 100
    train_dataloader, test_dataloader = load_dataset('data/aponc_sda.npz',
                                                     batch_size, device)
    # NOTE: the original also built a combined train+test ConcatDataset/loader
    # here, but it was never used — removed as dead code.

    net = train(train_dataloader, best_arm.parameters, device)
    test_mse_loss = test(test_dataloader, net)
    print(f'MSE loss (test set): {test_mse_loss:f}')
def train_test(parameterization):
    """Ax evaluation function: train with the given hyperparameters and
    return the test-set loss.

    Args:
        parameterization: dict of hyperparameters proposed by the optimizer.

    Returns:
        The test loss of the trained network (as produced by ``test``).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = 100
    train_dataloader, test_dataloader = load_dataset('data/aponc_sda.npz',
                                                     batch_size, device)
    net = train(train_dataloader, parameterization, device)
    return test(test_dataloader, net)
# NOTE(review): this chunk begins mid-method — the enclosing `def` and class
# header are outside the visible source; indentation below is reconstructed.

        # Tail of an initializer/reset method: start from the worst possible
        # fitness, create a fresh population, and return self (fluent style).
        self.best_mse = float("inf")
        self.population = []
        self.init_population()
        return self

    def write(self):
        # Placeholder: persistence is not implemented yet.
        pass

    def visualize(self):
        # Placeholder: visualization is not implemented yet.
        pass


if __name__ == "__main__":
    import main

    data = main.load_dataset("data/ripple_0.0_50_200")

    # init ga
    # Presumably the last column is the target and the rest are features —
    # TODO confirm against load_dataset.
    input_size = data.shape[1] - 1
    hidden_layer_size = 5
    output_size = 1
    population_size = 10
    selection_size = 4
    learning_rate = 1e-3
    epochs = 10
    generations = 10
    # NOTE(review): this constructor call is truncated here — the remaining
    # arguments are outside the visible source.
    estimator = GeneticAlgorithm(
        True,
        input_size,
        hidden_layer_size,
        output_size,
        population_size,
self.f = "" self.learning_rate_decay_frequency = 99999999 self.batch_size = 32 self.num_epochs = 100 self.n_epochs_valudations = 5 self.loader_num_workers = 4 self.pretrained_weights = "runs/Sep23_05-38-54_ai-servers-3css_vn_vanilla_v2/latest_checkpoint.pth" opt = opt_config() if __name__ == "__main__": opt = opt_config() trainset, testset = load_dataset(opt) texts = [t for t in trainset.get_all_texts()] model, optimizer = create_model_and_optimizer( opt, [t for t in trainset.get_all_texts()]) rec_model = TIRGReconstructionModel(model, embed_dim=512, img_shape=[3, 120, 180]).cuda() pretrained_weights = torch.load("best_rec_model.pth") rec_model.load_state_dict(pretrained_weights) trainloader = trainset.get_loader(batch_size=opt.batch_size, shuffle=True, drop_last=True, num_workers=opt.loader_num_workers)
# NOTE(review): this chunk begins mid-function — the enclosing `def`, the loop
# the first `if` belongs to, and the definitions of `k`, `r`, `out`,
# `nn_result`, and `all_target_captions` are outside the visible source;
# indentation below is reconstructed.

        # Count queries whose first caption token (adjective) appears among
        # the first tokens of the top-k retrieved captions.
        if all_target_captions[i].split()[0] in [c.split()[0] for c in nns[:k]]:
            r += 1
    r /= len(nn_result)
    out += [('recall_top' + str(k) + '_correct_adj', r)]

    # Same recall, but on the second token of each caption (the noun).
    r = 0.0
    for i, nns in enumerate(nn_result):
        if all_target_captions[i].split()[1] in [c.split()[1] for c in nns[:k]]:
            r += 1
    r /= len(nn_result)
    out += [('recall_top' + str(k) + '_correct_noun', r)]
    return out


if __name__ == '__main__':
    import main
    import img_text_composition_models

    opt = main.parse_opt()
    trainset, testset = main.load_dataset(opt)
    embed_dim = 512
    path = 'checkpoints/checkpoint_fashion200k.pth'
    device = torch.device('cpu')
    # Build the TIRG composition model over the training-set texts, then
    # restore the saved checkpoint and evaluate on the test set.
    model = img_text_composition_models.TIRG(
        [t for t in trainset.get_all_texts()], embed_dim=embed_dim)
    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    test(opt, model, testset)
def work(data, out_json_filename, out_dot_filename):
    """Build a decision tree from *data*, dump it as JSON, and render it as PNG.

    Relies on module-level globals: `headers` (column names of the dataset)
    and `dot` (a graphviz graph object reused across calls).

    Args:
        data: rows of samples; the last column is treated as the label.
        out_json_filename: path the JSON-serialized tree is written to.
        out_dot_filename: name of the graphviz file to render/view.
    """
    # All columns except the last one (the label) are candidate features.
    indexes = list(range(len(data[0]) - 1))
    tree = build_decision_tree(headers, indexes, data)

    # save tree to file
    with open(out_json_filename, 'w') as f:
        f.write(json.dumps(tree))

    dot.clear()
    generate_graph(-1, '<', tree, 0)
    dot.format = 'png'
    dot.view(out_dot_filename, 'missing_value_output')


if __name__ == '__main__':
    folder = 'data/'
    output_folder = "missing_value_output"
    train_data_filename = os.path.join(folder, 'gene_expression_with_missing_values.csv')
    headers, origin_data = load_dataset(train_data_filename)

    # Compare three missing-value imputation strategies on the same raw data.
    train_data_random = process_data_with_random(origin_data)
    work(train_data_random, os.path.join(output_folder, 'random.json'), "random.dot")

    train_data_average = process_data_with_average(origin_data)
    work(train_data_average, os.path.join(output_folder, 'average.json'), "average.dot")

    train_data_median = process_data_with_median(origin_data)
    work(train_data_median, os.path.join(output_folder, 'median.json'), "median.dot")
import matplotlib.pyplot as plt
from scipy.stats import kendalltau
import seaborn as sns

if __name__ == '__main__':
    parser = ArgumentParser(description='Analyze the underground dataset')
    parser.add_argument('--dataset', required=True, type=str)
    args = parser.parse_args()

    location = args.dataset
    if not (path.exists(location) and path.isfile(location)):
        print("Please specify a valid path")
        exit(1)

    df = load_dataset(location)
    df = Filter.apply_all_filters(df)

    # Location values that are noise rather than real geography (placeholders,
    # marketplace slang, seller jokes, etc.), presumably dropped downstream.
    # NOTE(review): this list is truncated — it continues past the end of the
    # visible source.
    to_drop = [
        'worldwide', ' worldwide', '', 'you', 'world.', 'web', 'internet',
        'me on bmr', 'my email', 'torland', 'me', 'bmr', 'pm', 'world', 'bm',
        'foron', 'tor', 'the matrix', '------ worldwide', 'here', 'bmr pm',
        'my inbox', 'ww', 'email', 'my bmr pm', 'my computer', 'inbox', 'my',
        'my pc', 'my pm', 'digital download', 'optiman', 'cyberspace',
        'twilight zone', 'bettors paradise', 'international', 'bmr inbox',
        'darknet', 'undeclared',
        "atm's hack guides + biggest collection of ebooks/manuals",
        'the united snakes of captivity', 'entire world', 'eu', 'europe',
        'centre europe', 'midwest usa', 'korea, north', 'my bmr',
        'north korea', 'earth', 'asia', 'north america',
        'eu in sealed envelope', 'cherryflavor', 'www', 'browser',
weight_path = "logs/20200509-143023/ResNet_8_weights/checkpoint" def get_confusion_matrix(weight_path, data): (x_test, y_test) = data x_test = x_test[:500] y_test = y_test[:500] model = models.ResNet(depth=8) _ = model(x_test[0:2]) model.load_weights(weight_path) predictions = model(x_test, training=False).numpy() predictions = tf.argmax(predictions, axis=1) labels = tf.argmax(y_test, axis=1) confusion_matrix = tf.math.confusion_matrix(labels=labels, predictions=predictions, num_classes=10, dtype=tf.float32) print(confusion_matrix) confusion_matrix = tf.transpose(tf.divide(tf.transpose(confusion_matrix), tf.reduce_sum(y_test, axis=0))) print(tf.reduce_sum(y_test, axis=0)) return confusion_matrix.numpy() (_, _), data = main.load_dataset() matrix = get_confusion_matrix(weight_path=weight_path, data=data) df_cm = pd.DataFrame(matrix, range(10), range(10)) sn.set(font_scale=1.4) # for label size sn.heatmap(df_cm, annot=True, annot_kws={"size": 14}) # font size plt.show()
from os.path import dirname, join

from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.validation.validation import TrainTestSplitValidation

from main import load_dataset
from model.model_fasttext import FastTextClassifier
from models.fb_bank_2_act_fasttext.model_fasttext import FastTextPredictor

if __name__ == '__main__':
    # The corpus lives four directory levels above this file.
    data_file = join(dirname(dirname(dirname(dirname(__file__)))),
                     "data", "fb_bank_act", "corpus", "data.xlsx")
    X, y = load_dataset(data_file)

    # Train/validate a FastText classifier through the flow pipeline.
    flow = Flow()
    flow.log_folder = "log"
    flow.data(X, y)
    flow.add_model(Model(FastTextClassifier(), "FastText"))
    # flow.set_learning_curve(0.7, 1, 0.3)
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))
    # flow.set_validation(CrossValidation(cv=5))
    # flow.validation()

    # NOTE(review): these two names are unused within the visible source —
    # the script presumably continues (export/prediction) past this chunk.
    model_name = "FastText"
    model_filename = join("model", "fasttext.model")
def main():
    """Compose a query image with a modifying text via TIRG and save the
    top-5 nearest database images.

    Pipeline: rebuild the vocabulary from training texts, load the TIRG
    checkpoint, encode one source image + one modification text into a query
    feature, compute (or load cached) features for all test images, then save
    the nearest-neighbor images to ./output/target.

    NOTE(review): the source was whitespace-mangled; indentation (notably the
    extent of the `with open(...)` blocks) is reconstructed from context.
    """
    opt = parse_opt()
    trainset, testset = load_dataset(opt)
    # input_img = input()
    embed_dim = 512
    # print(trainset[0])
    texts = [t for t in trainset.get_all_texts()]
    print(len(texts))
    path = 'checkpoints/checkpoint_fashion200k.pth'
    device = torch.device('cpu')

    # Rebuild the vocabulary from all training texts — presumably this
    # reproduces the token ids the checkpoint was trained with; TODO confirm
    # the ordering of get_all_texts() is deterministic.
    vocab = SimpleVocab()
    for text in texts:
        vocab.add_text_to_vocab(text)
    vocab_size = vocab.get_size()

    model = img_text_composition_models.TIRG(vocab_size, embed_dim=embed_dim)
    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # The modification text applied to the source image.
    texts = ['green linen-blend a-line dress']
    # x = vocab.encode_text(texts)

    # Copy the source image into ./output/source for later inspection.
    in_dir = Path('./output/source')
    in_dir.mkdir(parents=True, exist_ok=True)
    input_img_path = Path(opt.dataset_path).joinpath(
        'women/dresses/casual_and_day_dresses/51727804/51727804_0.jpeg')
    with open(input_img_path, 'rb') as f:
        img = PIL.Image.open(f)
        img.save(str(in_dir.joinpath(input_img_path.parts[-1])))
        img = img.convert('RGB')
    print(input_img_path, str(in_dir.joinpath(input_img_path.parts[-1])))

    # Standard ImageNet preprocessing: resize/crop to 224 and normalize.
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(224),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
    ])
    img = transform(img).unsqueeze(0)
    # print(img.shape)
    # to tensor
    # img = torch.rand(1, 3, 256, 256)

    # Encode the text(s) into token ids and pad into a [T, B] batch.
    itexts = [vocab.encode_text(s) for s in texts]
    lengths = [len(t) for t in itexts]
    pitexts = torch.zeros((np.max(lengths), len(texts))).long()  # [T, B]
    for i in range(len(texts)):
        pitexts[:lengths[i], i] = torch.tensor(itexts[i])
    pitexts = pitexts.to(device)
    all_queries = model.compose_img_text(img, pitexts, lengths).data.cpu().numpy()

    # compute all image features
    # Features are cached in database.npy to avoid recomputing on every run.
    if os.path.isfile('database.npy'):
        with open('database.npy', 'rb') as f:
            all_imgs = np.load(f)
    else:
        imgs = []
        all_imgs = []
        for i in tqdm(range(len(testset.imgs))):
            imgs += [testset.get_img(i)]
            # Flush a full batch (or the final partial batch) through the model.
            if len(imgs) >= opt.batch_size or i == len(testset.imgs) - 1:
                if 'torch' not in str(type(imgs[0])):
                    imgs = [torch.from_numpy(d).float() for d in imgs]
                imgs = torch.stack(imgs).float()
                imgs = imgs.to(device)
                imgs = model.extract_img_feature(imgs).data.cpu().numpy()
                all_imgs += [imgs]
                imgs = []
        all_imgs = np.concatenate(all_imgs)
        with open('database.npy', 'wb') as f:
            np.save(f, all_imgs)

    # feature normalization
    for i in range(all_queries.shape[0]):
        all_queries[i, :] /= np.linalg.norm(all_queries[i, :])
    for i in range(all_imgs.shape[0]):
        all_imgs[i, :] /= np.linalg.norm(all_imgs[i, :])

    # match test queries to target images, get nearest neighbors
    top_n = 5
    sims = all_queries.dot(all_imgs.T)
    sims = np.squeeze(sims)
    nn_result = np.argsort(-sims)[:top_n]

    # Save the retrieved images into ./output/target.
    out_dir = Path('./output/target')
    out_dir.mkdir(parents=True, exist_ok=True)
    for n in nn_result:
        file_path = Path(opt.dataset_path).joinpath(
            testset.imgs[n]['file_path'])
        print(file_path)
        with open(file_path, 'rb') as f:
            img = PIL.Image.open(f)
            img.save(str(out_dir.joinpath(file_path.parts[-1])))