def main():
    """Train a sequence-labelling model, evaluate it on the test split and open a shell.

    Everything is driven by a default-constructed ``Config``: the vocabulary
    files for the word/tag processors, the three dataset files and the tag
    vocabulary used to decode predictions in the interactive shell.
    """
    print('load pre-defined configs and pre-processed dataset...')
    config = Config()

    # One processor for words (word + character vocabularies), one for tags.
    word_processor = Processor(
        config.word_vocab_filename,
        config.char_vocab_filename,
        lowercase=True,
        use_chars=True,
        allow_unk=True,
    )
    tag_processor = Processor(config.tag_filename)

    def load_split(filename):
        # All three splits share every argument except the file name.
        return Dataset(filename, config.tag_idx, word_processor, tag_processor,
                       max_iter=config.max_iter)

    train_set = load_split(config.train_filename)
    dev_set = load_split(config.dev_filename)
    test_set = load_split(config.test_filename)

    # Build and train the model, then score it on the test split.
    model = SeqLabelModel(config)
    model.train(train_set, dev_set, test_set)
    model.evaluate(test_set, eval_dev=False)

    # The shell needs the inverse (index -> tag) mapping of the tag vocabulary.
    idx_to_tag = {index: tag for tag, index in config.tag_vocab.items()}
    interactive_shell(model, word_processor, idx_to_tag)
def train():
    """Run the record conversion, build the training dataset and create the network.

    NOTE(review): ``train_dataset`` and ``yolo`` are not used by any code
    visible here; they are kept because their constructors may have side
    effects.
    """
    # Run the converter configured with the 'trainval.txt' list.
    To_tfrecords(txt_file='trainval.txt').transform()

    # Dataset over the trainval record file, with `enhance` switched on.
    train_dataset = Dataset(filenames='data/tfr_voc/trainval.tfrecords',
                            enhance=True).transform()

    yolo = YOLONET()
def get_dataset(self):
    """Create the train, dev and test datasets and store them on the instance.

    Each dataset is built from the matching ``*_file`` entry of
    ``self.config`` together with ``self.vocab`` and the config itself, and
    is stored as ``self.train_dataset``, ``self.dev_dataset`` and
    ``self.test_dataset``.
    """
    cfg = self.config
    self.train_dataset = Dataset(cfg.train_file, self.vocab, cfg)
    self.dev_dataset = Dataset(cfg.dev_file, self.vocab, cfg)
    self.test_dataset = Dataset(cfg.test_file, self.vocab, cfg)
def get_data_prep(self, name="mnist"):
    """Load a named dataset and return its preprocessed train and test data.

    Wraps the ``Dataset`` + ``Dataprep`` pair so that experiments can switch
    between datasets by name only.

    Parameters
    ----------
    name : string
        Name of the dataset to load. The original documentation lists
        mnist, fashion_mnist, cifar10 and cifar100.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)`` as exposed by ``Dataprep``.
    """
    raw = Dataset(name)
    prepared = Dataprep(raw.x_train, raw.y_train, raw.x_test, raw.y_test)
    return prepared.x_train, prepared.y_train, prepared.x_test, prepared.y_test
def calc_test_loss(test_set=None, display=True):
    """Run the model over the test data and collect loss, accuracy and BLEU.

    Args:
        test_set: Object exposing ``next_batch(batch_size)``. When omitted, a
            new ``Dataset(test_x, test_y)`` is created for this call.
        display: Forwarded to ``ProgressBar`` as ``auto_display``.

    Returns:
        Tuple ``(mean_loss, mean_accuracy, bleu, predictions, targets,
        sources)`` where the last three are lists with one entry per sample.
    """
    if test_set is None:
        # Built per call: a default written in the signature is evaluated once
        # at definition time and would be shared, with its state, by all calls.
        test_set = Dataset(test_x, test_y)

    worksum = int(len(test_x) / batch_size)
    loss_list = []
    accs = []
    predict_list = []
    target_list = []
    source_list = []

    pb = ProgressBar(worksum=worksum, info="validating...", auto_display=display)
    pb.startjob()

    for _ in range(worksum):
        batch_x, batch_y = test_set.next_batch(batch_size)
        lx = [seq_max_len] * batch_size
        ly = [seq_max_len] * batch_size
        # Count of entries > 0 per sequence, fed as the "real" lengths.
        bx = [np.sum(m > 0) for m in batch_x]
        by = [np.sum(m > 0) for m in batch_y]

        # Decoder input: targets shifted right by one with '<go>' in front.
        # `dtype=int` replaces `np.int`, which was removed in NumPy 1.24.
        go_column = np.ones((batch_y.shape[0], 1), dtype=int) * ch2ind['<go>']
        decoder_input = np.concatenate((go_column, batch_y[:, :-1]), axis=1)

        tmp_loss, tran = session.run(
            [train_loss, translations],
            feed_dict={
                x: batch_x,
                y: batch_y,
                y_in: decoder_input,
                x_len: lx,
                y_len: ly,
                y_real_len: by,
                x_real_len: bx,
            })

        loss_list.append(tmp_loss)
        accs.append(cal_acc(tran, batch_y))
        predict_list.extend(tran)
        target_list.extend(batch_y)
        source_list.extend(batch_x)
        pb.complete(1)

    return (np.average(loss_list), np.average(accs),
            get_bleu_score(predict_list, target_list),
            predict_list, target_list, source_list)
def eval_bs(test_set: Dataset, vocab: Vocab, model: Seq2Seq, params: Params):
    """Evaluate `model` on a sample of `test_set`, one example at a time.

    Running ROUGE F-scores (R1, R2, RL, RSU4) are shown on the progress bar.
    When ``params.test_save_results`` and ``params.model_path_prefix`` are both
    set, per-example details are written to
    ``<model_path_prefix>.results.tgz``.

    Args:
        test_set: Dataset providing ``generator(...)`` and ``pairs``.
        vocab: Vocabulary passed to the generator and the batch evaluator.
        model: Model to evaluate; it is switched to eval mode.
        params: Options read here: ``pointer``, ``test_sample_ratio``,
            ``test_save_results``, ``model_path_prefix``, ``pack_seq``,
            ``beam_size``, ``min_out_len``, ``max_out_len`` and
            ``out_len_in_words``.

    Returns:
        None. Scores are only reported through the progress bar.
    """
    test_gen = test_set.generator(1, vocab, None, bool(params.pointer))
    n_samples = int(params.test_sample_ratio * len(test_set.pairs))

    if params.test_save_results and params.model_path_prefix:
        result_file = tarfile.open(params.model_path_prefix + ".results.tgz", 'w:gz')
    else:
        result_file = None

    try:
        model.eval()
        r1, r2, rl, rsu4 = 0, 0, 0, 0
        prog_bar = tqdm(range(1, n_samples + 1))
        for i in prog_bar:
            batch = next(test_gen)
            scores, file_content = eval_bs_batch(
                batch, model, vocab,
                pack_seq=params.pack_seq,
                beam_size=params.beam_size,
                min_out_len=params.min_out_len,
                max_out_len=params.max_out_len,
                len_in_words=params.out_len_in_words,
                details=result_file is not None)
            if file_content:
                # Store the details as an in-memory member named by sample index.
                file_content = file_content.encode('utf-8')
                file_info = tarfile.TarInfo(name='%06d.txt' % i)
                file_info.size = len(file_content)
                result_file.addfile(file_info, fileobj=BytesIO(file_content))
            if scores:
                r1 += scores[0]['1_f']
                r2 += scores[0]['2_f']
                rl += scores[0]['l_f']
                rsu4 += scores[0]['su4_f']
            # Running averages (in percent) over the samples seen so far.
            prog_bar.set_postfix(R1='%.4g' % (r1 / i * 100),
                                 R2='%.4g' % (r2 / i * 100),
                                 RL='%.4g' % (rl / i * 100),
                                 RSU4='%.4g' % (rsu4 / i * 100))
    finally:
        # Closing finalises the gzip stream; without it the archive may be
        # left incomplete and the file handle leaks.
        if result_file is not None:
            result_file.close()
def main(_):
    """Build a CGAN from the command-line FLAGS and run training or testing.

    ``FLAGS.op`` selects the action: 0 trains, 1 tests. Any other value prints
    a message and trips an assertion. The single positional argument is
    ignored.
    """
    cg = CGAN(
        data_ob=Dataset(),
        sample_dir=FLAGS.sample_dir,
        output_size=FLAGS.output_size,
        learn_rate=FLAGS.learn_rate,
        batch_size=FLAGS.batch_size,
        z_dim=FLAGS.z_dim,
        y_dim=FLAGS.y_dim,
        log_dir=FLAGS.log_dir,
        model_path=FLAGS.model_path,
        visua_path=FLAGS.visua_path,
        y_min=FLAGS.y_min,
        y_max=FLAGS.y_max,
    )
    cg.build_model()

    # Dispatch on the requested operation.
    action = {0: cg.train, 1: cg.test}.get(FLAGS.op)
    if action is None:
        print("op should be either 0 or 1.")
        assert (False)
    else:
        action()
def TestModel(args, model, dataset, num_clips, fourier=False):
    """Return the mean reconstruction loss of `model` over `dataset`.

    Args:
        args: Options read here: ``batch_size``, ``variational`` and ``device``.
        model: Network to evaluate. It is switched to eval mode and left there.
        dataset: First argument forwarded to ``Dataset``.
        num_clips: Clips ``1 .. num_clips - 1`` are evaluated.
        fourier: When True, each batch is transformed with an STFT and the
            model is called with separate real and imaginary parts.

    Returns:
        ``np.mean`` of the per-batch losses.
    """
    eval_set = Dataset(dataset, range(1, num_clips))
    loader = torch.utils.data.DataLoader(eval_set,
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         num_workers=0)
    model.eval()
    lossfunc = vae_loss if args.variational else nn.MSELoss()

    # The analysis window does not change between batches; build it once.
    window = torch.hann_window(2048) if fourier else None

    losses = []
    # Only scalar loss values are kept, so no autograd graph is needed.
    with torch.no_grad():
        for data in loader:
            if not fourier:
                inputs = data.to(args.device)
                outputs = model(inputs)
                losses.append(lossfunc(outputs, inputs).item())
            else:
                comp_input = torch.stft(data.squeeze(),
                                        n_fft=2048,
                                        window=window,
                                        return_complex=True)
                real = comp_input.real.to(args.device)
                imag = comp_input.imag.to(args.device)
                # Add two leading singleton dimensions to each part.
                real, imag = real[None, None], imag[None, None]
                realOUT, imagOUT = model(real, imag)
                loss_real = lossfunc(realOUT, real)
                loss_imag = lossfunc(imagOUT, imag)
                losses.append(loss_real.item() + loss_imag.item())
    return np.mean(losses)
def encode(X, encoder, params):
    """Encode every series in `X` with `encoder`, one series per forward pass.

    Args:
        X: Array of series; ``X.shape[0]`` rows are produced. If it contains
            NaN, each series is cut by the number of NaNs found in
            ``batch[0, 0]`` before being encoded.
        encoder: Model to run; it is put in eval mode.
        params: Dict providing ``out_channels``, ``cuda`` and ``gpu``.

    Returns:
        ``numpy`` array of shape ``(X.shape[0], params['out_channels'])``.
    """
    has_nan = bool(np.isnan(np.sum(X)))
    loader = torch.utils.data.DataLoader(Dataset(X), batch_size=1)
    features = np.zeros((X.shape[0], params['out_channels']))
    encoder = encoder.eval()

    with torch.no_grad():
        for row, batch in enumerate(loader):
            if params['cuda']:
                batch = batch.cuda(params['gpu'])
            if has_nan:
                # Keep only the leading part: total length minus NaN count.
                length = batch.size(2) - torch.sum(
                    torch.isnan(batch[0, 0])).data.cpu().numpy()
                batch = batch[:, :, :length]
            features[row:(row + 1)] = encoder(batch).cpu()
    return features
def evaluate(self, sess, feat_index, feat_value, label, batch_size=None):
    """Compute the total, entropy and regularisation losses on the given data.

    With ``batch_size=None`` the whole input is evaluated in one session run.
    Otherwise the data is iterated in unshuffled batches and the three losses
    are averaged over batches (each batch weighted equally).

    Returns:
        Tuple ``(tloss, entloss, regloss)``.
    """
    fetches = [self.loss, self.entropy_loss, self.reg_loss]

    def run_losses(index, value, target):
        # Single inference-mode run: keep_prob 1 and is_training False.
        return sess.run(fetches,
                        feed_dict={
                            self.feat_index: index,
                            self.feat_value: value,
                            self.label: target,
                            self.keep_prob: 1,
                            self.is_training: False
                        })

    if batch_size is None:
        tloss, entloss, regloss = run_losses(feat_index, feat_value, label)
        return tloss, entloss, regloss

    tloss, entloss, regloss = 0, 0, 0
    batches = Dataset(feat_value, feat_index, label, batch_size, shuffle=False)
    for step, (b_index, b_value, b_label) in enumerate(batches, 1):
        b_tloss, b_entloss, b_regloss = run_losses(b_index, b_value, b_label)
        # Incremental mean: mean_k = mean_{k-1} + (x_k - mean_{k-1}) / k
        tloss = tloss + (b_tloss - tloss) / step
        entloss = entloss + (b_entloss - entloss) / step
        regloss = regloss + (b_regloss - regloss) / step
    return tloss, entloss, regloss
def predict(self, sess, feat_index, feat_value, batch_size=None):
    """Return the flattened model probabilities for the given features.

    With ``batch_size=None`` everything is scored in one session run;
    otherwise the inputs are scored in unshuffled batches and concatenated.

    Returns:
        One-dimensional array of probabilities.
    """
    def run_prob(index, value):
        # Inference-mode run: keep_prob 1 and is_training False.
        return sess.run([self.prob],
                        feed_dict={
                            self.feat_index: index,
                            self.feat_value: value,
                            self.keep_prob: 1,
                            self.is_training: False
                        })[0]

    if batch_size is None:
        return run_prob(feat_index, feat_value).ravel()

    # Dataset takes a label argument; no labels exist here, so None is passed.
    no_labels = [None] * len(feat_index)
    batches = Dataset(feat_value, feat_index, no_labels, batch_size,
                      shuffle=False)
    chunks = [run_prob(b_index, b_value).ravel()
              for b_index, b_value, _ in batches]
    return np.concatenate(chunks).ravel()
def Predict(self, img_path, vis=True):
    '''
    User function: Run inference on image and visualize it. Output mask saved as output_mask.npy

    The image is copied into a scratch directory "tmp_test" (recreated on
    every call) as both image and ground-truth placeholder, because the
    dataset class takes an image directory and a mask directory.

    Args:
        img_path (str): Relative path to the image file
        vis (bool): If True, predicted mask is displayed.

    Returns:
        None. The rounded predicted mask is written to "output_mask.npy".
    '''
    dirPath = "tmp_test"
    x_test_dir = dirPath + "/img_dir"
    y_test_dir = dirPath + "/gt_dir"

    if os.path.isdir(dirPath):
        shutil.rmtree(dirPath)
    os.mkdir(dirPath)
    os.mkdir(x_test_dir)
    os.mkdir(y_test_dir)

    # shutil.copy replaces a shell "cp" command built by string concatenation:
    # it handles paths with spaces or shell metacharacters, works without a
    # POSIX shell, and raises if the source file is missing.
    shutil.copy(img_path, x_test_dir)
    shutil.copy(img_path, y_test_dir)

    # Round both image dimensions up, in place, to the next multiple of 32.
    image_shape = self.system_dict["params"]["image_shape"]
    for axis in (0, 1):
        remainder = image_shape[axis] % 32
        if remainder != 0:
            image_shape[axis] += 32 - remainder

    preprocess_input = sm.get_preprocessing(
        self.system_dict["params"]["backbone"])

    test_dataset = Dataset(
        x_test_dir,
        y_test_dir,
        self.system_dict["params"]["classes_dict"],
        classes_to_train=self.system_dict["params"]["classes_to_train"],
        augmentation=get_validation_augmentation(image_shape[0],
                                                 image_shape[1]),
        preprocessing=get_preprocessing(preprocess_input),
    )

    # NOTE(review): the loader is not used below; kept in case its
    # construction has side effects.
    test_dataloader = Dataloder(test_dataset, batch_size=1, shuffle=False)

    image, gt_mask = test_dataset[0]
    image = np.expand_dims(image, axis=0)  # add a leading batch axis
    pr_mask = self.system_dict["local"]["model"].predict(image).round()
    np.save("output_mask.npy", pr_mask)

    if vis:
        visualize(
            image=denormalize(image.squeeze()),
            pr_mask=pr_mask[..., 0].squeeze(),
        )
def learn(self, transition, player_id, batch_size=64, epochs=5):
    """Update the strategy with the experience recorded in `transition`.

    The transition is first converted to the point of view of `player_id`.
    Its ``next_board``/``reward`` pairs are then fed to
    ``self.strategy.update`` in shuffled mini-batches, `epochs` times over.
    """
    pov = transition.pov(player_id)
    samples = Dataset(pov.next_board, pov.reward)
    loader = DataLoader(samples, batch_size=batch_size, shuffle=True)
    for _ in range(epochs):
        for X, y in loader:
            self.strategy.update(X, y)
def train(self):
    """Run the training loop: optimise, write summaries, log and checkpoint.

    Reads ``lr``, ``num_epochs``, ``batch_size``, ``display`` and ``ckpt_frq``
    from ``self.opts``. Checkpoints are written under ``ckpt/`` after the
    first batch of every ``ckpt_frq``-th epoch.
    """
    lr = self.opts.lr
    self.sess.run(self.init)
    train_set = Dataset(self.opts)
    train_size = len(train_set)
    batch_size = self.opts.batch_size
    batches_per_epoch = train_size / batch_size

    # Epochs are numbered from 1 and range() excludes num_epochs, so
    # num_epochs - 1 epochs are run.
    for epoch in range(1, self.opts.num_epochs):
        batch_num = 0
        # (begin, end) pairs of consecutive multiples of batch_size. The end
        # range stops before train_size, so the trailing batch is skipped,
        # including a final full one when train_size is an exact multiple.
        batch_bounds = zip(range(0, train_size, batch_size),
                           range(batch_size, train_size, batch_size))
        for batch_begin, batch_end in batch_bounds:
            begin_time = time.time()
            input_ptv, input_oct, gt_img = train_set.load_batch(
                batch_begin, batch_end)
            feed_dict = {
                self.true_images: gt_img,
                self.input_ptv: input_ptv,
                self.lr: lr,
                self.input_oct: input_oct
            }
            _, loss, summary = self.sess.run(
                [self.optimizer, self.loss, self.summaries],
                feed_dict=feed_dict)
            batch_num += 1
            self.writer.add_summary(summary,
                                    epoch * batches_per_epoch + batch_num)

            if batch_num % self.opts.display == 0:
                # Estimate: last batch time x remaining epochs x batches/epoch.
                rem_time = (time.time() - begin_time) * (
                    self.opts.num_epochs - epoch) * batches_per_epoch
                log = '-' * 20
                log += ' Epoch: {}/{}|'.format(epoch, self.opts.num_epochs)
                log += ' Batch Number: {}/{}|'.format(batch_num,
                                                      batches_per_epoch)
                log += ' Batch Time: {}\n'.format(time.time() - begin_time)
                # timedelta has no __format__, so a non-empty format spec
                # raises TypeError on Python 3; format its string form.
                log += ' Remaining Time: {:0>8}\n'.format(
                    str(datetime.timedelta(seconds=rem_time)))
                log += ' lr: {} loss: {}\n'.format(lr, loss)
                print(log)

            # if epoch % self.opts.lr_decay == 0 and batch_num == 1:
            #     lr *= self.opts.lr_decay_factor
            if epoch % self.opts.ckpt_frq == 0 and batch_num == 1:
                self.saver.save(
                    self.sess,
                    "ckpt/" + "{}_{}_{}".format(epoch, lr, loss))
def main():
    """Restore the latest checkpoint, evaluate on the test split and open a shell.

    Re-training is controlled by the local ``re_train`` switch, which is
    hard-coded to False, so the model is only restored and evaluated.
    """
    config = Config()
    re_train = False

    # One processor for words (word + character vocabularies), one for tags.
    word_processor = Processor(
        config.word_vocab_filename,
        config.char_vocab_filename,
        lowercase=True,
        use_chars=True,
        allow_unk=True,
    )
    tag_processor = Processor(config.tag_filename)

    def load_split(filename):
        # All three splits share every argument except the file name.
        return Dataset(filename, config.tag_idx, word_processor, tag_processor,
                       max_iter=config.max_iter)

    train_set = load_split(config.train_filename)
    dev_set = load_split(config.dev_filename)
    test_set = load_split(config.test_filename)

    # Build the model and restore weights from the task's checkpoint folder.
    model = SeqLabelModel(config)
    model.restore_last_session(ckpt_path='ckpt/{}/'.format(config.train_task))

    if re_train:
        model.train(train_set, dev_set, test_set)

    model.evaluate(test_set, eval_dev=False)

    # The shell needs the inverse (index -> tag) mapping of the tag vocabulary.
    idx_to_tag = {index: tag for tag, index in config.tag_vocab.items()}
    interactive_shell(model, word_processor, idx_to_tag)
def load(modelid):
    """Restore a saved model, decode a data sample and dump the sentences to JSON.

    Args:
        modelid: Suffix of the checkpoint to restore
            (``./savemodel/model<modelid>.pkl``).

    Side effects: sets ``CUDA_VISIBLE_DEVICES`` to '0', attaches a file and a
    console handler to the root logger, defines TensorFlow flags, and writes
    the decoded sentences to ``output.json``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'

    # Root logger at INFO, mirrored to ./log/log.txt and to the console.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    file_handler = logging.FileHandler("./log/log.txt", mode='w')
    console_handler = logging.StreamHandler()
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
    )
    for log_handler in (file_handler, console_handler):
        log_handler.setLevel(logging.INFO)
        log_handler.setFormatter(formatter)
        logger.addHandler(log_handler)

    def read_pickle(path):
        with open(path, 'rb') as f:
            return pickle.load(f)

    word2id = read_pickle('./dicts/word2id.pickle')
    kb2id = read_pickle('./dicts/kb2id.pickle')
    wordemb = read_pickle('./dicts/wordemb.pickle')
    kbemb = read_pickle('./dicts/kbemb.pickle')
    with open('./dicts/wordlist.json', 'r') as f:
        # Indices 0 and 1 are taken by the two special tokens.
        wordlist = ['<EOS>', '<SOS>'] + json.load(f)

    flags = tf.flags
    integer_flags = (
        ('hidden', 600),
        ('word_vocab_size', len(word2id)),
        ('word_emb_dim', 300),
        ('kb_vocab_size', len(kb2id)),
        ('kb_emb_dim', 100),
        ('maxlen', 35),
        ('batch', 128),
        ('epoch_num', 50),
    )
    for flag_name, flag_default in integer_flags:
        flags.DEFINE_integer(flag_name, flag_default, "")
    flags.DEFINE_boolean('is_train', False, "")
    flags.DEFINE_float('max_grad_norm', 0.1, "")
    flags.DEFINE_float('lr', 0.00025, "")
    config = flags.FLAGS

    valid_file = './sq/annotated_fb_data_train.txt'
    valid_dset = Dataset(valid_file, max_cnt=128)

    with tf.variable_scope('model'):
        model = Model(config, word_emb_mat=wordemb, kb_emb_mat=kbemb)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, './savemodel/model' + str(modelid) + '.pkl')
        out_idx = model.decode(sess, valid_dset)

    # Map every decoded index sequence back to a space-joined sentence.
    sentences = [' '.join(wordlist[w] for w in seq) for seq in out_idx]

    with open('output.json', 'w') as f:
        json.dump(sentences, f)
def main():
    """Train the generator/discriminator pairs and save the forward generator.

    Options come from the module-level ``parser``. After every epoch the
    forward generator's weights are written to ``<file>.tmp`` and then renamed
    to ``<file>``, so the target is only ever replaced by a fully written
    snapshot.
    """
    args = parser.parse_args()

    # Optional pre-trained classifier, loaded onto CPU storage.
    classifier = None
    if args.classifier is not None:
        snapshot = torch.load(args.classifier, map_location=lambda s, _: s)
        classifier = Classifier(snapshot['channels'])
        classifier.load_state_dict(snapshot['model'])

    def make_loader(subdir):
        # Both loaders differ only in the data sub-directory.
        return torch.utils.data.DataLoader(
            Dataset(os.path.join(DATA_DIR, subdir)),
            batch_size=args.batch,
            shuffle=True,
            drop_last=True)

    raw_loader = make_loader('raw')
    noised_loader = make_loader('noised_tgt')

    generator_f = Generator(args.channels)
    generator_r = Generator(args.channels)
    discriminator_f = Discriminator(args.channels)
    discriminator_r = Discriminator(args.channels)

    trainer = Trainer(generator_f, generator_r, discriminator_f,
                      discriminator_r, classifier, args.gpu)

    tmp_path = '{}.tmp'.format(args.file)
    for epoch in range(args.epoch):
        # The third argument is True during the first tenth of the epochs.
        trainer.train(noised_loader, raw_loader, epoch < args.epoch // 10)
        print('[{}] {}'.format(epoch, trainer), flush=True)

        snapshot = {
            'channels': args.channels,
            'model': generator_f.state_dict()
        }
        torch.save(snapshot, tmp_path)
        os.rename(tmp_path, args.file)
def main():
    """Build the dataset, derive the vocabulary settings and train LatentBow.

    Uses the module-level ``config``: sizes and special-token ids are copied
    from the built dataset onto it before the model is constructed.
    """
    tf.compat.v1.disable_eager_execution()

    # Dataset first: the model configuration depends on its vocabularies.
    dset = Dataset(config)
    dset.build()

    word2id = dset.word2id
    config.vocab_size = len(word2id)
    config.pos_size = len(dset.pos2id)
    config.ner_size = len(dset.ner2id)
    config.dec_start_id = word2id["_GOO"]
    config.dec_end_id = word2id["_EOS"]
    config.pad_id = dset.pad_id
    config.stop_words = dset.stop_words

    model = LatentBow(config)
    with tf.compat.v1.variable_scope(config.model_name):
        model.build()

    # The controller runs the training loop.
    Controller(config).train(model, dset)
def main(config):
    """Train a model on the corpus, optionally save it, then validate it.

    Args:
        config: Configuration passed to ``init_trainer``. When validation
            results are saved, ``config['output']`` is set to the similarity
            result and the whole config is pickled.
    """
    # Model type and its arguments both come from the configuration.
    model, args = init_trainer(config)
    logger.info(args)

    dataset = Dataset(args['data_path'], args)
    model.train(dataset.get_corpus())

    if args['save_model'] == True:
        logger.info('Saving Model')
        model.save(args['model_dir'])
        dataset.save(args['data_path'])

    # Validation: cluster similarities of the learned vectors.
    valid = Validation()
    vectors = model.get_vectors()
    df = dataset.get_df()
    # valid.plot_pca(vectors, df['variety_region'])
    results = valid.cluster_similarities(vectors, df)
    logger.info(results)

    if args['save_validation'] == True:
        logger.info('Saving Validation')
        config['output'] = results['similarity']
        # One pickle per run, named after the current timestamp.
        out_path = args['validation_dir'] + '{}.pkl'.format(datetime.now())
        with open(out_path, "wb") as pickleFile:
            pickle.dump(config, pickleFile)
def train_model(train_file_path, save=False):
    """Train the spam classifier and report its quality on the validation subset.

    Args:
        train_file_path (str): Path of the training set csv file.
        save (bool, optional): If true, the fitted model and the dataset's
            vectorizer are written under ``model/``. Defaults to False.
    """
    data = Dataset(train_file_path)
    x_train, y_train = data.get_train_data()
    x_val, y_val = data.get_val_data()

    classifier = MultinomialNB()
    classifier.fit(x_train, y_train)

    if save:
        # Persist both artefacts needed for later inference.
        model_path = os.path.join(parent_path, 'model/spam_detection_model.pkl')
        with open(model_path, 'wb') as model_file:
            pickle.dump(classifier, model_file)
        data.save_vectorizer(os.path.join(parent_path, 'model/vectorizer.pkl'))
        print("New model saved.")

    # Score the classifier on the held-out validation subset.
    predicted = classifier.predict(x_val)
    actual = y_val.tolist()

    print('Accuracy: %.3f' % accuracy_score(actual, predicted))
    print('F-Measure: %.3f' % f1_score(actual, predicted, average='binary'))
    print('Confusion Matrix:')
    print(confusion_matrix(actual, predicted))
    print('Report:', classification_report(actual, predicted))
def run():
    """Train the model, evaluating on dev every ``display_step`` steps.

    Whenever the dev score improves, the test set is evaluated as well and
    its predictions are written to ``pred/<script name>.pred``. The best dev
    score and the matching test score are printed at the end.
    """
    print('Loading data...')
    with open('data/data_emb', 'rb') as f:
        all_sets, embs, word2idx = pickle.load(f)

    # Embedding layer whose weights are the loaded embedding matrix.
    emb_layer = nn.Embedding(embs.shape[0], embs.shape[1])
    emb_layer.weight = nn.Parameter(torch.from_numpy(embs))

    model = Model(emb_layer).cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=hparams['learning_rate'])

    pad_keys = ('q1', 'q2')
    train_set = Dataset(all_sets[0], shuffle=True, pad_keys=pad_keys)
    dev_set = Dataset(all_sets[1], shuffle=False, pad_keys=pad_keys)
    test_set = Dataset(all_sets[2], shuffle=False, pad_keys=pad_keys)

    step = 0
    sum_loss = 0
    dev_best = 0
    test_score = 0

    print("Starting training...")
    print(hparams)
    start_time = time.time()
    for epoch in range(hparams['max_epoch']):
        batches, batch_lengths = train_set.get_batches(hparams['batch_size'],
                                                       ('q1', 'q2', 'y'))
        for b_data, b_lengths in zip(batches, batch_lengths):
            sum_loss += run_batch(b_data, b_lengths, model, optimizer)
            step += 1
            if step % hparams['display_step'] != 0:
                continue

            # Periodic report: mean loss since the last report + dev score.
            avg_loss = sum_loss / hparams['display_step']
            sum_loss = 0
            dev_score = run_epoch_eval(dev_set, model)
            out_str = (
                f'Epoc {epoch} iter {step} took {time.time() - start_time:.1f}s\n'
                f'loss:\t{avg_loss:.5f}\tdev score:\t{dev_score:.4f}')
            if dev_score > dev_best:
                dev_best = dev_score
                output_file = f'pred/{get_script_short_name(__file__)}.pred'
                test_score = run_epoch_eval(test_set, model, output_file)
                out_str += f'\t*** New best dev ***\ttest score:\t{test_score:.4f}'
            print(out_str)
            start_time = time.time()

    print('Best model on dev: dev:{:.4f}\ttest:{:.4f}'.format(
        dev_best, test_score))
def test():
    """Evaluate ``video_model`` on the 'val' split and return its accuracy.

    Uses the module-level ``args`` and ``video_model``. Running accuracy and
    an ETA (in hours) are printed every 10 batches.

    Returns:
        Tuple ``(acc, msg)``: the overall accuracy as a float and a string of
        the form ``'v_acc_<acc>_'``.
    """
    with torch.no_grad():
        dataset = Dataset('val', args)
        print('Start Testing, Data Length:', len(dataset))
        loader = dataset2dataloader(dataset,
                                    args.batch_size,
                                    args.num_workers,
                                    shuffle=False)
        print('start testing')

        v_acc = []
        # Eval mode only needs to be set once, not on every batch.
        video_model.eval()

        # `batch` rather than `input`, to avoid shadowing the builtin.
        for i_iter, batch in enumerate(loader):
            tic = time.time()
            video = batch.get('video').cuda(non_blocking=True)
            label = batch.get('label').cuda(non_blocking=True)

            if args.border:
                # Only fetched when the model actually consumes it.
                border = batch.get('duration').cuda(non_blocking=True).float()
                with autocast():
                    y_v = video_model(video, border)
            else:
                with autocast():
                    y_v = video_model(video)

            v_acc.extend((y_v.argmax(-1) == label).cpu().numpy().tolist())
            toc = time.time()

            if i_iter % 10 == 0:
                msg = ''
                msg = add_msg(msg, 'v_acc={:.5f}',
                              np.array(v_acc).reshape(-1).mean())
                # Last batch time x remaining batches, converted to hours.
                msg = add_msg(msg, 'eta={:.5f}',
                              (toc - tic) * (len(loader) - i_iter) / 3600.0)
                print(msg)

        acc = float(np.array(v_acc).reshape(-1).mean())
        msg = 'v_acc_{:.5f}_'.format(acc)
        return acc, msg
def minitrain(config, train_file, test_file, wordlist, kblist):
    """Train the model and decode the test set after every epoch.

    Two models share variables under the 'model' scope: one built with the
    incoming `config` for training, and one built after ``is_train`` is set
    to False and ``batch`` to 200, used for decoding.

    Args:
        config: Model configuration; ``is_train`` and ``batch`` are modified.
        train_file: Path passed to ``Dataset``.
        test_file: Path passed to ``testDataset``.
        wordlist: Forwarded to the decoding routine.
        kblist: Forwarded to the decoding routine.
    """
    train_dset = Dataset(train_file)
    test_dset = testDataset(test_file, shuffle=False)

    with tf.variable_scope('model'):
        model = Model(config, word_emb_mat=wordemb, kb_emb_mat=kbemb)

    config.is_train = False
    config.batch = 200
    with tf.variable_scope('model', reuse=True):
        mtest = Model(config, word_emb_mat=wordemb, kb_emb_mat=kbemb)

    saver = tf.train.Saver()
    tfconfig = tf.ConfigProto()
    # tfconfig.gpu_options.allow_growth = True

    # The context manager closes the session when training ends or fails.
    with tf.Session(config=tfconfig) as sess:
        # writer = tf.summary.FileWriter('./graph', sess.graph)
        sess.run(tf.global_variables_initializer())
        num_batch = int(math.ceil(train_dset.datasize / model.batch))

        for ei in range(model.epoch_num):
            train_dset.current_index = 0
            loss_iter = 0.0
            for _ in tqdm(range(num_batch)):
                mini_batch = train_dset.get_mini_batch(model.batch)
                if mini_batch is None:
                    break
                triples, questions, qlen, subnames = mini_batch
                feed_dict = {
                    model.triple: triples,
                    model.question: questions,
                    model.qlen: qlen,
                    model.keep_prob: 1.0,
                }
                loss, train_op, out_idx = sess.run(model.out,
                                                   feed_dict=feed_dict)
                # writer.add_graph(sess.graph)
                loss_iter += loss

            loss_iter /= num_batch
            logging.info('iter %d, train loss: %f' % (ei, loss_iter))
            # model.valid_model(sess, valid_dset, ei, saver)
            mtest.decode_test_with_full_questions(sess,
                                                  test_dset,
                                                  ei,
                                                  wordlist,
                                                  kblist,
                                                  saver,
                                                  dir='./output_newdata')
def split_batch_by_box_num(batches, box_batch_size):
    """Split one multi-GPU batch into several batches of `box_batch_size` boxes.

    Args:
        batches: Pair ``(batchIdxs, batch_datas)``. Each item of
            ``batch_datas`` is a ``Dataset`` whose ``data`` dict holds
            one-item lists under 'imgs', 'imgdata', 'resized_image' and 'gt'.
        box_batch_size: Number of boxes per inner batch.

    Returns:
        List of ``(batchIdxs, [Dataset, ...])`` tuples, one per inner batch.
        Every per-GPU dataset in an inner batch carries exactly
        `box_batch_size` box rows. Missing positions are filled with index 0,
        which repeats the first box.
    """
    batchIdxs, batch_datas = batches
    if not batch_datas:
        return []

    # One entry per GPU.
    num_boxes = [d.data['gt'][0]['boxes'].shape[0] for d in batch_datas]
    split_into_num_batch = int(
        math.ceil(max(num_boxes) / float(box_batch_size)))

    # Box indexes for each inner batch, per GPU. The last group of an image
    # is filled with 0 by grouper(). Images with fewer groups than the
    # largest one get extra all-zero groups so every GPU has an entry for
    # every inner batch.
    padded_indexes = []
    for count in num_boxes:
        groups = list(grouper(range(count), box_batch_size, fillvalue=0))
        need = split_into_num_batch - len(groups)
        if need > 0:
            groups = groups + [[0] * box_batch_size for _ in range(need)]
        padded_indexes.append(groups)

    newdata = []
    for i in range(split_into_num_batch):
        this_datas = []
        for j, source in enumerate(batch_datas):
            # Must come from the padded list: the unpadded one is too short
            # for images with fewer boxes and raised IndexError.
            selected = padded_indexes[j][i]
            this_datas.append({
                "imgs": [source.data['imgs'][0]],
                "imgdata": [source.data['imgdata'][0]],
                "resized_image": [source.data['resized_image'][0]],
                'gt': [{
                    "boxes": source.data['gt'][0]['boxes'][selected, :],
                    #"labels": source.data['gt'][0]['labels'][selected],
                }],
            })
        newdata.append(
            (batchIdxs, [Dataset(this_data) for this_data in this_datas]))
    return newdata
def productionize(save_loc: str, model_save_loc: str) -> None:
    """Export a trained model and its accessory JSON files to `save_loc`.

    Args:
        save_loc: Output directory; created if it does not exist.
        model_save_loc: Path of the trained model to load.

    Raises:
        RuntimeError: If `model_save_loc` does not exist.
    """
    model_is_missing = not os.path.exists(model_save_loc)
    if model_is_missing:
        message = ('No such trained model exists: "{}". Run the '
                   'training script first!')
        raise RuntimeError(message.format(model_save_loc))

    output_dir_exists = os.path.exists(save_loc)
    if not output_dir_exists:
        os.makedirs(save_loc)

    dataset = Dataset()
    model = ks.models.load_model(model_save_loc)

    print('Converting model to Tensorflow-JS format')
    save_model_tfjs(model, save_loc)

    print('Saving accessory JSON files')
    save_accessory_json(dataset, save_loc)
def fit_encoder(X, params):
    """Train a causal CNN encoder on `X` with a triplet loss.

    Args:
        X: Training array. If it contains NaN, the varying-length variant of
            the triplet loss is used.
        params: Dict of hyper-parameters. Keys read here: ``cuda``, ``gpu``,
            ``batch_size``, ``in_channels``, ``channels``, ``depth``,
            ``reduced_size``, ``out_channels``, ``kernel_size``, ``lr``,
            ``epochs``, ``compared_length``, ``nb_random_samples`` and
            ``negative_penalty``.

    Returns:
        Tuple ``(encoder, losses)`` where ``losses['training']`` holds the
        last batch loss of every epoch.
    """
    varying = bool(np.isnan(np.sum(X)))

    # Full training set as one tensor; it is passed to the loss with each batch.
    train = torch.from_numpy(X)
    if params['cuda']:
        train = train.cuda(params['gpu'])

    train_generator = torch.utils.data.DataLoader(
        Dataset(X), batch_size=params['batch_size'], shuffle=True)

    encoder = causal_cnn.CausalCNNEncoder(
        params['in_channels'], params['channels'], params['depth'],
        params['reduced_size'], params['out_channels'],
        params['kernel_size'])
    if params['cuda']:
        encoder.cuda(params['gpu'])
    encoder.double()

    optimizer = torch.optim.Adam(encoder.parameters(), lr=params['lr'])

    # configure("BasisAnalysis/run-epoch20", flush_secs=2)
    # wandb.init(project="BasisAnalysis")

    # Loss variant depends only on whether the data has varying lengths.
    if varying:
        loss_class = triplet_loss.TripletLossVaryingLength
    else:
        loss_class = triplet_loss.TripletLoss

    losses = {'training': []}
    for epoch_index in range(params['epochs']):
        for batch in train_generator:
            if params['cuda']:
                batch = batch.cuda(params['gpu'])
            optimizer.zero_grad()
            _loss = loss_class(params['compared_length'],
                               params['nb_random_samples'],
                               params['negative_penalty'])
            loss = _loss(batch, encoder, train)
            loss.backward()
            optimizer.step()
        print('[LOSS] Epoch {} : {}'.format(epoch_index + 1, loss))
        # log_value('loss', loss, epoch_index)
        losses['training'].append(loss)

    # print(torch.cuda.get_device_name(0))
    # wandb.log(losses)
    return encoder, losses
def test(self, test_file):
    """Return the summed batch loss on the test file divided by the sample count.

    The data loader is created on first use and cached on ``self.test_data``.
    Later calls reuse it and ignore `test_file`.

    Args:
        test_file: File passed to ``Dataset`` (columns 0-83 are features,
            column 84 is the target).

    Returns:
        Sum of the per-batch losses divided by the number of samples.
    """
    if not self.test_data:
        dataset = Dataset(test_file, featuresCols=range(84), targetCol=[84])
        self.test_data = th.utils.data.DataLoader(dataset, batch_size=20)

    total_loss = 0
    for data, target in self.test_data:
        data = data.float()
        pred = self.model(data)
        target = target.float().reshape(-1, 1).to(self.model.device)
        loss = self.loss_func(pred, target)
        total_loss += loss.item()

    # Read the size from the cached loader: the local `dataset` only exists
    # on the call that created the loader, so it was unbound on later calls.
    return total_loss / len(self.test_data.dataset)
def test(self, test_file):
    """Return the reward-weighted loss on the test file divided by the sample count.

    The data loader is created on first use and cached on ``self.test_data``.
    Later calls reuse it and ignore `test_file`.

    Args:
        test_file: File passed to ``Dataset`` (columns 0-49 are features,
            columns 50 and 51 are split into target and reward).

    Returns:
        Sum over batches of ``mean(loss * reward)`` divided by the number of
        samples.
    """
    if not self.test_data:
        dataset = Dataset(test_file,
                          featuresCols=range(0, 50),
                          targetCol=[50, 51])
        self.test_data = th.utils.data.DataLoader(dataset, batch_size=20)

    total_loss = 0
    for data, target in self.test_data:
        data = data.float()
        pred = self.model(data)
        # First target column is the label, second one the reward.
        target, reward = th.split(target, [1, 1], dim=1)
        target = target.flatten()
        loss = self.loss_func(pred, target)
        # NOTE(review): `reward` keeps shape (N, 1); if `loss` is per-sample
        # with shape (N,), the product broadcasts to (N, N) - confirm intent.
        total_loss += th.mean(loss * reward).item()

    # Read the size from the cached loader: the local `dataset` only exists
    # on the call that created the loader, so it was unbound on later calls.
    return total_loss / len(self.test_data.dataset)
def encode(self, X, batch_size=50):
    """
    Calculate the representation of the time series

    Args:
        X: Time series dataset
        batch_size: Number of series encoded per forward pass

    Returns:
        Array of shape ``(numpy.shape(X)[0], self.out_channels)``.
    """
    loader = torch.utils.data.DataLoader(Dataset(X), batch_size=batch_size)
    features = numpy.zeros((numpy.shape(X)[0], self.out_channels))
    self.encoder = self.encoder.eval()

    with torch.no_grad():
        for index, batch in enumerate(loader):
            # Rows [start, stop) of the output belong to this batch.
            start = index * batch_size
            stop = (index + 1) * batch_size
            encoded = self.encoder(batch.to(self.device))
            features[start:stop] = encoded.cpu()
    return features
def TrainModel(args, model, num_clips, fourier=False):
    """Train `model` as an autoencoder and evaluate it after every epoch.

    Args:
        args: Options read here: ``batch_size``, ``lr``, ``variational``,
            ``num_epochs`` and ``device``.
        model: Network to train.
        num_clips: Clips ``1 .. num_clips - 1`` of ``TRAINING_DATASET`` are used.
        fourier: When True, each batch is transformed with an STFT and the
            model is called with separate real and imaginary parts.

    Returns:
        List with one ``TestModel`` loss per epoch.
    """
    # Datasets
    trainfunc = Dataset(TRAINING_DATASET, range(1, num_clips))
    trainloader = torch.utils.data.DataLoader(trainfunc,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=0)

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    lossfunc = vae_loss if args.variational else nn.MSELoss()

    # The analysis window does not change between batches; build it once.
    window = torch.hann_window(2048) if fourier else None

    eval_results = []
    for epoch in range(args.num_epochs):
        # TestModel() leaves the model in eval mode after each epoch, so
        # training mode has to be re-enabled at the start of every epoch.
        model.train()
        for data in trainloader:
            optimizer.zero_grad()
            if not fourier:
                inputs = data.to(args.device)
                outputs = model(inputs)
                loss = lossfunc(outputs, inputs)
                loss.backward()
            else:
                comp_input = torch.stft(data,
                                        n_fft=2048,
                                        window=window,
                                        return_complex=True)
                real = comp_input.real.unsqueeze(0).to(args.device)
                imag = comp_input.imag.unsqueeze(0).to(args.device)
                realOUT, imagOUT = model(real, imag)
                loss_real = lossfunc(realOUT, real)
                loss_imag = lossfunc(imagOUT, imag)
                # One backward pass over the summed loss gives the same
                # gradients as two separate passes, and also works when both
                # outputs share parts of the autograd graph.
                (loss_real + loss_imag).backward()
            optimizer.step()

        eval_results.append(
            TestModel(args, model, TRAINING_DATASET, num_clips,
                      fourier=fourier))
        print(f'[Epoch {epoch}]\tEvaluation Loss: {eval_results[-1]}')

    print('Finished Training')
    return eval_results
# Question vocabulary, needed to build the embedding matrix.
q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')

print("Load word2Vec")
# Text embedding file: one "<word> <v1> <v2> ..." entry per line.
embeddings = {}
# The context manager closes the file once it has been read.
with open(embedding_path, encoding='utf-8') as embedding_file:
    for line in embedding_file:
        fields = line.split()
        if not fields:
            # Skip blank lines instead of failing on fields[0].
            continue
        w = fields[0]
        vec = [float(x) for x in fields[1:]]
        embeddings[w] = vec

emb, c = load_emb_matrix(q_i2w, embeddings)
# The raw dictionary is no longer needed once the matrix is built.
del embeddings

train_set = Dataset("/home/hbenyounes/vqa/datasets/coco/train/images.feat",
                    "/home/hbenyounes/vqa/datasets/coco/train/img_ids.txt",
                    "/home/hbenyounes/vqa/datasets/coco/train/questions.idxs",
                    "/home/hbenyounes/vqa/datasets/coco/train/answers.idxs")
test_set = Dataset("/home/hbenyounes/vqa/datasets/coco/test/images.feat",
                   "/home/hbenyounes/vqa/datasets/coco/test/img_ids.txt",
                   "/home/hbenyounes/vqa/datasets/coco/test/questions.idxs",
                   "/home/hbenyounes/vqa/datasets/coco/test/answers.idxs")

# Output directory for this model.
if not exists(join(root_path, model_name)):
    mkdir(join(root_path, model_name))

q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')
a_i2w, a_w2i = load_vocab('datasets/coco/train/answers.vocab')
Nq = len(q_i2w)
Na = len(a_i2w)