Example #1
def main():
    # create configurations
    print('load pre-defined configs and pre-processed dataset...')
    config = Config()
    # create word and tag processor
    word_processor = Processor(config.word_vocab_filename,
                               config.char_vocab_filename,
                               lowercase=True,
                               use_chars=True,
                               allow_unk=True)
    tag_processor = Processor(config.tag_filename)
    # load train, development and test dataset
    train_set = Dataset(config.train_filename,
                        config.tag_idx,
                        word_processor,
                        tag_processor,
                        max_iter=config.max_iter)
    dev_set = Dataset(config.dev_filename,
                      config.tag_idx,
                      word_processor,
                      tag_processor,
                      max_iter=config.max_iter)
    test_set = Dataset(config.test_filename,
                       config.tag_idx,
                       word_processor,
                       tag_processor,
                       max_iter=config.max_iter)
    # build model
    model = SeqLabelModel(config)
    model.train(train_set, dev_set, test_set)
    # testing
    model.evaluate(test_set, eval_dev=False)
    # interact
    idx_to_tag = {idx: tag for tag, idx in config.tag_vocab.items()}
    interactive_shell(model, word_processor, idx_to_tag)
Example #2
def train():
    to_tfrecord = To_tfrecords(txt_file='trainval.txt')
    to_tfrecord.transform()
    train_generator = Dataset(filenames='data/tfr_voc/trainval.tfrecords',
                              enhance=True)
    train_dataset = train_generator.transform()
    yolo = YOLONET()
Example #3
    def get_dataset(self):
        """Load the train, development and test datasets."""
        self.train_dataset = Dataset(self.config.train_file, self.vocab, self.config)
        self.dev_dataset = Dataset(self.config.dev_file, self.vocab, self.config)
        self.test_dataset = Dataset(self.config.test_file, self.vocab, self.config)
Example #4
    def get_data_prep(self, name="mnist"):
        """Loads and preprocess the traning and testing data. Replaces the
        standard ways of loading conventional datasets, makes it easier to
        use different ones to experiemnt with the model

        Parameters
        ----------
        name : string
            A string with the name of standard datasets whithin: mnist,
            fashion_mnist, cifar10 and cifar100.

        Returns
        -------
        type:numpy arrays
            Returns numpy arryas of train and test ready to be fed into the
            model.

        """

        dataset = Dataset(name)
        dataprep = Dataprep(dataset.x_train, dataset.y_train, dataset.x_test,
                            dataset.y_test)
        x_train = dataprep.x_train
        y_train = dataprep.y_train
        x_test = dataprep.x_test
        y_test = dataprep.y_test
        return x_train, y_train, x_test, y_test
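A hypothetical sketch of the Dataset and Dataprep helpers assumed by get_data_prep above, using tf.keras.datasets (which exposes exactly the four names listed in the docstring) and simple pixel scaling; the project's real classes may differ.

import numpy as np
import tensorflow as tf


class Dataset:
    def __init__(self, name="mnist"):
        # look up e.g. tf.keras.datasets.mnist and load the split arrays
        loader = getattr(tf.keras.datasets, name)
        (self.x_train, self.y_train), (self.x_test, self.y_test) = loader.load_data()


class Dataprep:
    def __init__(self, x_train, y_train, x_test, y_test):
        # minimal preprocessing: scale pixel values to [0, 1]
        self.x_train = x_train.astype(np.float32) / 255.0
        self.x_test = x_test.astype(np.float32) / 255.0
        self.y_train, self.y_test = y_train, y_test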
Example #5
def calc_test_loss(test_set=Dataset(test_x, test_y), display=True):
    accs = []
    worksum = int(len(test_x) / batch_size)
    loss_list = []
    predict_list = []
    target_list = []
    source_list = []
    pb = ProgressBar(worksum=worksum,info="validating...",auto_display=display)
    pb.startjob()
    #test_set = Dataset(test_x,test_y)
    for j in range(worksum):
        batch_x,batch_y = test_set.next_batch(batch_size)
        lx = [seq_max_len] * batch_size
        ly = [seq_max_len] * batch_size
        bx = [np.sum(m > 0) for m in batch_x]
        by = [np.sum(m > 0) for m in batch_y]
        go_tokens = np.ones((batch_y.shape[0], 1), dtype=np.int64) * ch2ind['<go>']
        tmp_loss, tran = session.run(
            [train_loss, translations],
            feed_dict={x: batch_x, y: batch_y,
                       y_in: np.concatenate((go_tokens, batch_y[:, :-1]), axis=1),
                       x_len: lx, y_len: ly,
                       y_real_len: by, x_real_len: bx})
        loss_list.append(tmp_loss)
        tmp_acc = cal_acc(tran,batch_y)
        accs.append(tmp_acc)
        predict_list += [i for i in tran]
        target_list += [i for i in batch_y]
        source_list += [i for i in batch_x]
        pb.complete(1)
    return np.average(loss_list),np.average(accs),get_bleu_score(predict_list,target_list),predict_list,target_list,source_list
Example #6
def eval_bs(test_set: Dataset, vocab: Vocab, model: Seq2Seq, params: Params):
    test_gen = test_set.generator(1, vocab, None, bool(params.pointer))
    n_samples = int(params.test_sample_ratio * len(test_set.pairs))

    if params.test_save_results and params.model_path_prefix:
        result_file = tarfile.open(params.model_path_prefix + ".results.tgz", 'w:gz')
    else:
        result_file = None

    model.eval()
    r1, r2, rl, rsu4 = 0, 0, 0, 0
    prog_bar = tqdm(range(1, n_samples + 1))
    for i in prog_bar:
        batch = next(test_gen)
        scores, file_content = eval_bs_batch(batch, model, vocab, pack_seq=params.pack_seq,
                                             beam_size=params.beam_size,
                                             min_out_len=params.min_out_len,
                                             max_out_len=params.max_out_len,
                                             len_in_words=params.out_len_in_words,
                                             details=result_file is not None)
        if file_content:
            file_content = file_content.encode('utf-8')
            file_info = tarfile.TarInfo(name='%06d.txt' % i)
            file_info.size = len(file_content)
            result_file.addfile(file_info, fileobj=BytesIO(file_content))
        if scores:
            r1 += scores[0]['1_f']
            r2 += scores[0]['2_f']
            rl += scores[0]['l_f']
            rsu4 += scores[0]['su4_f']
            prog_bar.set_postfix(R1='%.4g' % (r1 / i * 100), R2='%.4g' % (r2 / i * 100),
                                 RL='%.4g' % (rl / i * 100), RSU4='%.4g' % (rsu4 / i * 100))
Example #7
def main(_):

    data_object = Dataset()

    cg = CGAN(data_ob=data_object,
              sample_dir=FLAGS.sample_dir,
              output_size=FLAGS.output_size,
              learn_rate=FLAGS.learn_rate,
              batch_size=FLAGS.batch_size,
              z_dim=FLAGS.z_dim,
              y_dim=FLAGS.y_dim,
              log_dir=FLAGS.log_dir,
              model_path=FLAGS.model_path,
              visua_path=FLAGS.visua_path,
              y_min=FLAGS.y_min,
              y_max=FLAGS.y_max)

    cg.build_model()

    if FLAGS.op == 0:
        cg.train()
    elif FLAGS.op == 1:
        cg.test()
    else:
        print("op should be either 0 or 1.")
        assert (False)
Example #8
def TestModel(args, model, dataset, num_clips, fourier=False):
    trainfunc = Dataset(dataset, range(1, num_clips))
    trainloader = torch.utils.data.DataLoader(trainfunc,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=0)
    model.eval()
    if not args.variational:
        lossfunc = nn.MSELoss()
    else:
        lossfunc = vae_loss

    losses = []
    for i, data in enumerate(trainloader):
        if not fourier:
            inputs = data.to(args.device)
            outputs = model(inputs)
            loss = lossfunc(outputs, inputs)
            losses.append(loss.item())
        else:
            comp_input = torch.stft(data.squeeze(),
                                    n_fft=2048,
                                    window=torch.hann_window(2048),
                                    return_complex=True)
            real, imag = comp_input.real.to(args.device), comp_input.imag.to(
                args.device)
            real, imag = real[None, None], imag[None, None]  # unsqueeze twice in 0th dim
            realOUT, imagOUT = model(real, imag)
            loss_real = lossfunc(realOUT, real)
            loss_imag = lossfunc(imagOUT, imag)
            losses.append(loss_real.item() + loss_imag.item())

    return np.mean(losses)
Example #9
def encode(X, encoder, params):
    varying = bool(np.isnan(np.sum(X)))

    test = Dataset(X)
    test_generator = torch.utils.data.DataLoader(test, batch_size=1)
    features = np.zeros((X.shape[0], params['out_channels']))
    encoder = encoder.eval()

    count = 0
    with torch.no_grad():
        if not varying:
            for batch in test_generator:
                if params['cuda']:
                    batch = batch.cuda(params['gpu'])
                features[count:(count + 1)] = encoder(batch).cpu()
                count += 1
        else:
            for batch in test_generator:
                if params['cuda']:
                    batch = batch.cuda(params['gpu'])
                length = batch.size(2) - torch.sum(torch.isnan(
                    batch[0, 0])).data.cpu().numpy()
                features[count:(count + 1)] = encoder(
                    batch[:, :, :length]).cpu()
                count += 1
    return features
Example #10
    def evaluate(self, sess, feat_index, feat_value, label, batch_size=None):
        tloss, entloss, regloss = 0, 0, 0
        if batch_size is None:
            tloss, entloss, regloss = sess.run(
                [self.loss, self.entropy_loss, self.reg_loss],
                feed_dict={
                    self.feat_index: feat_index,
                    self.feat_value: feat_value,
                    self.label: label,
                    self.keep_prob: 1,
                    self.is_training: False
                })
        else:
            data = Dataset(feat_value,
                           feat_index,
                           label,
                           batch_size,
                           shuffle=False)
            for i, (feat_index, feat_value, label) in enumerate(data, 1):
                _tloss, _entloss, _regloss = sess.run(
                    [self.loss, self.entropy_loss, self.reg_loss],
                    feed_dict={
                        self.feat_index: feat_index,
                        self.feat_value: feat_value,
                        self.label: label,
                        self.keep_prob: 1,
                        self.is_training: False
                    })
                tloss = tloss + (_tloss - tloss) / i
                entloss = entloss + (_entloss - entloss) / i
                regloss = regloss + (_regloss - regloss) / i

        return tloss, entloss, regloss
Example #11
    def predict(self, sess, feat_index, feat_value, batch_size=None):
        if batch_size is None:
            prob = sess.run(
                [self.prob],
                feed_dict={
                    self.feat_index: feat_index,
                    self.feat_value: feat_value,
                    self.keep_prob: 1,
                    self.is_training: False
                })[0]
        else:
            data = Dataset(feat_value,
                           feat_index, [None] * len(feat_index),
                           batch_size,
                           shuffle=False)
            probs = []
            for feat_index, feat_value, _ in data:
                prob = sess.run(
                    [self.prob],
                    feed_dict={
                        self.feat_index: feat_index,
                        self.feat_value: feat_value,
                        self.keep_prob: 1,
                        self.is_training: False
                    })[0]
                probs.append(prob.ravel())

            prob = np.concatenate(probs)

        return prob.ravel()
Example #12
    def Predict(self, img_path, vis=True):
        '''
        User function: Run inference on image and visualize it. Output mask saved as output_mask.npy

        Args:
            img_path (str): Relative path to the image file
            vis (bool): If True, predicted mask is displayed.

        Returns:
            None. The predicted mask is saved as output_mask.npy and, if vis
            is True, displayed.
        '''
        dirPath = "tmp_test"

        if (os.path.isdir(dirPath)):
            shutil.rmtree(dirPath)

        os.mkdir(dirPath)
        os.mkdir(dirPath + "/img_dir")
        os.mkdir(dirPath + "/gt_dir")

        os.system("cp " + img_path + " " + dirPath + "/img_dir")
        os.system("cp " + img_path + " " + dirPath + "/gt_dir")

        x_test_dir = dirPath + "/img_dir"
        y_test_dir = dirPath + "/gt_dir"

        if (self.system_dict["params"]["image_shape"][0] % 32 != 0):
            self.system_dict["params"]["image_shape"][0] += (
                32 - self.system_dict["params"]["image_shape"][0] % 32)

        if (self.system_dict["params"]["image_shape"][1] % 32 != 0):
            self.system_dict["params"]["image_shape"][1] += (
                32 - self.system_dict["params"]["image_shape"][1] % 32)

        preprocess_input = sm.get_preprocessing(
            self.system_dict["params"]["backbone"])
        test_dataset = Dataset(
            x_test_dir,
            y_test_dir,
            self.system_dict["params"]["classes_dict"],
            classes_to_train=self.system_dict["params"]["classes_to_train"],
            augmentation=get_validation_augmentation(
                self.system_dict["params"]["image_shape"][0],
                self.system_dict["params"]["image_shape"][1]),
            preprocessing=get_preprocessing(preprocess_input),
        )

        test_dataloader = Dataloder(test_dataset, batch_size=1, shuffle=False)

        image, gt_mask = test_dataset[0]
        image = np.expand_dims(image, axis=0)
        pr_mask = self.system_dict["local"]["model"].predict(image).round()
        np.save("output_mask.npy", pr_mask)

        if (vis):
            visualize(
                image=denormalize(image.squeeze()),
                pr_mask=pr_mask[..., 0].squeeze(),
            )
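A hedged usage sketch for Predict above, assuming seg is an instance of the (unnamed) segmentation wrapper; the only documented side effect is the output_mask.npy file.

import numpy as np

# 'seg' and the image path below are placeholders for illustration only
seg.Predict("sample.jpg", vis=False)
mask = np.load("output_mask.npy")
print("predicted mask shape:", mask.shape)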
Example #13
    def learn(self, transition, player_id, batch_size=64, epochs=5):
        """Update the strategy with the experience recorded in `transition`."""
        transition = transition.pov(player_id)
        ds = Dataset(transition.next_board, transition.reward)
        ld = DataLoader(ds, batch_size=batch_size, shuffle=True)
        for _ in range(epochs):
            for X, y in ld:
                self.strategy.update(X, y)
Example #14
    def train(self):
        lr = self.opts.lr
        self.sess.run(self.init)
        train_set = Dataset(self.opts)
        train_size = len(train_set)
        for epoch in range(1, self.opts.num_epochs):
            batch_num = 0
            for batch_begin, batch_end in zip(
                    range(0, train_size, self.opts.batch_size),
                    range(self.opts.batch_size, train_size, self.opts.batch_size)):
                begin_time = time.time()
                input_ptv, input_oct, gt_img = train_set.load_batch(
                    batch_begin, batch_end)
                feed_dict = {
                    self.true_images: gt_img,
                    self.input_ptv: input_ptv,
                    self.lr: lr,
                    self.input_oct: input_oct
                }
                _, loss, summary = self.sess.run(
                    [self.optimizer, self.loss, self.summaries],
                    feed_dict=feed_dict)

                batch_num += 1
                self.writer.add_summary(
                    summary,
                    epoch * (train_size // self.opts.batch_size) + batch_num)
                if batch_num % self.opts.display == 0:
                    rem_time = (time.time() - begin_time) * (
                        self.opts.num_epochs - epoch) * (train_size /
                                                         self.opts.batch_size)
                    log = '-' * 20
                    log += ' Epoch: {}/{}|'.format(epoch, self.opts.num_epochs)
                    log += ' Batch Number: {}/{}|'.format(
                        batch_num, train_size / self.opts.batch_size)
                    log += ' Batch Time: {}\n'.format(time.time() - begin_time)
                    log += ' Remaining Time: {:0>8}\n'.format(
                        datetime.timedelta(seconds=rem_time))
                    log += ' lr: {} loss: {}\n'.format(lr, loss)
                    print(log)
                # if epoch % self.opts.lr_decay == 0 and batch_num == 1:
                #     lr *= self.opts.lr_decay_factor
                if epoch % self.opts.ckpt_frq == 0 and batch_num == 1:
                    self.saver.save(
                        self.sess,
                        "ckpt/" + "{}_{}_{}".format(epoch, lr, loss))
Example #15
def main():
    # load configurations
    config = Config()

    re_train = False

    # create word and tag processor
    word_processor = Processor(config.word_vocab_filename,
                               config.char_vocab_filename,
                               lowercase=True,
                               use_chars=True,
                               allow_unk=True)
    tag_processor = Processor(config.tag_filename)

    # load train, development and test datasets
    train_set = Dataset(config.train_filename,
                        config.tag_idx,
                        word_processor,
                        tag_processor,
                        max_iter=config.max_iter)
    dev_set = Dataset(config.dev_filename,
                      config.tag_idx,
                      word_processor,
                      tag_processor,
                      max_iter=config.max_iter)
    test_set = Dataset(config.test_filename,
                       config.tag_idx,
                       word_processor,
                       tag_processor,
                       max_iter=config.max_iter)

    # build model
    model = SeqLabelModel(config)
    model.restore_last_session(ckpt_path='ckpt/{}/'.format(config.train_task))

    # train
    if re_train:
        model.train(train_set, dev_set, test_set)

    # test
    model.evaluate(test_set, eval_dev=False)
    # interact
    idx_to_tag = {idx: tag for tag, idx in config.tag_vocab.items()}
    interactive_shell(model, word_processor, idx_to_tag)
Example #16
def load(modelid):
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    handler = logging.FileHandler("./log/log.txt", mode='w')
    handler.setLevel(logging.INFO)
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
    )
    handler.setFormatter(formatter)
    console.setFormatter(formatter)
    logger.addHandler(handler)
    logger.addHandler(console)
    with open('./dicts/word2id.pickle', 'rb') as f:
        word2id = pickle.load(f)
    with open('./dicts/kb2id.pickle', 'rb') as f:
        kb2id = pickle.load(f)
    with open('./dicts/wordemb.pickle', 'rb') as f:
        wordemb = pickle.load(f)
    with open('./dicts/kbemb.pickle', 'rb') as f:
        kbemb = pickle.load(f)
    with open('./dicts/wordlist.json', 'r') as f:
        wordlist = json.load(f)
        wordlist = ['<EOS>', '<SOS>'] + wordlist
    flags = tf.flags
    flags.DEFINE_integer('hidden', 600, "")
    flags.DEFINE_integer('word_vocab_size', len(word2id), "")
    flags.DEFINE_integer('word_emb_dim', 300, "")
    flags.DEFINE_integer('kb_vocab_size', len(kb2id), "")
    flags.DEFINE_integer('kb_emb_dim', 100, "")
    flags.DEFINE_integer('maxlen', 35, "")
    flags.DEFINE_integer('batch', 128, "")
    flags.DEFINE_integer('epoch_num', 50, "")
    flags.DEFINE_boolean('is_train', False, "")
    flags.DEFINE_float('max_grad_norm', 0.1, "")
    flags.DEFINE_float('lr', 0.00025, "")
    config = flags.FLAGS
    valid_file = './sq/annotated_fb_data_train.txt'
    valid_dset = Dataset(valid_file, max_cnt=128)
    with tf.variable_scope('model'):
        model = Model(config, word_emb_mat=wordemb, kb_emb_mat=kbemb)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, './savemodel/model' + str(modelid) + '.pkl')
        out_idx = model.decode(sess, valid_dset)
        sentences = []
        for s in out_idx:
            words = []
            for w in s:
                words.append(wordlist[w])
            sentence = ' '.join(words)
            sentences.append(sentence)
        with open('output.json', 'w') as f:
            json.dump(sentences, f)
Example #17
def main():
    args = parser.parse_args()

    # classifier
    if args.classifier is not None:
        snapshot = torch.load(args.classifier, map_location=lambda s, _: s)
        classifier = Classifier(snapshot['channels'])
        classifier.load_state_dict(snapshot['model'])
    else:
        classifier = None

    # dataset
    raw_loader = torch.utils.data.DataLoader(Dataset(
        os.path.join(DATA_DIR, 'raw')),
                                             batch_size=args.batch,
                                             shuffle=True,
                                             drop_last=True)
    noised_loader = torch.utils.data.DataLoader(Dataset(
        os.path.join(DATA_DIR, 'noised_tgt')),
                                                batch_size=args.batch,
                                                shuffle=True,
                                                drop_last=True)

    # model
    generator_f = Generator(args.channels)
    generator_r = Generator(args.channels)
    discriminator_f = Discriminator(args.channels)
    discriminator_r = Discriminator(args.channels)

    # train
    trainer = Trainer(generator_f, generator_r, discriminator_f,
                      discriminator_r, classifier, args.gpu)

    for epoch in range(args.epoch):
        trainer.train(noised_loader, raw_loader, epoch < args.epoch // 10)
        print('[{}] {}'.format(epoch, trainer), flush=True)

        snapshot = {
            'channels': args.channels,
            'model': generator_f.state_dict()
        }
        torch.save(snapshot, '{}.tmp'.format(args.file))
        os.rename('{}.tmp'.format(args.file), args.file)
Example #18
def main():
    tf.compat.v1.disable_eager_execution()

    # dataset
    dset = Dataset(config)
    dset.build()
    config.vocab_size = len(dset.word2id)
    config.pos_size = len(dset.pos2id)
    config.ner_size = len(dset.ner2id)
    config.dec_start_id = dset.word2id["_GOO"]
    config.dec_end_id = dset.word2id["_EOS"]
    config.pad_id = dset.pad_id
    config.stop_words = dset.stop_words

    model = LatentBow(config)
    with tf.compat.v1.variable_scope(config.model_name):
        model.build()

    # controller
    controller = Controller(config)
    controller.train(model, dset)
Example #19
def main(config):
    # Initialise the model type and arguments
    model, args = init_trainer(config)
    logger.info(args)
    # Read training data
    dataset = Dataset(args['data_path'], args)
    corpus = dataset.get_corpus()
    # Train model
    model.train(corpus)
    # Save model
    if args['save_model']:
        # Save run
        logger.info('Saving Model')
        model.save(args['model_dir'])
        dataset.save(args['data_path'])
    # Perform validation
    valid = Validation()
    x = model.get_vectors()
    df = dataset.get_df()
    # valid.plot_pca(x, df['variety_region'])
    results = valid.cluster_similarities(x, df)
    logger.info(results)
    if args['save_validation']:
        logger.info('Saving Validation')
        config['output'] = results['similarity']
        with open(args['validation_dir'] + '{}.pkl'.format(datetime.now()),
                  "wb") as pickleFile:
            pickle.dump(config, pickleFile)
Example #20
def train_model(train_file_path, save=False):
    """Training and saving the spam classifier model.

    Args:
        train_file_path ([str]): [Path of the training set csv file]
        save (bool, optional): [If true a new model will be saved]. Defaults to False.
    """
    # Loading data
    dt = Dataset(train_file_path)
    x_train, y_train = dt.get_train_data()
    x_val, y_val = dt.get_val_data()

    # Fitting model
    classifier = MultinomialNB()
    classifier.fit(x_train, y_train)

    # Save model file to be used in future inferences
    if save:
        file_path = os.path.join(parent_path, 'model/spam_detection_model.pkl')
        with open(file_path, 'wb') as fp:
            pickle.dump(classifier, fp)
        dt.save_vectorizer(os.path.join(parent_path, 'model/vectorizer.pkl'))
        print("New model saved.")

    # Testing on validation subset
    predicted = classifier.predict(x_val)
    actual = y_val.tolist()

    # Printing results
    print('Accuracy: %.3f' % accuracy_score(actual, predicted))
    print('F-Measure: %.3f' % f1_score(actual, predicted, average='binary'))
    print('Confusion Matrix:')
    print(confusion_matrix(actual, predicted))        
    print('Report:', classification_report(actual, predicted))
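A minimal sketch of the Dataset interface train_model relies on (get_train_data, get_val_data, save_vectorizer), assuming a CSV with 'text' and 'label' columns and a scikit-learn CountVectorizer; the project's real Dataset class may differ.

import pickle

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split


class Dataset:
    def __init__(self, csv_path):
        # assumed columns: 'text' (message body) and 'label' (0/1 spam flag)
        df = pd.read_csv(csv_path)
        self.vectorizer = CountVectorizer()
        x = self.vectorizer.fit_transform(df['text'])
        self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(
            x, df['label'], test_size=0.2, random_state=0)

    def get_train_data(self):
        return self.x_train, self.y_train

    def get_val_data(self):
        return self.x_val, self.y_val

    def save_vectorizer(self, path):
        with open(path, 'wb') as fp:
            pickle.dump(self.vectorizer, fp)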
Example #21
def run():
    print('Loading data...')
    with open('data/data_emb', 'rb') as f:
        all_sets, embs, word2idx = pickle.load(f)
    emb_layer = nn.Embedding(embs.shape[0], embs.shape[1])
    emb_layer.weight = nn.Parameter(torch.from_numpy(embs))
    model = Model(emb_layer).cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=hparams['learning_rate'])
    train_set = Dataset(all_sets[0], shuffle=True, pad_keys=('q1', 'q2'))
    dev_set = Dataset(all_sets[1], shuffle=False, pad_keys=('q1', 'q2'))
    test_set = Dataset(all_sets[2], shuffle=False, pad_keys=('q1', 'q2'))
    step = 0
    sum_loss = 0
    dev_best = 0
    test_score = 0
    print("Starting training...")
    print(hparams)
    start_time = time.time()
    for epoch in range(hparams['max_epoch']):
        batches, batch_lengths = train_set.get_batches(hparams['batch_size'],
                                                       ('q1', 'q2', 'y'))
        for b_data, b_lengths in zip(batches, batch_lengths):
            sum_loss += run_batch(b_data, b_lengths, model, optimizer)
            step += 1
            if step % hparams['display_step'] == 0:
                avg_loss = sum_loss / hparams['display_step']
                sum_loss = 0
                dev_score = run_epoch_eval(dev_set, model)
                out_str = f'Epoch {epoch} iter {step} took {time.time() - start_time:.1f}s\n' \
                          f'loss:\t{avg_loss:.5f}\tdev score:\t{dev_score:.4f}'
                if dev_score > dev_best:
                    dev_best = dev_score
                    output_file = f'pred/{get_script_short_name(__file__)}.pred'
                    test_score = run_epoch_eval(test_set, model, output_file)
                    out_str += f'\t*** New best dev ***\ttest score:\t{test_score:.4f}'
                print(out_str)
                start_time = time.time()
    print('Best model on dev: dev:{:.4f}\ttest:{:.4f}'.format(
        dev_best, test_score))
Example #22
def test():

    with torch.no_grad():
        dataset = Dataset('val', args)
        print('Start Testing, Data Length:', len(dataset))
        loader = dataset2dataloader(dataset,
                                    args.batch_size,
                                    args.num_workers,
                                    shuffle=False)

        print('start testing')
        v_acc = []
        entropy = []
        acc_mean = []
        total = 0
        cons_acc = 0.0
        cons_total = 0.0
        attns = []

        for (i_iter, batch) in enumerate(loader):

            video_model.eval()

            tic = time.time()
            video = batch.get('video').cuda(non_blocking=True)
            label = batch.get('label').cuda(non_blocking=True)
            total = total + video.size(0)
            names = batch.get('name')
            border = batch.get('duration').cuda(non_blocking=True).float()

            with autocast():
                if (args.border):
                    y_v = video_model(video, border)
                else:
                    y_v = video_model(video)

            v_acc.extend((y_v.argmax(-1) == label).cpu().numpy().tolist())
            toc = time.time()
            if (i_iter % 10 == 0):
                msg = ''
                msg = add_msg(msg, 'v_acc={:.5f}',
                              np.array(v_acc).reshape(-1).mean())
                msg = add_msg(msg, 'eta={:.5f}',
                              (toc - tic) * (len(loader) - i_iter) / 3600.0)

                print(msg)

        acc = float(np.array(v_acc).reshape(-1).mean())
        msg = 'v_acc_{:.5f}_'.format(acc)

        return acc, msg
Example #23
def minitrain(config, train_file, test_file, wordlist, kblist):
	train_dset = Dataset(train_file)
	test_dset = testDataset(test_file, shuffle=False)
	with tf.variable_scope('model'):
		model = Model(config, word_emb_mat=wordemb, kb_emb_mat=kbemb)
	config.is_train = False
	config.batch = 200
	with tf.variable_scope('model', reuse=True):
		mtest = Model(config, word_emb_mat=wordemb, kb_emb_mat=kbemb)

	saver = tf.train.Saver()
	tfconfig = tf.ConfigProto()
	# tfconfig.gpu_options.allow_growth = True
	sess = tf.Session(config=tfconfig)
	# writer = tf.summary.FileWriter('./graph', sess.graph)
	sess.run(tf.global_variables_initializer())
	num_batch = int(math.ceil(train_dset.datasize / model.batch))
	for ei in range(model.epoch_num):
		train_dset.current_index = 0
		loss_iter = 0.0
		for bi in tqdm(range(num_batch)):
			mini_batch = train_dset.get_mini_batch(model.batch)
			if mini_batch is None:
				break
			triples, questions, qlen, subnames = mini_batch
			feed_dict = {}
			feed_dict[model.triple] = triples
			feed_dict[model.question] = questions
			feed_dict[model.qlen] = qlen
			feed_dict[model.keep_prob] = 1.0
			loss, train_op, out_idx = sess.run(model.out, feed_dict=feed_dict)
			# writer.add_graph(sess.graph)
			loss_iter += loss
		loss_iter /= num_batch
		logging.info('iter %d, train loss: %f' % (ei, loss_iter))
		# model.valid_model(sess, valid_dset, ei, saver)
	mtest.decode_test_with_full_questions(sess, test_dset, ei, wordlist, kblist, saver, dir='./output_newdata')
Example #24
def split_batch_by_box_num(batches, box_batch_size):
    batchIdxs, batch_datas = batches
    newdata = []
    # each element of batch_datas is a Dataset instance; d.data['img'] is a one-item list
    num_gpu = len(batch_datas)

    num_boxes = [
        batch_datas[i].data['gt'][0]['boxes'].shape[0] for i in xrange(num_gpu)
    ]
    max_num_box = max(num_boxes)
    min_num_box = min(num_boxes)

    split_into_num_batch = int(math.ceil(max_num_box / float(box_batch_size)))

    # the indexes for each inner batch
    # the batch with not enough will fill with 0, the first box
    each_batch_selected_indexes = [
        grouper(range(num_boxes[i]), box_batch_size, fillvalue=0)
        for i in xrange(num_gpu)
    ]

    # still need to handle some batch has not enough batch
    t2 = []
    for b in each_batch_selected_indexes:
        if len(b) < split_into_num_batch:
            need = split_into_num_batch - len(b)
            b = b + [[0 for _ in xrange(box_batch_size)] for _ in xrange(need)]
        t2.append(b)

    for i in xrange(split_into_num_batch):
        this_datas = []
        for j in xrange(num_gpu):
            selected = each_batch_selected_indexes[j][i]
            temp = {
                "imgs": [batch_datas[j].data['imgs'][0]],
                "imgdata": [batch_datas[j].data['imgdata'][0]],
                "resized_image": [batch_datas[j].data['resized_image'][0]],
                'gt': [{
                    "boxes":
                    batch_datas[j].data['gt'][0]['boxes'][selected, :],
                    #"labels": batch_datas[j].data['gt'][0]['labels'][selected],
                }],
            }
            this_datas.append(temp)
        newdata.append(
            (batchIdxs, [Dataset(this_data) for this_data in this_datas]))
    return newdata
Example #25
def productionize(save_loc: str, model_save_loc: str) -> None:
    if not os.path.exists(model_save_loc):
        raise RuntimeError('No such trained model exists: "{}". Run the '
                           'training script first!'.format(model_save_loc))

    if not os.path.exists(save_loc):
        os.makedirs(save_loc)

    dataset = Dataset()

    model = ks.models.load_model(model_save_loc)

    print('Converting model to Tensorflow-JS format')
    save_model_tfjs(model, save_loc)

    print('Saving accessory JSON files')
    save_accessory_json(dataset, save_loc)
Example #26
def fit_encoder(X, params):
    varying = bool(np.isnan(np.sum(X)))

    train = torch.from_numpy(X)
    if params['cuda']:
        train = train.cuda(params['gpu'])

    train_torch_dataset = Dataset(X)
    train_generator = torch.utils.data.DataLoader(
        train_torch_dataset, batch_size=params['batch_size'], shuffle=True)
    encoder = causal_cnn.CausalCNNEncoder(params['in_channels'],
                                          params['channels'], params['depth'],
                                          params['reduced_size'],
                                          params['out_channels'],
                                          params['kernel_size'])
    if params['cuda']:
        encoder.cuda(params['gpu'])
    encoder.double()
    optimizer = torch.optim.Adam(encoder.parameters(), lr=params['lr'])

    # configure("BasisAnalysis/run-epoch20", flush_secs=2)
    # wandb.init(project="BasisAnalysis")
    losses = {'training': []}
    for i in range(params['epochs']):
        for batch in train_generator:
            if params['cuda']:
                batch = batch.cuda(params['gpu'])
            optimizer.zero_grad()
            if not varying:
                _loss = triplet_loss.TripletLoss(params['compared_length'],
                                                 params['nb_random_samples'],
                                                 params['negative_penalty'])
            else:
                _loss = triplet_loss.TripletLossVaryingLength(
                    params['compared_length'], params['nb_random_samples'],
                    params['negative_penalty'])
            loss = _loss(batch, encoder, train)
            loss.backward()
            optimizer.step()
            print('[LOSS] Epoch {} : {}'.format(i + 1, loss))
        # log_value('loss', loss, i)
        losses['training'].append(loss.item())

        # print(torch.cuda.get_device_name(0))
        # wandb.log(losses)
    return encoder, losses
Example #27
    def test(self, test_file):
        if not self.test_data:
            dataset = Dataset(test_file,
                              featuresCols=range(84),
                              targetCol=[84])
            self.test_data = th.utils.data.DataLoader(dataset, batch_size=20)

        total_loss = 0

        for data, target in self.test_data:
            data = data.float()
            pred = self.model(data)

            target = target.float().reshape(-1, 1).to(self.model.device)
            loss = self.loss_func(pred, target)

            total_loss += loss.item()

        return total_loss / len(self.test_data.dataset)
Example #28
    def test(self, test_file):
        if not self.test_data:
            dataset = Dataset(test_file,
                              featuresCols=range(0, 50),
                              targetCol=[50, 51])
            self.test_data = th.utils.data.DataLoader(dataset, batch_size=20)

        total_loss = 0

        for data, target in self.test_data:
            data = data.float()
            pred = self.model(data)

            target, reward = th.split(target, [1, 1], dim=1)
            target = target.flatten()

            loss = self.loss_func(pred, target)

            total_loss += th.mean(loss * reward).item()

        return total_loss / len(self.test_data.dataset)
Example #29
    def encode(self, X, batch_size=50):
        """
        Calculate the representation of the time series

        Args:
            X: Time series dataset
        """
        test = Dataset(X)
        test_generator = torch.utils.data.DataLoader(test, batch_size=batch_size)
        features = numpy.zeros((numpy.shape(X)[0], self.out_channels))
        self.encoder = self.encoder.eval()

        count = 0
        with torch.no_grad():
            for batch in test_generator:
                batch = batch.to(self.device)
                features[count * batch_size:(count + 1) * batch_size] = \
                    self.encoder(batch).cpu()
                count += 1

        return features
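A hedged usage sketch for encode above, assuming the surrounding class (hypothetically named TimeSeriesEncoder) wraps a trained encoder and that X is a 3-D numpy array shaped (n_series, channels, length).

import numpy as np

X = np.random.randn(8, 1, 200)              # toy batch: 8 univariate series of length 200
model = TimeSeriesEncoder()                 # hypothetical constructor, not from the source
features = model.encode(X, batch_size=4)    # -> numpy array of shape (8, model.out_channels)
print(features.shape)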
Example #30
def TrainModel(args, model, num_clips, fourier=False):   
    # Datasets
    trainfunc = Dataset(TRAINING_DATASET,range(1, num_clips))
    trainloader = torch.utils.data.DataLoader(trainfunc, batch_size=args.batch_size, shuffle=True, num_workers=0)
    
    # Optimize and Loss
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    if not args.variational:
        lossfunc = nn.MSELoss()
    else:
        lossfunc = vae_loss
    model.train()
    
    # Train
    eval_results = []
    for epoch in range(args.num_epochs):
        for i, data in enumerate(trainloader):
            optimizer.zero_grad()
            if not fourier:
                inputs = data.to(args.device)
                outputs = model(inputs)
                loss = lossfunc(outputs, inputs)
                loss.backward()
            else:
                comp_input = torch.stft(data, n_fft=2048, window=torch.hann_window(2048), return_complex=True)
                real, imag = comp_input.real.unsqueeze(0).to(args.device), comp_input.imag.unsqueeze(0).to(args.device)
                realOUT, imagOUT = model(real, imag)
                loss_real = lossfunc(realOUT, real)
                loss_imag = lossfunc(imagOUT, imag)
                loss_real.backward()
                loss_imag.backward()
            optimizer.step()
            
        eval_results.append(TestModel(args, model, TRAINING_DATASET, num_clips, fourier=fourier))
        print(f'[Epoch {epoch}]\tEvaluation Loss: {eval_results[-1]}')
    print('Finished Training')
    
    return eval_results
Example #31

    q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')

    print("Load word2Vec")
    embeddings = {}
    for line in open(embedding_path, encoding='utf-8'):
        parts = line.strip().split()
        w = parts[0]
        vec = [float(x) for x in parts[1:]]
        embeddings[w] = vec

    emb, c = load_emb_matrix(q_i2w, embeddings)
    del embeddings
    train_set = Dataset("/home/hbenyounes/vqa/datasets/coco/train/images.feat",
                        "/home/hbenyounes/vqa/datasets/coco/train/img_ids.txt",
                        "/home/hbenyounes/vqa/datasets/coco/train/questions.idxs",
                        "/home/hbenyounes/vqa/datasets/coco/train/answers.idxs")


    test_set = Dataset("/home/hbenyounes/vqa/datasets/coco/test/images.feat",
                        "/home/hbenyounes/vqa/datasets/coco/test/img_ids.txt",
                        "/home/hbenyounes/vqa/datasets/coco/test/questions.idxs",
                        "/home/hbenyounes/vqa/datasets/coco/test/answers.idxs")

    if not exists(join(root_path, model_name)):
        mkdir(join(root_path, model_name))

    q_i2w, q_w2i = load_vocab('datasets/coco/train/questions.vocab')
    a_i2w, a_w2i = load_vocab('datasets/coco/train/answers.vocab')
    Nq = len(q_i2w)
    Na = len(a_i2w)