Example #1
def main():
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[6]
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropui'

    writer = SummaryWriter(log_path)
    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)
    trainer = get_trainer(trainer_config, dataset, model)
    trainer.train(verbose=True, writer=writer)
    writer.close()

    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    model.config['dataset'] = new_dataset
    model.n_users, model.n_items = new_dataset.n_users, new_dataset.n_items
    model.feat_mat, _, _, model.row_sum = model.generate_feat(new_dataset,
                                                              is_updating=True)
    model.update_feat_mat()
    trainer = get_trainer(trainer_config, new_dataset, model)
    trainer.inductive_eval(dataset.n_users, dataset.n_items)
Example #2
def train():
    parser = argparse.ArgumentParser(description='Which model to run')
    parser.add_argument('m',
                        default=0,
                        type=int,
                        nargs='?',
                        help='model number')
    args = parser.parse_args()

    print(f'running model {args.m}')
    if args.m == 4:
        model = get_model4(walk_length=20, nr_walks=20, feature_length=12)
    else:
        model = get_model(walk_length=20, nr_walks=20, feature_length=12)

    ds_train = get_dataset(path=savedir,
                           walk_length=20,
                           nr_walks=20,
                           feature_length=12,
                           train=True)
    ds_val = get_dataset(path=savedir,
                         walk_length=20,
                         nr_walks=20,
                         feature_length=12,
                         train=False)

    model.compile(loss=[tf.keras.losses.MeanSquaredError(name='mse')],
                  optimizer='adam',
                  metrics=['accuracy', 'mse'])

    hist = model.fit(x=ds_train, epochs=100, validation_data=ds_val, verbose=2)

    model.save("my_model")
Example #3
File: run.py Project: zingp/cbot
def train(config):
    # train_path:train-context.json
    args = config.args
    train_set = get_dataset(config.train_path,
                            config.w2i_vocabs,
                            config,
                            is_train=True)
    dev_set = get_dataset(config.dev_path,
                          config.w2i_vocabs,
                          config,
                          is_train=False)
    # X:img,torch.stack;
    train_batch = get_dataloader(train_set, args.batch_size, is_train=True)
    model = Model(n_emb=args.n_emb,
                  n_hidden=args.n_hidden,
                  vocab_size=args.vocab_size,
                  dropout=args.dropout,
                  d_ff=args.d_ff,
                  n_head=args.n_head,
                  n_block=args.n_block)
    if args.restore != '':
        model_dict = torch.load(args.restore)
        model.load_state_dict(model_dict)
    model.to(device)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=args.lr)
    best_score = -1000000

    for i in range(args.epoch):
        model.train()
        report_loss, start_time, n_samples = 0, time.time(), 0
        count, total = 0, len(train_set) // args.batch_size + 1
        for batch in train_batch:
            Y, T = batch
            Y = Y.to(device)
            T = T.to(device)
            optimizer.zero_grad()
            loss = model(Y, T)
            loss.backward()
            optimizer.step()
            report_loss += loss.item()
            #break
            n_samples += len(Y.data)
            count += 1
            if count % args.report == 0 or count == total:
                print('%d/%d, epoch: %d, report_loss: %.3f, time: %.2f' %
                      (count, total, i + 1, report_loss / n_samples,
                       time.time() - start_time))
                score = eval(model, dev_set, args.batch_size)
                model.train()
                if score > best_score:
                    best_score = score
                    save_model(os.path.join(args.dir, 'best_checkpoint.pt'),
                               model)
                else:
                    save_model(os.path.join(args.dir, 'checkpoint.pt'), model)
                report_loss, start_time, n_samples = 0, time.time(), 0

    return model
Example #4
def main():
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[3]
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropit'

    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)

    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    model.config['dataset'] = new_dataset
    trainer = get_trainer(trainer_config, new_dataset, model)
    results, _ = trainer.eval('test')
    print('Previous interactions test result. {:s}'.format(results))

    data_mat = sp.coo_matrix((np.ones((len(
        new_dataset.train_array), )), np.array(new_dataset.train_array).T),
                             shape=(new_dataset.n_users, new_dataset.n_items),
                             dtype=np.float32).tocsr()
    model.data_mat = data_mat
    results, _ = trainer.eval('test')
    print('Updated interactions test result. {:s}'.format(results))
Example #5
def get_data(dataset_index,
             t0=-150, t1=100, debug=False,
             Nsmooth=2,
             smoothing=None):

    # loading data
    print(get_dataset()[dataset_index])
    delay = 0  #get_dataset()[dataset_index]['delay']
    f = loadmat(get_dataset()[dataset_index]['filename'])
    data = 1e3 * f['matNL'][0]['stim1'][0]
    data[np.isnan(data)] = 0  # blank NaN entries
    time = f['matNL'][0]['time'][0].flatten()
    space = f['matNL'][0]['space'][0].flatten()
    if smoothing is None:
        smoothing = np.ones((Nsmooth, Nsmooth)) / Nsmooth**2
    smooth_data = convolve2d(data, smoothing, mode='same')
    # smooth_data = data  # REMOVE DATA SMOOTHING
    # apply time conditions
    cond = (time > t0 - delay) & (time < t1 - delay)
    new_time, new_data = np.array(time[cond]), np.array(smooth_data[:, cond])
    # get onset time
    tmax = get_time_max(new_time, new_data, debug=debug)
    x_center = get_stim_center(new_time,
                               space,
                               new_data,
                               debug=debug,
                               tmax=tmax)
    return new_time - tmax, space - x_center, new_data
Example #6
def main():
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[5]
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropit'

    writer = SummaryWriter(log_path)
    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)
    trainer = get_trainer(trainer_config, dataset, model)
    trainer.train(verbose=True, writer=writer)
    writer.close()

    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    model.config['dataset'] = new_dataset
    trainer = get_trainer(trainer_config, new_dataset, model)
    results, _ = trainer.eval('test')
    print('Previous interactions test result. {:s}'.format(results))

    model.normalized_data_mat = model.get_data_mat(new_dataset)
    results, _ = trainer.eval('test')
    print('Updated interactions test result. {:s}'.format(results))
Example #7
def get_data_loaders_new(args, tokenizer):
    train_data = get_dataset(tokenizer,
                             args.train_path,
                             args.fea_path,
                             n_history=args.max_history)
    valid_data = get_dataset(tokenizer,
                             args.valid_path,
                             args.fea_path,
                             n_history=args.max_history)
    train_dataset = AVSDDataSet(train_data[0],
                                tokenizer, (train_data[1], valid_data[1]),
                                drop_rate=0,
                                train=True)
    valid_dataset = AVSDDataSet(valid_data[0],
                                tokenizer, (valid_data[1], train_data[1]),
                                drop_rate=0,
                                train=False)
    train_loader = DataLoader(train_dataset,
                              shuffle=(not args.distributed),
                              batch_size=args.train_batch_size,
                              num_workers=4,
                              collate_fn=lambda x: collate_fn(
                                  x, tokenizer.pad_token_id, features=True))
    valid_loader = DataLoader(valid_dataset,
                              shuffle=False,
                              batch_size=args.valid_batch_size,
                              num_workers=4,
                              collate_fn=lambda x: collate_fn(
                                  x, tokenizer.pad_token_id, features=True))
    return train_loader, valid_loader
Example #8
def test_ClfDset():
    config = read_yaml("./tests/config/arcface.yaml")
    dset_config = read_yaml("./tests/config/mnist.yaml")

    dataset = get_dataset(config, dset_config, mode="train")
    dataset = get_dataset(config, dset_config, mode="valid")
    dataset = get_dataset(config, dset_config, mode="valid")
Example #9
def test_MnistDset():
    config = read_yaml("./config/clf.yaml")
    dset_config = read_yaml("./config/mnist.yaml")

    dataset = get_dataset(config, dset_config, mode="train")
    dataset = get_dataset(config, dset_config, mode="valid")
    dataset = get_dataset(config, dset_config, mode="valid")
Example #10
def main(data_path, model_path, idtable_path, step, split):

    if split == 'Valid':
        dataset = get_dataset(data_path,
                              "valid_data.csv",
                              vcc18=True,
                              valid=True,
                              idtable=idtable_path)

    elif split == 'Test':
        dataset = get_dataset(data_path,
                              "testing_data.csv",
                              vcc18=True,
                              valid=True,
                              idtable=idtable_path)

    dataloader = get_dataloader(dataset,
                                batch_size=20,
                                num_workers=1,
                                shuffle=False)

    model = MBNet(num_judges=5000).to(device)
    model.load_state_dict(torch.load(model_path))

    lamb = 4
    valid(model, dataloader, step, split, lamb)
Example #11
def main():
    log_path = __file__[:-3]
    init_run(log_path, 2021)

    device = torch.device('cuda')
    config = get_gowalla_config(device)
    dataset_config, model_config, trainer_config = config[3]
    dataset_config['path'] = dataset_config['path'][:-4] + '0_dropui'

    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)

    dataset_config['path'] = dataset_config['path'][:-7]
    new_dataset = get_dataset(dataset_config)
    model.config['dataset'] = new_dataset
    model.n_users, model.n_items = new_dataset.n_users, new_dataset.n_items
    data_mat = sp.coo_matrix((np.ones((len(
        new_dataset.train_array), )), np.array(new_dataset.train_array).T),
                             shape=(new_dataset.n_users, new_dataset.n_items),
                             dtype=np.float32).tocsr()
    model.data_mat = data_mat
    sim_mat = model.sim_mat.tocoo()
    sim_mat = sp.coo_matrix((sim_mat.data, (sim_mat.row, sim_mat.col)),
                            shape=(new_dataset.n_items, new_dataset.n_items))
    model.sim_mat = sim_mat.tocsr()
    trainer = get_trainer(trainer_config, new_dataset, model)
    trainer.inductive_eval(dataset.n_users, dataset.n_items)
Example #12
    def __init__(self, config):
        super(Trainer, self).__init__()
        self.use_cuda = torch.cuda.is_available()
        self.device = 'cuda' if self.use_cuda else 'cpu'
        # self.device ='cuda:1'

        # model
        self.modef = config['model']
        self.model = get_model(config)
        self.input_dims = config['input_dims']
        self.z_dims = config['z_dims']
        self.prior = distributions.MultivariateNormal(torch.zeros(self.z_dims),
                                                      torch.eye(self.z_dims))

        # train
        self.max_iter = config['max_iter']
        self.global_iter = 1
        self.mseWeight = config['mse_weight']
        self.lr = config['lr']
        self.beta1 = config['beta1']
        self.beta2 = config['beta2']
        self.optim = optim.Adam(self.model.parameters(),
                                lr=self.lr,
                                betas=(self.beta1, self.beta2))
        self.implicit = 'implicit' in config and config['implicit']
        if self.implicit:
            self.train_inst = self.implicit_inst

        # saving
        self.ckpt_dir = config['ckpt_dir']
        os.makedirs(self.ckpt_dir, exist_ok=True)
        self.ckpt_name = config['ckpt_name']
        self.save_output = config['save_output']
        self.output_dir = config['output_dir']
        os.makedirs(self.output_dir, exist_ok=True)
        # resume training from a saved checkpoint if requested
        if config['cont'] and self.ckpt_name is not None:
            self.load_checkpoint(self.ckpt_name)

        self.meta = defaultdict(list)

        self.gather_step = config['gather_step']
        self.display_step = config['display_step']
        self.save_step = config['save_step']

        # data
        self.dset_dir = config['dset_dir']
        self.dataset = config['dataset']
        self.data_type = config['data_type']
        if self.data_type == 'linear':
            self.draw_reconstruction = self.linear_reconstruction
            self.draw_generated = self.linear_generated
            self.visualize_traverse = self.linear_traverse
            self.traversal = self.linear_traversal
        self.batch_size = config['batch_size']
        self.img_size = 32 if 'image_size' not in config else config[
            'image_size']
        self.data_loader = get_dataset(config)
        self.val_loader = get_dataset(config, train=False)
Example #13
def get_dataset(params):
    parameter = params.get('parameter', None)
    if isinstance(parameter, list):
        return dataset.get_dataset(*parameter)
    elif isinstance(parameter, dict):
        return dataset.get_dataset(**parameter)
    elif parameter is not None:
        return dataset.get_dataset(parameter)
    return dataset.get_dataset()
Example #14
def test_build_triplet():
    config = read_yaml("./tests/config/triplet.yaml")
    dset_config = read_yaml("./tests/config/mnist.yaml")

    dset = get_dataset(config, dset_config, mode="train")
    valid_dset = get_dataset(config, dset_config, mode="valid")
    model = get_model(config, dset_config)

    trainer = get_trainer(config, dset_config)
Example #15
def test_build_arcface():
    config = read_yaml("./tests/config/arcface.yaml")
    dset_config = read_yaml("./tests/config/mnist.yaml")

    dset = get_dataset(config, dset_config, mode="train")
    valid_dset = get_dataset(config, dset_config, mode="valid")
    model = get_model(config, dset_config)

    trainer = get_trainer(config, dset_config)
    trainer.train(dataset=dset, valid_dataset=valid_dset, model=model)
Example #16
    def _make_input(self):
        train_dataset = get_dataset(self.vocab.word2idx,
                                    self.train_tfrecord_file,
                                    self.train_size,
                                    repeat_num=self.num_epochs,
                                    shuffle_bufer=1000,
                                    prefetch=1000)
        valid_dataset = get_dataset(self.vocab.word2idx,
                                    self.valid_tfrecord_file,
                                    self.valid_size,
                                    repeat_num=-1,
                                    shuffle_bufer=1000)
        test_dataset = get_dataset(self.vocab.word2idx,
                                   self.test_tfrecord_file,
                                   self.test_size,
                                   repeat_num=1,
                                   shuffle_bufer=1000)

        self.train_iterator = train_dataset.make_initializable_iterator()
        self.valid_iterator = valid_dataset.make_initializable_iterator()
        self.test_iterator = test_dataset.make_initializable_iterator()

        data_iter = tf.data.Iterator.from_string_handle(
            self.data_handle, train_dataset.output_types,
            train_dataset.output_shapes)
        batch_data = data_iter.get_next()
        self.passage = tf.cast(batch_data["passage"], tf.int32, name="passage")
        self.query = tf.cast(batch_data["query"], tf.int32, name="query")
        self.answer = tf.cast(batch_data['answer'], tf.int32, name="answer")
        self.passage_len = tf.cast(batch_data["passage_len"],
                                   tf.int32,
                                   name="passage_len")
        self.query_len = tf.cast(batch_data["query_len"],
                                 tf.int32,
                                 name="query_len")
        self.answer_len = tf.cast(batch_data["answer_len"],
                                  tf.int32,
                                  name="answer_len")
        self.query_id = tf.cast(batch_data["query_id"],
                                tf.int32,
                                name="query_id")

        # make labels and predict
        self.alter0 = tf.cast(batch_data["alter0"], tf.int32, name="alter0")
        self.alter1 = tf.cast(batch_data["alter1"], tf.int32, name="alter1")
        self.alter2 = tf.cast(batch_data["alter2"], tf.int32, name="alter2")
        self.alter0_len = tf.cast(batch_data["alter0_len"],
                                  tf.int32,
                                  name="alter0_len")
        self.alter1_len = tf.cast(batch_data["alter1_len"],
                                  tf.int32,
                                  name="alter1_len")
        self.alter2_len = tf.cast(batch_data["alter2_len"],
                                  tf.int32,
                                  name="alter2_len")
Example #17
    def run_trial(self, trial, data_dir, num_val_batches, objective, *args, **kwargs):
        hp = trial.hyperparameters
        model = self.hypermodel.build(trial.hyperparameters)

        num_epochs = kwargs.get('num_epochs')

        batch_size = model.batch_size
        seq_len = model.seq_len
        overlap = model.big_frame_size
        q_type = 'mu-law'
        q_levels = 256

        (train_split, val_split) = get_dataset_filenames_split(
            data_dir,
            num_val_batches * model.batch_size
        )

        # Train, Val and Test Datasets
        train_dataset = get_dataset(train_split, num_epochs, batch_size, seq_len, overlap,
                                    drop_remainder=True, q_type=q_type, q_levels=q_levels)
        val_dataset = get_dataset(val_split, 1, batch_size, seq_len, overlap, shuffle=False,
                                  drop_remainder=True, q_type=q_type, q_levels=q_levels)

        # Get subseqs per batch...
        samples0, _ = librosa.load(train_split[0], sr=None, mono=True)
        steps_per_batch = int(np.floor(len(samples0) / float(seq_len)))

        # Get subseqs per epoch...
        steps_per_epoch = len(train_split) // batch_size * steps_per_batch

        # Train...
        history = model.fit(
            train_dataset,
            epochs=num_epochs,
            steps_per_epoch=steps_per_epoch,
            shuffle=False,
            validation_data=val_dataset 
        )

        # See https://github.com/keras-team/keras-tuner/blob/master/kerastuner/engine/multi_execution_tuner.py#L95
        metrics = collections.defaultdict()
        for metric, epoch_values in history.history.items():
            if self.oracle.objective.direction == 'min':
                best_value = np.min(epoch_values)
            else:
                best_value = np.max(epoch_values)
            metrics[metric] = best_value

        oracle_metrics_dict = {objective: metrics[objective]}

        # If we completely override run_trial we need to call this at the end.
        # See https://keras-team.github.io/keras-tuner/documentation/tuners/#run_trial-method_1 
        self.oracle.update_trial(trial.trial_id, oracle_metrics_dict)
        self.save_model(trial.trial_id, model)
Example #18
def plot_response(args):

    fig, ax = plt.subplots(1, figsize=(4.7, 3))
    fig.suptitle(get_dataset()[args.data_index]['filename'])
    plt.subplots_adjust(bottom=.23, top=.9, right=.84, left=.25)

    print(get_dataset()[args.data_index])
    f = loadmat(get_dataset()[args.data_index]['filename'])
    data = 1e3 * f['matNL'][0]['stim1'][0]
    time = f['matNL'][0]['time'][0].flatten() + args.tshift
    print(time[-1] - time[0])
    space = f['matNL'][0]['space'][0].flatten()
    if args.Nsmooth > 0:
        smoothing = np.ones((args.Nsmooth, args.Nsmooth)) / args.Nsmooth**2
        smooth_data = convolve2d(data, smoothing, mode='same')
    else:
        smooth_data = data

    cond = (time > args.t0) & (time < args.t1)
    c = ax.contourf(time[cond], space, smooth_data[:,cond],\
             np.linspace(smooth_data.min(), smooth_data.max(), args.Nlevels), cmap=cm.viridis)
    plt.colorbar(c, label='VSD signal ($\perthousand$)', ticks=args.vsd_ticks)

    x1, x2 = ax.get_xlim()
    ax.plot([x1, x1], [0, 2], '-', color='gray', lw=4)
    ax.annotate('2mm', (x1, 2), rotation=90, fontsize=14)
    y1, y2 = ax.get_ylim()
    ax.plot([x1, x1 + 50], [y1, y1], '-', color='gray', lw=4)
    ax.annotate('50ms', (x1 + 20, y1 + .5), fontsize=14)

    if args.with_onset_propag:
        tt, xx = find_latencies_over_space_simple(time, space,
                                         smooth_data[:,cond],
                                         signal_criteria=args.signal_criteria,\
                                         amp_criteria=args.amp_criteria)
        plt.plot(tt + args.tshift, xx, 'o', lw=0, ms=1, color='k')

        # for intervals in [[0,2.3], [2.5,5.7], [5.9,8.5]]:
        #     cond = (xx>intervals[0]) & (xx<intervals[1]) & (tt<20)
        #     pol = np.polyfit(xx[cond], tt[cond]+100, 1)
        #     xxx = np.linspace(xx[cond][0], xx[cond][-1])
        #     plt.plot(np.polyval(pol, xxx), xxx, 'w--', lw=2)

    # set_plot(ax, ['bottom'], yticks=[], xlabel='time (ms)')
    set_plot(ax, xlabel='time (ms)', ylabel='space (mm)')
    if args.SAVE:
        fig.savefig('/Users/yzerlaut/Desktop/temp.svg')
    else:
        show()
Example #19
def pp_trans_d_for_model(name):
    dataset = ds.get_dataset(ds.TO_TRANS_D.get('type'), name)
    size = dataset.datasize()

    # build an index list over the samples
    index_list = [i for i in range(size)]
    # shuffle the index list
    random.shuffle(index_list)

    train_size = int(size * 0.6)
    valid_size = int(size * 0.2)
    test_size = int(size * 0.2)

    X = dataset.X.as_matrix()
    Y = dataset.Y.as_matrix()

    # drop the date column
    X = X[:, 1:]
    # cast the data to float dtype
    X = X.astype('float')

    # Convert Y into int class labels.
    # TensorFlow only accepts class labels greater than 0, so Y was cast to int and shifted by +10.
    # Y = np.asarray(list(map(lambda x: int(x) + 10, Y)))
    Y = np.asarray(list(map(float, Y)))

    Y = uniform_distribution(Y, 21, 'n')

    print("Y shape is %s" % str(Y.shape))

    # build the training set
    train_X = X[index_list[:train_size]]
    train_Y = Y[index_list[:train_size]]

    # build the validation set
    valid_X = X[index_list[train_size:train_size + valid_size]]
    valid_Y = Y[index_list[train_size:train_size + valid_size]]

    # build the test set
    test_X = X[index_list[train_size + valid_size:]]
    test_Y = Y[index_list[train_size + valid_size:]]

    print("orig X shape %s" % (str(X.shape)))
    print("orig Y shape %s" % (str(Y.shape)))

    print("train X shape %s" % (str(train_X.shape)))
    print("train Y shape %s" % (str(train_Y.shape)))
    print("valid X shape %s" % (str(valid_X.shape)))
    print("valid Y shape %s" % (str(valid_Y.shape)))
    print("test X shape %s" % (str(test_X.shape)))
    print("test Y shape %s" % (str(test_Y.shape)))

    return {
        "train_X": train_X,
        "train_Y": train_Y,
        "valid_X": valid_X,
        "valid_Y": valid_Y,
        "test_X": test_X,
        "test_Y": test_Y
    }
Example #20
def predict_svd_90():
    """
        Description :
        Retains 90% of the energy in terms of singular values of the 'sigma' matrix obtained after
        SVD decomposition and performs reconstruction of the original matrix using these singular
        values.

        Parameter(s):

        Return:
        pred : A list of lists, each of the form [userid, prediction, itemid]
        y : The actual ratings given by the users in the test set
    """
    ratings = get_dataset()
    U,S,Vt = svd(ratings,1)
    print(U.shape,S.shape,Vt.shape)
    total = 0
    for i in range(S.shape[0]):
        total += S[i,i]*S[i,i]
    so_far = 0
    ind = 0
    for i in range(S.shape[0]):
        so_far += S[i,i]*S[i,i]
        if so_far/total > 0.9:
            ind = i
            break

    U = U[:,:(ind+1)]
    Vt = Vt[:(ind+1)]
    S = S[:(ind+1)]
    S = S[:,:(ind+1)]
    print(U.shape,S.shape,Vt.shape)
    pred,y = predict_svd(U,S,Vt)
    return pred,y
Example #21
def train_validate(model, hp, args):
    pm = get_model_pretrained()
    for p in pm.parameters():
        p.requires_grad = False
    ptp = layer_vectors(pm, args.device, True)
    del pm

    trainset, validset, validset_subjects, class_weights = get_dataset(
        args.dataroot)
    class_weights = class_weights.to(args.device)

    train_loader = DataLoader(trainset,
                              batch_size=args.batch_size,
                              num_workers=6,
                              shuffle=True,
                              drop_last=True)
    valid_loader = DataLoader(validset,
                              batch_size=args.batch_size,
                              num_workers=6,
                              shuffle=False)

    lmbdas = sorted([
        10**hp[key].value for key in
        ['lmbda0', 'lmbda1', 'lmbda2', 'lmbda3', 'lmbda4', 'lmbda5']
    ],
                    reverse=True)

    opt = torch.optim.Adam([
        {
            'params': model.get_params('layer0'),
            'lr': lmbdas[0]
        },
        {
            'params': model.get_params('layer1'),
            'lr': lmbdas[1]
        },
        {
            'params': model.get_params('layer2'),
            'lr': lmbdas[2]
        },
        {
            'params': model.get_params('layer3'),
            'lr': lmbdas[3]
        },
        {
            'params': model.get_params('layer4'),
            'lr': lmbdas[4]
        },
        {
            'params': model.get_params('cls'),
            'lr': lmbdas[5]
        },
    ])

    train(model, opt, args.steps, train_loader, class_weights, ptp, lmbdas,
          args.device)
    valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader,
                                                  class_weights, args.device)

    return f1
Example #22
    def train(self, stop_width, save_folder, tf_folder, start_width,
              num_samples):

        print('Number of devices: {}'.format(
            self.strategy.num_replicas_in_sync),
              flush=True)

        start_res = math.log(start_width, 2)
        stop_res = math.log(stop_width, 2)  # TODO: check that stop_width is a power of 2

        resolutions = [2**x for x in np.arange(2, stop_res + 1)]

        for i, resolution in enumerate(resolutions):
            print('Processing step {}: resolution {} with max resolution {}'.
                  format(i, resolution, resolutions[-1]),
                  flush=True)

            self.add_resolution()

            batch_size = self.get_batchsize()
            global_batch_size = batch_size * self.strategy.num_replicas_in_sync
            epochs = self.get_epochs()

            batched_dataset = self.generator.generate_latents(
                num_samples=num_samples)
            batched_dist_dataset = self.strategy.experimental_distribute_dataset(
                dataset.get_dataset(batched_dataset, global_batch_size))

            print('**** Batch size : {}   | **** Epochs : {}'.format(
                batch_size, epochs))

            if self.current_resolution >= start_res and self.current_resolution > 2:
                self.train_resolution(batched_dist_dataset, global_batch_size,
                                      epochs, save_folder, num_samples)
Example #23
def get_beer_dataset(data_dir, max_seq_length, word_threshold, balance=False):
    """
    Return tf datasets (train and dev) and language index
    for the beer dataset.
    Assume train.tsv and dev.tsv are in the dir.
    """
    processor = BeerProcessor()
    train_examples = processor.get_train_examples(data_dir)
    dev_examples = processor.get_dev_examples(data_dir)
    print("Dataset: Beer Review")
    print("Training samples %d, Validation samples %d" %
          (len(train_examples), len(dev_examples)))

    # check the label balance
    train_labels = np.array([0., 0.])
    for train_example in train_examples:
        train_labels += train_example["label"]
    print("Training data: %d positive examples, %d negative examples." %
          (train_labels[1], train_labels[0]))

    dev_labels = np.array([0., 0.])
    for dev_example in dev_examples:
        dev_labels += dev_example["label"]
    print("Dev data: %d positive examples, %d negative examples." %
          (dev_labels[1], dev_labels[0]))

    if balance:

        random.seed(12252018)

        print("Make the Training dataset class balanced.")
        # make the beer dataset to be a balanced dataset
        min_examples = int(min(train_labels[0], train_labels[1]))
        pos_examples = []
        neg_examples = []

        for train_example in train_examples:
            if train_example["label"][0] == 1:
                neg_examples.append(train_example)
            else:
                pos_examples.append(train_example)

        assert (len(neg_examples) == train_labels[0])
        assert (len(pos_examples) == train_labels[1])

        if train_labels[0] >= train_labels[1]:
            # more negative examples
            neg_examples = random.sample(neg_examples, min_examples)
        else:
            # more positive examples
            pos_examples = random.sample(pos_examples, min_examples)

        assert (len(pos_examples) == len(neg_examples))
        train_examples = pos_examples + neg_examples
        print(
            "After balance training data: %d positive examples, %d negative examples."
            % (len(pos_examples), len(neg_examples)))

    return get_dataset(train_examples, dev_examples, max_seq_length,
                       word_threshold)
Example #24
def main(args):
    alphabet = alphabet_factory()
    device = torch.device('cpu')
    checkpoint = torch.load('model_best.pth', map_location=device)
    in_features = args.n_mfcc * (2 * args.n_context + 1)
    model = build_deepspeech(in_features=in_features,
                             num_classes=len(alphabet))
    model.load_state_dict(checkpoint['state_dict'])
    print_size_of_model(model)
    decoder = GreedyDecoder()
    if args.quantize:
        model = torch.quantization.quantize_dynamic(model, {nn.RNN, nn.Linear},
                                                    dtype=torch.qint8)
        logging.info('quantized model')
        print_size_of_model(model)

    transform = prepare_transformations(args)
    dataset = ProcessedDataset(get_dataset(args.datadir, "dev-clean"),
                               transform, alphabet)
    collate_fn = collate_factory(model_length_function)
    criterion = nn.CTCLoss(blank=alphabet.mapping[alphabet.char_blank])
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             collate_fn=collate_fn,
                                             drop_last=False)
    test_loop_fn(dataloader, model, criterion, device, 1, decoder, alphabet)
Example #25
def test():
    sess = tf.InteractiveSession()

    # get test data
    _, _, ds_test = dataset.get_dataset()
    ds_test_iterator = ds_test.make_initializable_iterator()
    next_test_images, next_test_labels = ds_test_iterator.get_next()
    ds_test_iterator.initializer.run()

    # restore frozen graph
    gd = tf.GraphDef.FromString(open(FLAGS.frozen_pb, 'rb').read())
    images, logits = tf.import_graph_def(
        gd, return_elements=['images:0', FLAGS.output_node + ':0'])
    labels = tf.placeholder(tf.float32, [BATCH_SIZE, NUM_CLASSES],
                            name='labels')

    correct_pred = tf.equal(labels, tf.round(tf.sigmoid(logits)))
    acc_op = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # run test
    total_test_acc = []
    for i in range(0, TEST_SIZE, BATCH_SIZE):
        images_batch, labels_batch = sess.run(
            [next_test_images, next_test_labels])
        test_acc = acc_op.eval(feed_dict={
            images: images_batch,
            labels: labels_batch
        })
        total_test_acc += [test_acc]

    print('total_test_acc', np.mean(total_test_acc))
Example #26
def main():
    sentence, char_sentence, tags, _, _, test_iter = \
        get_dataset(BASE_PATH, "atis", BATCH_SIZE, is_inference=True)

    tagger = restore_model(
        "models/ner_cnn-bilstm-crf_*", restore=RESTORED_MODEL)

    final_result = ""
    for it in test_iter:
        words = it.sentence[0]
        sent_len = it.sentence[1]
        char_rep = it.char_sentence[0]

        result = torch.tensor(
            tagger.decode(char_rep, words, sent_len.numpy()),
            dtype=torch.int32)

        sentence_list = words.squeeze(0).numpy().tolist()
        tag_result = result.squeeze(-1).numpy().tolist()

        result_format = "{}  {}\n"
        this_result = ""
        for sent, tag in zip(sentence_list, tag_result):
            this_result += result_format.format(sentence.vocab.itos[sent],
                                                tags.vocab.itos[tag])
        this_result += "\n\n"
        final_result += this_result

    with open(BASE_PATH + "res_atis.txt", "w") as text_file:
        text_file.write(final_result)
Example #27
def main():
    args = parse_args()
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    data = get_dataset(shrink=200)
    if args.model_fn is not None:
        model_fn = args.model_fn
        model = torch.load(model_fn)
    else:
        model = None
    if args.mode == 'all':
        model = train(args.epoch, data)
        test(model, data)
    elif args.mode == 'train':
        train(args.epoch, data, model)
    elif args.mode == 'test':
        test(model,
             data,
             ep=0,
             iter=0,
             test_num=10,
             test_size=50,
             f_log=open("log/test.log", "w"))
    else:
        print("Wrong arguments!")
Example #28
def full_analysis(args):

    DATA = get_dataset()
    for i in range(len(DATA)):
        print('analyzing cell ', i, ' [...]')
        args.data_index = i
        analyze_scan(args)
Example #29
def fitness(lr, l2_reg, dropout):
    set_seed(2021)
    device = torch.device('cuda')
    dataset_config = {
        'name': 'ProcessedDataset',
        'path': 'data/Gowalla/time',
        'device': device
    }
    model_config = {
        'name': 'MultiVAE',
        'layer_sizes': [64, 32],
        'device': device,
        'dropout': dropout
    }
    trainer_config = {
        'name': 'MLTrainer',
        'optimizer': 'Adam',
        'lr': lr,
        'l2_reg': l2_reg,
        'kl_reg': 0.2,
        'device': device,
        'n_epochs': 1000,
        'batch_size': 512,
        'dataloader_num_workers': 6,
        'test_batch_size': 512,
        'topks': [20]
    }
    dataset = get_dataset(dataset_config)
    model = get_model(model_config, dataset)
    trainer = get_trainer(trainer_config, dataset, model)
    return trainer.train(verbose=True)
Example #30
def eval(path, sheet, epoch):
    dset = dataset.get_dataset(1, 'MPEG', False)
    cr = CNNCRluma().cuda()
    sr = CNNSRluma().cuda()
    print(torch.load(path)['epoch'])
    # loads net weights
    cr.load_state_dict(torch.load(path)['cr'])
    sr.load_state_dict(torch.load(path)['sr'])
    cr.eval()
    sr.eval()
    total, rate = 0.0, 0.0
    for iteration, data in enumerate(dset, 1):
        input, name = data[0].cuda(), data[1]
        with torch.no_grad():
            #FORWARD PASS========
            #down-sample input
            ds, _ = cr(input)
            ds = ds.clamp(0, 1)  # because of interpolation
            #code down-sampled input
            coded, bpp = encode(ds, 25)
            #up-sampled decoded image
            us = sr(coded).clamp(0,1)
            #====================
            out = transforms.ToPILImage(mode='L')(us[0][0].cpu())
            gt = transforms.ToPILImage(mode='L')(input[0][0].cpu())

            psnr = get_metrics(gt, out, False)[0]
            total += psnr
            rate += bpp
            print('Bpp: {} --- PSNR: {}'.format(bpp, psnr))
            torch.cuda.empty_cache()
    print(rate/len(dset))
    print(total/len(dset))
Example #31
File: mvkm.py Project: gaussWu/mvkm
    def _read_config(self):
        with open(self.config_file, 'r') as config:
            for line in config:
                if line.startswith('#'):
                    continue
                elif line.startswith('[PARAS]'):
                    self.alpha, self.gamma, self.max_iters = \
                            line.strip()[7:].strip().split()
                    self.alpha = float(self.alpha)
                    self.gamma = float(self.gamma)
                    self.max_iters = int(self.max_iters)

                elif line.startswith('[DATASET]'):
                    data = line.strip()[9:].strip().split()
                    data_name = data[0]
                    data_views = [int(v) for v in data[1:]]
                    self.X, self.truth, self.n_clusters, self.n_samples = \
                           ds.get_dataset(name=data_name, views=data_views)
                    # get number of views
                    self.n_views = len(self.X)
                    # get dimension of each view
                    self.dims = [x.shape[1] for x in self.X]

                elif line.startswith('[GROUPS]'):
                    tmp_list = line.strip()[8:].split(';')
                    self.group_size = [0] * self.n_views
                    #print self.n_views
                    for tl in tmp_list:
                        self.groups.append(tuple([int(t) 
                                for t in tl.strip().split()]))
                        lg = len(self.groups[-1])
                        for view in self.groups[-1]:
                            #print view
                            self.group_size[view] = lg 
                    # get number of groups
                    self.n_groups = len(self.groups)

                elif line.startswith('[INTERACTS]'):
                    tmp_list = line.strip()[11:].split(';')
                    for tl in tmp_list:
                        self.interacts.append(tuple([int(t) 
                                for t in tl.strip().split()]))
                    #print self.interacts
                    self._collect_interacts_items()