Code Example #1
File: test_sub_dataset.py  Project: AkioKanno/chainer
    def test_get_cross_validation_datasets(self):
        original = [1, 2, 3, 4, 5, 6]
        cv1, cv2, cv3 = datasets.get_cross_validation_datasets(original, 3)

        tr1, te1 = cv1
        self.assertEqual(len(tr1), 4)
        self.assertEqual(tr1[0], 1)
        self.assertEqual(tr1[1], 2)
        self.assertEqual(tr1[2], 3)
        self.assertEqual(tr1[3], 4)
        self.assertEqual(len(te1), 2)
        self.assertEqual(te1[0], 5)
        self.assertEqual(te1[1], 6)

        tr2, te2 = cv2
        self.assertEqual(len(tr2), 4)
        self.assertEqual(tr2[0], 5)
        self.assertEqual(tr2[1], 6)
        self.assertEqual(tr2[2], 1)
        self.assertEqual(tr2[3], 2)
        self.assertEqual(len(te2), 2)
        self.assertEqual(te2[0], 3)
        self.assertEqual(te2[1], 4)

        tr3, te3 = cv3
        self.assertEqual(len(tr3), 4)
        self.assertEqual(tr3[0], 3)
        self.assertEqual(tr3[1], 4)
        self.assertEqual(tr3[2], 5)
        self.assertEqual(tr3[3], 6)
        self.assertEqual(len(te3), 2)
        self.assertEqual(te3[0], 1)
        self.assertEqual(te3[1], 2)
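
As a quick reference for the API these examples share: chainer.datasets.get_cross_validation_datasets(dataset, n_fold) returns n_fold (train, test) pairs of SubDatasets that rotate through contiguous slices of the data, exactly as the assertions above verify; get_cross_validation_datasets_random shuffles the order first. A minimal sketch (not taken from any of the projects below):

from chainer import datasets

original = [1, 2, 3, 4, 5, 6]
folds = datasets.get_cross_validation_datasets(original, 3)
for i, (train, test) in enumerate(folds, start=1):
    # train and test are chainer.datasets.SubDataset views over `original`
    print('fold {}: train={} test={}'.format(i, list(train), list(test)))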
Code Example #2
File: trainer-CV.py  Project: mrezler/smiles
        F_list.append(mol_to_feature(mol,-1,args.atomsize))
        T_list.append(mol.GetProp('_Name') )
                
    #-------------------------------    
    # Setting Dataset to model
f.write("Reshape the Dataset...\n")
Mf.random_list(F_list)
Mf.random_list(T_list)
data_t = cp.asarray(T_list, dtype=cp.int32).reshape(-1,1)
data_f = cp.asarray(F_list, dtype=cp.float32).reshape(-1,1,args.atomsize,lensize)
f.write('{0}\t{1}\n'.format(data_t.shape, data_f.shape))

f.write('Validate the Dataset... k = {0}\n'.format(args.validation))
dataset = datasets.TupleDataset(data_f, data_t)
if args.validation > 1:
    dataset = datasets.get_cross_validation_datasets(dataset, args.validation)
    #dataset = datasets.get_cross_validation_datasets_random(dataset, args.validation)
    
#-------------------------------
# reset memory
del mol, mols, data_f, data_t, F_list, T_list
gc.collect()
#-------------------------------      
# 5-fold
print('Training...')
f.write('Convolutional neural network is running...\n')
v = 1
while v <= args.validation:
    print('...{0}'.format(v))
    f.write('Cross-Validation : {0}\n'.format(v))
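
The snippet above is cut off inside the per-fold while loop. A minimal sketch of how such a loop body typically continues, assuming args.validation > 1 so that dataset is the list of (train, test) pairs returned above (the batch size literal and the trailing steps are placeholders, not code from the original project):

    # hypothetical continuation: unpack the v-th fold and wrap each side
    # in a SerialIterator before building the model/optimizer/trainer
    train_data, test_data = dataset[v - 1]
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=32)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=32, repeat=False, shuffle=False)
    # ... build model, optimizer, updater, and trainer for this fold ...
    v += 1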
Code Example #3
    min_layer = 3
    min_node = n_in * 1.5
    max_epoch = 10000
    min_epoch = 5000
    #nnode = (min_node+np.array(hp[:,0])*(max_node-min_node)+0.5).astype(np.int32)
    #nlayer = (min_layer+np.array(hp[:,1])*(max_layer-min_layer)+0.5).astype(np.int32)
    #nepoch = (min_epoch+np.array(hp[:,2])*(max_epoch-min_epoch)+0.5).astype(np.int32)
    nnode0 = [172]
    nnode1 = [100]
    nnode2 = [100]
    nlayer = [4]
    nepoch = [2000]
    nreport = 100

    # divide the data into train and validation folds
    data_list = get_cross_validation_datasets(data, nfold)

    r2_train_1 = np.zeros(nexp)
    r2_valid_1 = np.zeros(nexp)
    loss_train = []
    loss_valid = []
    R2_train = []
    R2_valid = []
    it_train_loss = []
    it_valid_loss = []
    it_train_r2 = []
    it_valid_r2 = []

    r2_train_2 = np.zeros(nexp)
    r2_valid_2 = np.zeros(nexp)
Code Example #4
File: train.py  Project: kwignb/ConvLSTM
def train():

    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--model', '-m', type=str, default=None)
    parser.add_argument('--opt', type=str, default=None)
    parser.add_argument('--validation', '-v', type=int, default=5)
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--inf', type=int, default=3)
    parser.add_argument('--outf', type=int, default=3)
    parser.add_argument('--batch', '-b', type=int, default=1)
    args = parser.parse_args()

    train = dataset.UCSDped1Dataset(0, 200, args.inf, args.outf, "./ucsd_ped1_train.npy")

    # cross validation
    dataset_ = datasets.get_cross_validation_datasets(train, args.validation, order=None)
    
    v = 1
    while v <= args.validation:

        model = convlstm.Model(n_input=2, size=[128,64,64])

        if args.model is not None:
            print( "loading model from " + args.model )
            serializers.load_npz(args.model, model)

        if args.gpu >= 0:
            cuda.get_device_from_id(0).use()
            model.to_gpu()

        optimizer = optimizers.RMSprop(lr=args.lr)
        optimizer.setup(model)
        
        if args.opt is not None:
            print( "loading opt from " + args.opt )
            serializers.load_npz(args.opt, optimizer)

        train_iter = chainer.iterators.SerialIterator(dataset_[v-1][0], batch_size=args.batch, shuffle=False)
        test_iter = chainer.iterators.SerialIterator(dataset_[v-1][1], batch_size=args.batch, repeat=False, shuffle=False)
        
        updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
        trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='results')
        
        trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
          
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch'), log_name='log_'+str(v)+'_epoch'))
        trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))

        trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
        trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'],
                       x_key='epoch', file_name='loss_'+str(v)+'_epoch.png'))
        trainer.extend(extensions.ProgressBar(update_interval=1))
        
        trainer.run()
        
        modelname = "./results/model" + str(v)
        print( "saving model to " + modelname )
        serializers.save_npz(modelname, model)

        optname = "./results/opt" + str(v)
        print( "saving opt to " + optname )
        serializers.save_npz(optname, optimizer)

        v = v + 1
Code Example #5
def main(args):
    random.seed(0)
    np.random.seed(0)
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        cuda.cupy.random.seed(0)

    dataset, id2ene = load_dataset(args.dataset, args.features, args.redirects)
    print(f'# of examples in dataset: {len(dataset)}')

    def batch2tensors(batch, device):
        xp = cuda.cupy if device >= 0 else np

        xf = xp.zeros((len(batch), args.n_feature), dtype='f')
        xe = xp.zeros((len(batch), args.embed_size), dtype='f')
        t = xp.zeros((len(batch), len(id2ene)), dtype='i')

        for i, item in enumerate(batch):
            for feature_id in item['feature_ids']:
                if feature_id < args.n_feature:
                    xf[i, feature_id] = 1.0

            if item['embedding']:
                xe[i] = xp.array(item['embedding'], dtype='f')

            for ene_id in item['ene_ids']:
                t[i, ene_id] = 1

        x = xp.concatenate((xf, xe), axis=1)

        return x, t

    cv_datasets = get_cross_validation_datasets(dataset, args.cv)
    ys = []
    ts = []
    for split_idx, cv_dataset in enumerate(cv_datasets):
        print(f'cross validation ({split_idx + 1}/{len(cv_datasets)})')
        train, test = cv_dataset
        train_iter = SerialIterator(train, batch_size=args.batch)
        test_iter = SerialIterator(test,
                                   batch_size=args.batch,
                                   repeat=False,
                                   shuffle=False)

        model = ENEClassifier(in_size=args.n_feature + args.embed_size,
                              hidden_size=args.hidden_size,
                              out_size=len(id2ene))

        if args.gpu >= 0:
            model.to_gpu(args.gpu)

        optimizer = optimizers.Adam()
        optimizer.setup(model)
        updater = StandardUpdater(train_iter,
                                  optimizer,
                                  converter=batch2tensors,
                                  device=args.gpu)

        trainer = Trainer(updater, (args.epoch, 'epoch'), out=args.out_dir)
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.snapshot_object(
                model, filename='epoch_{.updater.epoch}.model'))
        trainer.extend(
            extensions.Evaluator(test_iter,
                                 model,
                                 converter=batch2tensors,
                                 device=args.gpu))
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'main/loss', 'validation/main/loss',
                 'elapsed_time']))
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.run()

        test_iter.reset()
        for batch in test_iter:
            x, t = batch2tensors(batch, device=args.gpu)
            with chainer.using_config('train', False):
                y = model.predict(x)

            ys.append(y)
            ts.append(t)

    y_all = F.concat(ys, axis=0)
    t_all = F.concat(ts, axis=0)

    prediction_matrix = (y_all.data >= 0.5).astype('f')
    reference_matrix = (t_all.data == 1).astype('f')
    accuracy_matrix = prediction_matrix * reference_matrix

    eb_pred = prediction_matrix.sum(
        axis=1)  # entity-based num. of predicted classes
    eb_ref = reference_matrix.sum(
        axis=1)  # entity-based num. of reference classes
    eb_acc = accuracy_matrix.sum(
        axis=1)  # entity-based num. of accurate classes

    eb_nopred = (eb_pred == 0.).astype('f')  # for avoiding zero-division
    eb_precision = (eb_acc / (eb_pred + eb_nopred)).mean()
    eb_recall = (eb_acc / eb_ref).mean()
    eb_f1 = (2 * eb_acc / (eb_pred + eb_ref)).mean()

    cb_pred = prediction_matrix.sum(
        axis=0)  # class-based num. of predicted examples
    cb_ref = reference_matrix.sum(
        axis=0)  # class-based num. of reference examples
    cb_acc = accuracy_matrix.sum(
        axis=0)  # class-based num. of accurate examples

    cb_nopred = (cb_pred == 0.).astype('f')  # for avoiding zero-division
    cb_macro_precision = (cb_acc / (cb_pred + cb_nopred)).mean()
    cb_macro_recall = (cb_acc / cb_ref).mean()
    cb_macro_f1 = (2 * cb_acc / (cb_pred + cb_ref)).mean()

    cb_micro_precision = cb_acc.sum() / cb_pred.sum()
    cb_micro_recall = cb_acc.sum() / cb_ref.sum()
    cb_micro_f1 = (2 * cb_acc.sum()) / (cb_pred.sum() + cb_ref.sum())

    print(f'Entity-based Precision:      {float(eb_precision):.2%}')
    print(f'Entity-based Recall:         {float(eb_recall):.2%}')
    print(f'Entity-based F1 score:       {float(eb_f1):.2%}')

    print(f'Class-based macro Precision: {float(cb_macro_precision):.2%}')
    print(f'Class-based macro Recall:    {float(cb_macro_recall):.2%}')
    print(f'Class-based macro F1 score:  {float(cb_macro_f1):.2%}')

    print(f'Class-based micro Precision: {float(cb_micro_precision):.2%}')
    print(f'Class-based micro Recall:    {float(cb_micro_recall):.2%}')
    print(f'Class-based micro F1 score:  {float(cb_micro_f1):.2%}')

    print('writing out classification results')
    with open(Path(args.out_dir) / 'classification_result.json', 'w') as fo:
        for i, item in tqdm(enumerate(dataset)):
            title = item['title']
            predicted_classes = [
                id2ene[j] for j, v in enumerate(prediction_matrix[i])
                if v == 1.0
            ]
            reference_classes = [
                id2ene[j] for j, v in enumerate(reference_matrix[i])
                if v == 1.0
            ]
            out = {
                'title': title,
                'prediction': predicted_classes,
                'reference': reference_classes
            }
            print(json.dumps(out, ensure_ascii=False), file=fo)
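
The eb_nopred / cb_nopred trick above deserves a tiny worked example: when an entity has no predicted classes, eb_pred is 0 and eb_nopred becomes 1, so the division yields 0 instead of a 0/0 that would propagate NaN into the mean. A minimal, self-contained sketch (the toy matrices are made up purely for illustration):

import numpy as np

# 2 entities, 3 classes
prediction_matrix = np.array([[1, 0, 1],
                              [0, 0, 0]], dtype='f')
reference_matrix = np.array([[1, 0, 0],
                             [0, 1, 0]], dtype='f')
accuracy_matrix = prediction_matrix * reference_matrix

eb_pred = prediction_matrix.sum(axis=1)                  # [2., 0.]
eb_acc = accuracy_matrix.sum(axis=1)                     # [1., 0.]
eb_nopred = (eb_pred == 0.).astype('f')                  # [0., 1.]  guards the second entity
eb_precision = (eb_acc / (eb_pred + eb_nopred)).mean()   # (0.5 + 0.0) / 2 = 0.25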