Code example #1
File: test_io.py  Project: wbaek/machinelearning
import logging
import pickle as pkl
import numpy as np

# Project-specific classes from wbaek/machinelearning; the module paths
# below are assumptions -- adjust them to match the repository layout.
from network import Network
from layers import Fullconnect
from activations import ReLu


def main(args):
    np.random.seed(0xC0FFEE)

    # A small 2-10-2 classifier with one ReLu hidden layer.
    n = Network()
    n.layers.append(Fullconnect(2, 10, ReLu.function, ReLu.derivative))
    n.layers.append(Fullconnect(10, 2))

    # Two 2-D point clusters and their one-hot class targets.
    x = np.array([[1, 2, 1, 2,  5, 6, 5, 6],
                  [5, 4, 4, 5,  1, 2, 2, 1]])
    t = np.array([[1, 1, 1, 1,  0, 0, 0, 0],
                  [0, 0, 0, 0,  1, 1, 1, 1]])

    for epoch in range(20):
        loss = n.train(x, t)

    # Serialize the trained parameters to disk.
    with open(args.dump_params, 'wb') as f:
        pkl.dump(n.dump_params().copy(), f)
    logging.info('pickle dump done')

    # Rebuild the same architecture and restore the parameters. The
    # original loaded from a hard-coded 'test.pkl', which only worked
    # when args.dump_params was that exact path; load the dumped file.
    nn = Network()
    nn.layers.append(Fullconnect(2, 10, ReLu.function, ReLu.derivative))
    nn.layers.append(Fullconnect(10, 2))

    with open(args.dump_params, 'rb') as f:
        nn.load_params(pkl.load(f).copy())
    logging.info('pickle load done')

    # The restored network should reproduce the original's predictions.
    print('before:', [['%.2f' % _ for _ in v] for v in n.predict(x)])
    print('after: ', [['%.2f' % _ for _ in v] for v in nn.predict(x)])
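
The script compares the two prediction tables by eye; the same round-trip invariant can also be checked programmatically. A minimal sketch, reusing the n, nn, and x defined in the example above:

# After load_params, the restored network should reproduce the original
# network's outputs exactly (same architecture, same weights).
before = np.array(n.predict(x))
after = np.array(nn.predict(x))
assert np.allclose(before, after), 'save/load round trip changed predictions'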
Code example #2
File: rnn_spacing.py  Project: wbaek/machinelearning
import os
import random
import logging
import pickle as pkl
import numpy as np

# Project-specific classes and helpers from wbaek/machinelearning; the
# module paths below are assumptions -- adjust them to the repository layout.
from network import Network
from layers import Fullconnect, Recurrent
from activations import Tanh, ReLU, Softmax
from updaters import GradientDescent
from utils import contextwin, onehotvector


def main(args):
    np.random.seed(0xC0FFEE)

    logging.info('load data start')
    # Open pickles in binary mode; files written by Python 2 may also
    # need pkl.load(f, encoding='latin1').
    with open('datas/kowiki_spacing_train.pkl', 'rb') as f:
        train_lex, train_y = pkl.load(f)
    with open('datas/kowiki_dict.pkl', 'rb') as f:
        words2idx = pkl.load(f)
    logging.info('load data done')

    # Reverse lookup (index -> word), handy for decoding predictions.
    index2words = {value: key for key, value in words2idx.items()}

    vocsize = len(words2idx) + 1  # +1 for the zero-padding index
    nclasses = 2                  # space / no-space
    nsentences = len(train_lex)

    context_window_size = args.window_size
    minibatch = args.minibatch
    learning_rate = args.learning_rate
    logging.info('vocsize:%d, nclasses:%d, window-size:%d, minibatch:%d, learning-rate:%.5f' % (
        vocsize, nclasses, context_window_size, minibatch, learning_rate))

    # Network: vocsize -> 256 (Tanh) -> Recurrent 256 (ReLU) -> 256 (ReLU)
    # -> nclasses, with a softmax output that ignores zero-padded steps.
    n = Network()
    n.layers.append(Fullconnect(vocsize, 256, Tanh.function, Tanh.derivative, updater=GradientDescent(learning_rate)))
    n.layers.append(Recurrent(256, 256, ReLU.function, ReLU.derivative, updater=GradientDescent(learning_rate)))
    n.layers.append(Fullconnect(256, 256, ReLU.function, ReLU.derivative, updater=GradientDescent(learning_rate)))
    n.layers.append(Fullconnect(256, nclasses, updater=GradientDescent(learning_rate)))
    n.activation = Softmax(is_zero_pad=True)

    # Resume from a previous parameter dump if one exists.
    if os.path.isfile(args.params):
        logging.info('load parameters from %s' % args.params)
        with open(args.params, 'rb') as f:
            n.load_params(pkl.load(f))

    logging.info('train start')
    for epoch in range(args.epoch):
        epoch_loss = 0
        epoch_error_rate = 0
        max_iterations = min(args.samples, nsentences) // minibatch
        for i in range(max_iterations):
            # Sample a random minibatch; truncate sentences to 100 steps.
            max_size_of_sequence = 100
            idxs = [random.randint(0, nsentences - 1) for _ in range(minibatch)]
            cwords = [contextwin(train_lex[idx][:max_size_of_sequence], context_window_size) for idx in idxs]
            words_labels = [onehotvector(cword, vocsize, train_y[idx][:max_size_of_sequence], nclasses)
                            for idx, cword in zip(idxs, cwords)]

            words = [word for word, label in words_labels]
            labels = [label for word, label in words_labels]

            # Zero-pad every sequence in the minibatch to the same length.
            max_size_of_sequence = max(_.shape[0] for _ in words)
            for k, (word, label) in enumerate(zip(words, labels)):
                size_of_sequence = word.shape[0]
                words[k] = np.pad(word, ((0, max_size_of_sequence - size_of_sequence), (0, 0)), mode='constant')
                labels[k] = np.pad(label, ((0, max_size_of_sequence - size_of_sequence), (0, 0)), mode='constant')

            # (batch, time, features) -> (time, batch, features) for the RNN.
            words = np.swapaxes(np.array(words), 0, 1)
            labels = np.swapaxes(np.array(labels), 0, 1)

            loss = n.train(words, labels) / (max_size_of_sequence * minibatch)  # sequence-normalized loss
            predictions = n.y
            error_rate = n.activation.error(predictions, labels) / (max_size_of_sequence * minibatch)

            epoch_loss += loss
            epoch_error_rate += error_rate
            if i % 10 == 0 and i != 0:
                # Progress as a percentage (the original logged the raw fraction).
                logging.info('[%.4f%%] epoch:%04d iter:%04d loss:%.5f error-rate:%.5f' % (
                    100.0 * (i + 1) / max_iterations, epoch, i,
                    epoch_loss / (i + 1), epoch_error_rate / (i + 1)))

        logging.info('epoch:%04d loss:%.5f, error-rate:%.5f' % (
            epoch, epoch_loss / max_iterations, epoch_error_rate / max_iterations))
        with open(args.params, 'wb') as f:
            pkl.dump(n.dump_params(), f)
        logging.info('dump parameters at %s' % args.params)
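
The zero-padding and axis swap above are the heart of the minibatch preparation: variable-length sequences are padded to a common length, stacked into one array, and moved to a time-major layout for the recurrent layer. A self-contained numpy sketch of the same transformation on dummy data (no project classes involved):

import numpy as np

# Two "sentences" of different lengths; each timestep is a 3-dim vector.
seqs = [np.ones((4, 3)), np.ones((2, 3))]

# Zero-pad to the longest sequence, exactly as in the training loop.
max_len = max(s.shape[0] for s in seqs)
padded = [np.pad(s, ((0, max_len - s.shape[0]), (0, 0)), mode='constant')
          for s in seqs]

# Stack to (batch, time, features), then swap to (time, batch, features).
batch = np.swapaxes(np.array(padded), 0, 1)
print(batch.shape)  # (4, 2, 3)

contextwin and onehotvector are helpers from the project. Judging by the call sites, contextwin presumably behaves like the context-window helper from the classic Theano RNN tutorials: one win-sized window of word indices per position, padded with a sentinel beyond the sentence borders. A sketch under that assumption (the -1 sentinel is a guess, not the project's actual code):

def contextwin(l, win):
    # One window of size win centered on each position of l,
    # padded with -1 beyond the sentence borders.
    assert win % 2 == 1
    l = list(l)
    lpadded = (win // 2) * [-1] + l + (win // 2) * [-1]
    return [lpadded[i:i + win] for i in range(len(l))]

# contextwin([1, 2, 3], 3) -> [[-1, 1, 2], [1, 2, 3], [2, 3, -1]]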