Example #1
File: da.py  Project: zxdcs/sentiment
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in range(training_epochs):
        # go through training set
        c = []
        for batch_index in range(n_train_batches):
            c.append(train_da(batch_index)[1])
            print('batch %d complete' % batch_index)

        # print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
        print('Training epoch %d, sample is ' % epoch, c[-1])

    end_time = time.clock()

    training_time = (end_time - start_time)

    print('The no corruption code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % (training_time / 60.))


if __name__ == '__main__':
    test_dA(load_data(r'D:\workspace\sentiment\data_balanced\lexical.txt'))
Example #2
        raise TypeError('y_real should have the same shape as y_pred',
                        ('y_real ', len(y_real), 'y_pred', len(y_pred)))
    count = numpy.zeros([label_num, label_num])
    for real, pred in zip(y_real, y_pred):
        count[real][pred] += 1
    precison = count[target][target] / numpy.sum(count, axis=0)[target]
    recall = count[target][target] / numpy.sum(count, axis=1)[target]
    fscore = 2 * precison * recall / (precison + recall)
    print('p:{0:f} r:{1:f} f:{2:f}'.format(precison, recall, fscore))
    return fscore


def corss_validation():
    k = 4
    avg_score = 0
    x, y = read_data(r'..\data\data_balanced\lexical_vec_avg.txt')
    for p_st in [x / k for x in range(0, k)]:
        p_en = p_st + 1 / k
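        # e.g. with k = 4 the folds cover the fractions (0, 0.25), (0.25, 0.5),
        # (0.5, 0.75) and (0.75, 1.0) of the data in turn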
        datas = split_data(x, y, p_st, p_en)
        score = sgd_optimization(datas, n_epochs=100)
        avg_score += score
    avg_score /= k
    print('Average score is: {0:f}'.format(avg_score))


if __name__ == '__main__':
    sgd_optimization(load_data(r'..\data\data_balanced\lexical_vec_avg.txt',
                               sp_idx=3701),
                     n_epochs=1000,
                     batch_size=100)
    # corss_validation()
Example #3
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)

        # compute f-score on validation set
        y_preds = [validate_model(i) for i in range(n_valid_batches)]
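        # flatten the per-minibatch prediction lists into one flat list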
        y_pred = [pij for pi in y_preds for pij in pi]
        y_real = valid_set_y.get_value(borrow=True)
        fscore, precison, recall = f_score(y_real, y_pred)
        print(
            'epoch {0:d}, fscore {1:f}  precision {2:f}  recall {3:f}'.format(
                epoch, fscore, precison, recall))

        # if we got the best validation score until now
        if fscore > best_fscore:
            best_fscore = fscore
            print('-----Best score: {0:f}-----'.format(best_fscore))

    end_time = time.clock()
    print('Optimization complete with best validation score of {0:.1f} %,'.
          format(best_fscore * 100.))
    print('The fine tuning code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))


if __name__ == '__main__':
    test_DBN(load_data(r'D:\workspace\sentiment\data_balanced\filtered.txt'),
             pretraining_epochs=10,
             training_epochs=100,
             batch_size=20,
             k=1)
Example #4
File: msda.py  Project: zxdcs/sentiment
    end_time = time.clock()
    print('Optimization complete with best validation score: fscore {0:f}  precision {1:f}  recall {2:f},'
          .format(best_fscore[0], best_fscore[1], best_fscore[2]))
    print('The training code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    if seq_output:
        print('writing sequence output')
        seq_output_file(r'..\data\data_seq\seq_train_raw.txt', seq_output_train,
                        datasets_modals[0][0][1].get_value(borrow=True))
        seq_output_file(r'..\data\data_seq\seq_test_raw.txt', seq_output_valid,
                        datasets_modals[0][1][1].get_value(borrow=True))


def seq_output_file(file, output_data, real_labels):
    f = open(file, 'w', encoding='utf-8')
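    # each output line is the feature vector (values scaled by 10 and rounded
    # to integers) followed by the predicted label and the real label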
    for feature, predict, real in zip(output_data[0], output_data[1], real_labels):
        f.write(' '.join(map(lambda i: '{0:.0f}'.format(i * 10), feature)))
        f.write(' {0:.0f} {1:.0f}\n'.format(predict, real))
    f.close()


if __name__ == '__main__':
    # acoustic_data = load_data(r'..\data\data_balanced\acoustic.txt', sp_idx=3701)
    # text_data = load_data(r'..\data\data_balanced\lexical_vec_avg.txt', sp_idx=3701)
    # test_mmsda([acoustic_data, text_data], pretraining_epochs=1, training_epochs=3, batch_size=50, seq_output=True)
    acoustic_data = load_data(r'..\data\data_context\acoustic.txt', sp_idx=23993)
    text_data = load_data(r'..\data\data_context\lexical_vec_avg.txt', sp_idx=23993)
    test_mmsda([acoustic_data, text_data], pretraining_epochs=50, training_epochs=400,
               batch_size=50, seq_output=False)
Example #5
File: rbm.py  Project: zxdcs/sentiment
            presig_vis,
            vis_mfs,
            vis_samples
        ],
        updates
    ) = theano.scan(
        rbm.gibbs_vhv,
        outputs_info=[None, None, None, None, None, persistent_vis_chain],
        n_steps=plot_every
    )

    # add to updates the shared variable that takes care of our persistent
    # chain.
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function(
        [], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn'
    )

    for idx in range(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        # do some output here!!!
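        # A sketch of one way to fill in the output step above: persist the
        # mean-field sample for later inspection. The .npy filename is
        # illustrative only, and numpy is assumed to be imported in this module.
        numpy.save('sample_mf_{0:d}.npy'.format(idx), vis_mf)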


if __name__ == '__main__':
    test_rbm(load_data(r'D:\workspace\sentiment\data_balanced\lexical.txt'), n_hidden=500)
Example #6
File: dbn.py  Project: zxdcs/sentiment
    epoch = 0
    while epoch < training_epochs:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)

        # compute f-score on validation set
        y_preds = [validate_model(i) for i in range(n_valid_batches)]
        y_pred = [pij for pi in y_preds for pij in pi]
        y_real = valid_set_y.get_value(borrow=True)
        fscore, precison, recall = f_score(y_real, y_pred)
        print('epoch {0:d}, fscore {1:f}  precision {2:f}  recall {3:f}'.format(epoch, fscore, precison, recall))

        # if we got the best validation score until now
        if fscore > best_fscore:
            best_fscore = fscore
            print('-----Best score: {0:f}-----'.format(best_fscore))

    end_time = time.clock()
    print('Optimization complete with best validation score of {0:.1f} %,'
          .format(best_fscore * 100.))
    print('The fine tuning code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))


if __name__ == '__main__':
    test_DBN(load_data(r'D:\workspace\sentiment\data_balanced\filtered.txt'), pretraining_epochs=10,
             training_epochs=100,
             batch_size=20, k=1)
Example #7
File: sda.py  Project: zxdcs/sentiment
        # compute f-score on validation set
        y_preds = [validate_model(i) for i in range(n_valid_batches)]
        y_pred = [pij for pi in y_preds for pij in pi]
        y_real = valid_set_y.get_value(borrow=True)
        print(y_pred)
        fscore, precison, recall = f_score(y_real, y_pred)
        print(
            'epoch {0:d}, fscore {1:f}  precision {2:f}  recall {3:f}'.format(
                epoch, fscore, precison, recall))

        # if we got the best validation score until now
        if fscore > best_fscore[0]:
            best_fscore = (fscore, precison, recall)
            print('-----Best score: {0:f}-----'.format(fscore))

    end_time = time.clock()
    print(
        'Optimization complete with best validation score: fscore {0:f}  precision {1:f}  recall {2:f},'
        .format(best_fscore[0], best_fscore[1], best_fscore[2]))
    print('The training code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))


if __name__ == '__main__':
    # test_SdA(load_data(r'..\data\data_balanced\acous_lex_avg.txt', sp_idx=3701),
    # pretraining_epochs=50, training_epochs=500, batch_size=50)
    test_SdA(load_data(r'..\data\data_all\acous_lex_avg.txt', sp_idx=23993),
             pretraining_epochs=50,
             training_epochs=500,
             batch_size=50)
Example #8
File: grbm.py  Project: zxdcs/sentiment
    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field) define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    ([presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs,
      vis_samples], updates) = theano.scan(
          rbm.gibbs_vhv,
          outputs_info=[None, None, None, None, None, persistent_vis_chain],
          n_steps=plot_every)

    # add to updates the shared variable that takes care of our persistent
    # chain.
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]],
                                updates=updates,
                                name='sample_fn')

    for idx in range(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        # do some output here!!!


if __name__ == '__main__':
    test_rbm(load_data(r'D:\workspace\sentiment\data_balanced\acoustic.txt'))
Example #9
File: mlp.py  Project: zxdcs/sentiment
        # if we got the best validation score until now
        if fscore > best_fscore:
            best_fscore = fscore
            print('-----Best score: {0:f}-----'.format(best_fscore))

    end_time = time.clock()
    print('Optimization complete with best validation score of {0:.1f} %,'
          .format(best_fscore * 100.))
    print('The code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))


def f_score(y_real, y_pred, target=1, label_num=2):
    if len(y_real) != len(y_pred):
        raise TypeError(
            'y_real should have the same shape as y_pred',
            ('y_real ', len(y_real), 'y_pred', len(y_pred))
        )
    count = numpy.zeros([label_num, label_num])
    for real, pred in zip(y_real, y_pred):
        count[real][pred] += 1
    precison = count[target][target] / numpy.sum(count, axis=0)[target]
    recall = count[target][target] / numpy.sum(count, axis=1)[target]
    fscore = 2 * precison * recall / (precison + recall)
    return fscore


if __name__ == '__main__':
    test_mlp(load_data(r'D:\workspace\sentiment\data_balanced\lexical.txt'), n_epochs=100)
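A quick illustration of the f_score function above (the toy label lists are invented for this note, not project data; with target=1 there are 3 true positives, 1 false positive and 1 false negative, so precision = recall = f-score = 0.75):

toy_real = [1, 0, 1, 1, 0, 1]
toy_pred = [1, 0, 0, 1, 1, 1]
print(f_score(toy_real, toy_pred))  # expected: 0.75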
Example #10
File: gdbn.py  Project: zxdcs/sentiment
        # compute f-score on validation set
        y_preds = [validate_model(i) for i in range(n_valid_batches)]
        y_pred = [pij for pi in y_preds for pij in pi]
        y_real = valid_set_y.get_value(borrow=True)
        print(y_pred)
        fscore, precison, recall = f_score(y_real, y_pred)
        print(
            'epoch {0:d}, fscore {1:f}  precision {2:f}  recall {3:f}'.format(
                epoch, fscore, precison, recall))

        # if we got the best validation score until now
        if fscore > best_fscore[0]:
            best_fscore = (fscore, precison, recall)
            print('-----Best score: {0:f}-----'.format(fscore))

    end_time = time.clock()
    print(
        'Optimization complete with best validation score: fscore {0:f}  precision {1:f}  recall {2:f},'
        .format(best_fscore[0], best_fscore[1], best_fscore[2]))
    print('The fine tuning code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))


if __name__ == '__main__':
    test_DBN(load_data(r'..\data\data_balanced\acoustic.txt', sp_idx=3701),
             pretraining_epochs=50,
             training_epochs=300,
             batch_size=50,
             k=1)
Example #11
File: gdbn.py  Project: zxdcs/sentiment
    batches_idx = list(range(n_train_batches))
    while epoch < training_epochs:
        epoch += 1
        random.shuffle(batches_idx)
        for minibatch_index in batches_idx:
            minibatch_avg_cost = train_fn(minibatch_index)

        # compute f-score on validation set
        y_preds = [validate_model(i) for i in range(n_valid_batches)]
        y_pred = [pij for pi in y_preds for pij in pi]
        y_real = valid_set_y.get_value(borrow=True)
        print(y_pred)
        fscore, precison, recall = f_score(y_real, y_pred)
        print('epoch {0:d}, fscore {1:f}  precision {2:f}  recall {3:f}'.format(epoch, fscore, precison, recall))

        # if we got the best validation score until now
        if fscore > best_fscore[0]:
            best_fscore = (fscore, precison, recall)
            print('-----Best score: {0:f}-----'.format(fscore))

    end_time = time.clock()
    print('Optimization complete with best validation score: fscore {0:f}  precision {1:f}  recall {2:f},'
          .format(best_fscore[0], best_fscore[1], best_fscore[2]))
    print('The fine tuning code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))


if __name__ == '__main__':
    test_DBN(load_data(r'..\data\data_balanced\acoustic.txt', sp_idx=3701), pretraining_epochs=50,
             training_epochs=300, batch_size=50, k=1)
Example #12
File: da.py  Project: zxdcs/sentiment
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in range(training_epochs):
        # go through training set
        c = []
        for batch_index in range(n_train_batches):
            c.append(train_da(batch_index)[1])
            print('batch %d complete' % batch_index)

        # print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
        print('Training epoch %d, sample is ' % epoch, c[-1])

    end_time = time.clock()

    training_time = (end_time - start_time)

    print('The no corruption code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % (training_time / 60.))


if __name__ == '__main__':
    test_dA(load_data(r'D:\workspace\sentiment\data_balanced\lexical.txt'))
Example #13
        raise TypeError(
            'y_real should have the same shape as y_pred',
            ('y_real ', len(y_real), 'y_pred', len(y_pred))
        )
    count = numpy.zeros([label_num, label_num])
    for real, pred in zip(y_real, y_pred):
        count[real][pred] += 1
    precison = count[target][target] / numpy.sum(count, axis=0)[target]
    recall = count[target][target] / numpy.sum(count, axis=1)[target]
    fscore = 2 * precison * recall / (precison + recall)
    print('p:{0:f} r:{1:f} f:{2:f}'.format(precison, recall, fscore))
    return fscore


def corss_validation():
    k = 4
    avg_score = 0
    x, y = read_data(r'..\data\data_balanced\lexical_vec_avg.txt')
    for p_st in [x / k for x in range(0, k)]:
        p_en = p_st + 1 / k
        datas = split_data(x, y, p_st, p_en)
        score = sgd_optimization(datas, n_epochs=100)
        avg_score += score
    avg_score /= k
    print('Average score is: {0:f}'.format(avg_score))


if __name__ == '__main__':
    sgd_optimization(load_data(r'..\data\data_balanced\lexical_vec_avg.txt', sp_idx=3701),
                     n_epochs=1000, batch_size=100)
    # corss_validation()