Example #1
0
import pylab as plt

# Module-level logger named after this module; its records are emitted
# through the root configuration installed a few lines below.
logger = logging.getLogger(__name__)
# Extend the import search path with the sibling source directory before the
# project imports that follow (presumably where grcnn/config/wordvec live).
sys.path.append('../source/')
# Root logging configuration: DEBUG and above, each record rendered as
# "<month-day hour:minute> <logger name> <level> <message>".
logging.basicConfig(
    datefmt='%m-%d %H:%M',
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    level=logging.DEBUG,
)

from grcnn import GrCNNBagger
from config import GrCNNConfiger
from wordvec import WordEmbedding

# Restore a previously saved bagger from disk and report how long the load
# took, measured in wall-clock seconds.
model_filename = './grbagger.model'
start_time = time.time()
grbagger = GrCNNBagger.load(model_filename)
end_time = time.time()
# Arguments are handed to the logger so the message is only formatted when
# the DEBUG level is actually enabled.
logger.debug('Time used to load the model: %f seconds.',
             end_time - start_time)

# Fix the NumPy RNG seed so runs are repeatable.
np.random.seed(1991)
senti_train_filename = '../data/sentiment-train.txt'
senti_test_filename = '../data/sentiment-test.txt'
senti_train_txt, senti_train_label = [], []
senti_test_txt, senti_test_label = [], []
start_time = time.time()
# Read the training and the test data sets. Each row is parsed as
# "<text>|<integer label>". Both files are read here because the statistics
# below need the test labels as well as the training labels.
for data_filename, txt_list, label_list in (
        (senti_train_filename, senti_train_txt, senti_train_label),
        (senti_test_filename, senti_test_txt, senti_test_label)):
    # `open` replaces the Python 2-only `file` builtin and works on both
    # Python 2 and Python 3.
    with open(data_filename, 'r') as fin:
        reader = csv.reader(fin, delimiter='|')
        for txt, label in reader:
            txt_list.append(txt)
            label_list.append(int(label))
# Refresh the end timestamp so the reported duration covers the reads above
# rather than reusing a stale value from an earlier measurement.
end_time = time.time()
logger.debug('Time used to build initial matrices: %f seconds.',
             end_time - start_time)
# Data set sizes used as denominators for the class-balance figures.
train_size = len(senti_train_txt)
test_size = len(senti_test_txt)
# Summing the labels counts the positives (assumes labels are 0/1 -- TODO
# confirm against the data files).
p_count = np.sum(senti_train_label)
logger.debug('Default positive percentage in Train: %f',
             float(p_count) / train_size)
logger.debug('Default negative percentage in Train: %f',
             float(train_size - p_count) / train_size)
p_count = np.sum(senti_test_label)
logger.debug('Default positive percentage in Test: %f',
             float(p_count) / test_size)
logger.debug('Default negative percentage in Test: %f',
             float(test_size - p_count) / test_size)
# Obtain the model: load the one named on the command line when given,
# otherwise build a fresh one from the configuration file. The sentinel
# value 'NONE' means that no model was designated.
start_time = time.time()
if args.model != 'NONE':
    logger.debug('There is a designated model, loading: {}'.format(args.model))
    grbagger = GrCNNBagger.load(args.model)
else:
    logger.debug('No designated model, training from scratch...')
    configer = GrCNNConfiger(args.config)
    grbagger = GrCNNBagger(configer, verbose=True)
end_time = time.time()
# Wall-clock seconds spent on either branch above.
logger.debug('Time used to building the model: %f seconds.',
             end_time - start_time)
logger.debug('Training start...')
# Training hyper-parameters taken from the command-line arguments.
epoch = args.epoch
batch_size = args.size
learn_rate = args.rate
# Bookkeeping for the training phase (AdaGrad, per the original note).
training_threshold_epoch = 30
# Best accuracies observed so far on each split.
highest_train_accuracy = 0.0
highest_test_accuracy = 0.0
# Accuracy and cost histories, one list per split.
track_training_acc = []
track_training_cost = []
track_test_acc = []
track_test_cost = []
try:
    start_time = time.time()
    sample_size = 0