import pylab as plt

# Set the basic configuration of the logging system
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                    datefmt='%m-%d %H:%M')
# Extend the module search path before the project imports below
# (presumably grcnn, config and wordvec live in ../source/ -- verify).
sys.path.append('../source/')
logger = logging.getLogger(__name__)

from grcnn import GrCNNBagger
from config import GrCNNConfiger
from wordvec import WordEmbedding

# Load the model stored at model_filename and log how long the load took.
model_filename = './grbagger.model'
start_time = time.time()
grbagger = GrCNNBagger.load(model_filename)
end_time = time.time()
logger.debug('Time used to load the model: %f seconds.' % (end_time-start_time))

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1991)
senti_train_filename = '../data/sentiment-train.txt'
senti_test_filename = '../data/sentiment-test.txt'
senti_train_txt, senti_train_label = [], []
senti_test_txt, senti_test_label = [], []
start_time = time.time()
# Read training data set: each row is "<text>|<integer label>".
# open() is used instead of file(): file() exists only in Python 2, while
# open() behaves the same there and also works on Python 3.
with open(senti_train_filename, 'r') as fin:
    reader = csv.reader(fin, delimiter='|')
    for txt, label in reader:
        senti_train_txt.append(txt)
        senti_train_label.append(int(label))
logger.debug('Time used to build initial matrices: %f seconds.' % (end_time-start_time)) p_count = np.sum(senti_train_label) logger.debug('Default positive percentage in Train: %f' % (float(p_count) / train_size)) logger.debug('Default negative percentage in Train: %f' % (float(train_size-p_count) / train_size)) p_count = np.sum(senti_test_label) logger.debug('Default positive percentage in Test: %f' % (float(p_count) / test_size)) logger.debug('Default negative percentage in Test: %f' % (float(test_size-p_count) / test_size)) # If there is a designated model, using it, else start from scratch start_time = time.time() if args.model == 'NONE': logger.debug('No designated model, training from scratch...') configer = GrCNNConfiger(args.config) grbagger = GrCNNBagger(configer, verbose=True) else: logger.debug('There is a designated model, loading: {}'.format(args.model)) grbagger = GrCNNBagger.load(args.model) end_time = time.time() logger.debug('Time used to building the model: %f seconds.' % (end_time-start_time)) logger.debug('Training start...') # Initialize model training configuration learn_rate = args.rate batch_size = args.size epoch = args.epoch # Training using AdaGrad training_threshold_epoch = 30 highest_train_accuracy, highest_test_accuracy = 0.0, 0.0 track_training_acc, track_training_cost = [], [] track_test_acc, track_test_cost = [], [] try: start_time = time.time() sample_size = 0