Example #1
0
def prediction(train_dataSet, train_labels, test_dataSet, test_labels, alpha, b, option,cat):
    """Label the training and test samples with a kernel decision function and print both accuracies.

    The score of a sample ``x`` is
    ``sum_j alpha[j] * train_labels[j] * Kernel(train_dataSet[j], x, option, cat) + b``;
    a negative score is labelled -1, anything else +1.

    Args:
        train_dataSet: indexable collection of training samples.
        train_labels: labels of the training samples; its length sets the
            number of terms in every score.
        test_dataSet: indexable collection of test samples.
        test_labels: labels of the test samples; used only for its length and
            for the accuracy report.
        alpha: per-training-sample coefficients, indexed like ``train_labels``.
        b: bias added to every score.
        option, cat: forwarded unchanged to ``Kernel``.

    Returns:
        None. Both accuracies are printed.
    """
    support_count = len(train_labels)

    def _classify(samples, sample_count):
        # One -1/+1 label per sample, stored in a float array as before.
        labels = numpy.zeros(sample_count)
        for i in range(sample_count):
            score = 0
            # The sum always runs over the TRAINING samples, so the label in
            # each term must be train_labels[j]. The previous code used
            # test_labels[j] for the test set, which mixed up the labels and
            # could index past the end of test_labels.
            for j in range(support_count):
                score += alpha[j] * train_labels[j] * Kernel(train_dataSet[j], samples[i], option, cat)
            labels[i] = -1 if (score + b) < 0 else 1
        return labels

    train_predict_cat = _classify(train_dataSet, len(train_labels))
    # '%s %s' reproduces the output of the old Python 2 print statement
    # (label, space, value) and is also valid on Python 3.
    print('%s %s' % ('Accuracy for training dataset: ',
                     evaluation.evaluation(train_predict_cat, train_labels)))

    test_predict_cat = _classify(test_dataSet, len(test_labels))
    print('%s %s' % ('Accuracy for test dataset: ',
                     evaluation.evaluation(test_predict_cat, test_labels)))
Example #2
0
def train():
    """Train ``CNNModel`` on the ECG training set, evaluating after every epoch.

    Reads ``lr`` and ``num_epochs`` from ``parameters.json`` in the current
    working directory, optimises with Adam against ``nn.BCELoss`` and calls
    ``evaluation(net)`` at the end of each epoch. When CUDA is available the
    network is wrapped in ``DataParallel`` over all visible devices.

    Returns:
        None.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # The context manager closes the file even if JSON parsing fails.
    with open("parameters.json") as json_file:
        parameters = json.load(json_file)
    net = CNNModel(1, 10)
    optimizer = torch.optim.Adam(net.parameters(), lr=parameters["lr"])
    criterion = nn.BCELoss()

    if torch.cuda.is_available():
        net = torch.nn.DataParallel(net,
                                    device_ids=range(
                                        torch.cuda.device_count())).cuda()
        cudnn.benchmark = True
    ecg_dataset = EcgDataset(is_train=True)
    train_loader = torch.utils.data.DataLoader(dataset=ecg_dataset,
                                               batch_size=10)
    for epoch in range(parameters["num_epochs"]):
        net.train()
        for data, label in train_loader:
            data, label = data.to(device), label.to(device)
            output = net(data)
            optimizer.zero_grad()
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
        # Reports the loss of the LAST batch of the epoch, not an average.
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1,
                                                   parameters["num_epochs"],
                                                   loss.item()))
        evaluation(net)
Example #3
0
def predict(train_dataSet, train_labels, test_dataSet, test_labels, w, b):
    """
    Predict training and test set with the linear rule ``inner(x, w) + b``.

    A sample is labelled -1 when ``numpy.inner(x, w) + b <= 0`` and +1
    otherwise. The accuracy on each set is printed via
    ``evaluation.evaluation``.

    Args:
        train_dataSet: indexable collection of training samples.
        train_labels: training labels; its length is the number of training
            samples that are classified.
        test_dataSet: indexable collection of test samples.
        test_labels: test labels; its length is the number of test samples
            that are classified.
        w: weight vector passed to ``numpy.inner``.
        b: bias term.

    Returns:
        Tuple ``(train_predict_cat, test_predict_cat)`` of float numpy arrays
        holding -1/+1 labels.
    """
    def _classify(samples, sample_count):
        labels = numpy.zeros(sample_count)
        for i in range(sample_count):
            labels[i] = -1 if (numpy.inner(samples[i], w) + b) <= 0 else 1
        return labels

    # predict training set
    train_predict_cat = _classify(train_dataSet, len(train_labels))
    # '%s %s' reproduces the output of the old Python 2 print statement
    # (label, space, value) and is also valid on Python 3.
    print('%s %s' % ('Accuracy for training dataset: ',
                     evaluation.evaluation(train_predict_cat, train_labels)))

    # predict test set
    test_predict_cat = _classify(test_dataSet, len(test_labels))
    print('%s %s' % ('Accuracy for test dataset: ',
                     evaluation.evaluation(test_predict_cat, test_labels)))
    return train_predict_cat, test_predict_cat
Example #4
0
def main():
    """Score the phase-split predictions, then score them again after merging phases.

    Sets ``Cluster.GAMMA`` to 0.95, runs ``split_phase`` on
    ``flattened_train_data_path`` and prints the three values returned by
    ``evaluation`` before and after ``merge_phases`` (called with 0.1).
    """
    Cluster.GAMMA = 0.95
    merge_alpha = 0.1

    truths, predicts = split_phase(flattened_train_data_path)
    split_pre, split_coll, split_f1 = evaluation(truths, predicts)
    print(split_pre, split_coll, split_f1)

    merged_predicts = merge_phases(predicts, merge_alpha)
    merged_pre, merged_coll, merged_f1 = evaluation(truths, merged_predicts)
    print(merged_pre, merged_coll, merged_f1)
Example #5
0
def train_model(option):
    """Train a ``biLSTM_CRF`` tagger and report train/test metrics after every epoch.

    Args:
        option: configuration object. The attributes read here are
            ``batch_size``, ``embedding_size``, ``hidden_size``,
            ``dict_number``, ``num_labels``, ``lr``, ``use_gpu``,
            ``pre_trained`` (path of a state dict to load, falsy to skip),
            ``epochs`` and ``label_dict``.

    Returns:
        None. Metrics are printed.
    """
    train_loader = DataLoader(dataset=build_dataset('./data/train_x.pkl',
                                                    './data/train_y.pkl'),
                              batch_size=option.batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset=build_dataset('./data/test_x.pkl',
                                                   './data/test_y.pkl'),
                             batch_size=option.batch_size,
                             shuffle=True)

    model = biLSTM_CRF(option.embedding_size, option.hidden_size,
                       option.dict_number, option.num_labels)
    optimizer = torch.optim.Adam(model.parameters(), lr=option.lr)
    if option.use_gpu:
        model.cuda()
    if option.pre_trained:
        model.load_state_dict(torch.load(option.pre_trained))

    for epoch in range(option.epochs):
        print(epoch)
        # Fresh evaluators for this epoch
        train_eva = evaluation(option.label_dict)
        test_eva = evaluation(option.label_dict)

        # Training
        model.train()
        for batch_x, batch_y, batch_masks in train_loader:
            optimizer.zero_grad()

            if option.use_gpu:
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
                # Was `masks = masks.cuda()`: `masks` is never defined, and
                # the mask handed to the model stayed on the CPU.
                batch_masks = batch_masks.cuda()

            y_pred, loss = model(batch_x, batch_y, batch_masks)
            train_eva.add(y_pred, batch_y)
            loss.backward()
            optimizer.step()

        # Validate on the test set
        model.eval()
        with torch.no_grad():  # no gradients are needed for evaluation
            # The test loader is built by the same build_dataset() as the
            # training loader, so it is unpacked the same way. The old
            # two-item unpacking reused the last training batch's mask.
            for batch_x, batch_y, batch_masks in test_loader:
                if option.use_gpu:
                    batch_x = batch_x.cuda()
                    batch_y = batch_y.cuda()
                    batch_masks = batch_masks.cuda()

                y_pred, loss = model(batch_x, batch_y, batch_masks)
                test_eva.add(y_pred, batch_y)
        print("train:")
        print(train_eva.evaluate())
        print("test:")
        print(test_eva.evaluate())
Example #6
0
def main(args):
    """Build the data loaders and model graph, then train or evaluate in a TF session.

    ``args.mode == 'train'`` selects training; any other mode evaluates and
    additionally builds the test loader. A checkpoint from ``args.save_dir``
    is restored when evaluating or when ``args.load == 'load'``.
    ``args.feat_dim`` and ``args.vocab_size`` are set from the training
    loader as a side effect.
    """
    training_mode = args.mode == 'train'

    # load data: the training loader is needed in both modes
    data_loader_train = data_load.data_loader(args.train_feat,
                                              args.train_phn,
                                              args.batch_size,
                                              meta_path=args.meta,
                                              max_length=args.max_length,
                                              is_training=True)
    if not training_mode:
        data_loader_test = data_load.data_loader(args.test_feat,
                                                 args.test_phn,
                                                 args.batch_size,
                                                 max_length=args.max_length,
                                                 is_training=False)

    # add some features to args
    args.feat_dim = data_loader_train.feat_dim
    args.vocab_size = data_loader_train.vocab_size

    # build model graph
    g = model(args) if training_mode else model(args, is_training=False)
    print("Graph loaded")

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # create session
    with tf.Session(graph=g.graph) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=3)

        if not training_mode or args.load == 'load':
            print('load_model')
            saver.restore(sess, tf.train.latest_checkpoint(args.save_dir))

        if training_mode:
            print('training')
            train(sess, g, args, saver, data_loader_train)
        else:
            print('evaluating')
            evaluation(sess, g, args, data_loader_train, data_loader_test)
Example #7
0
def test():
    """Run the outlier-tree detector ten times on every matching CSV and evaluate each run.

    Walks ``../data/reason`` recursively. Every file whose name starts with
    ``c`` and ends with ``.csv`` is loaded, its ``label`` and ``reason``
    columns are dropped, and ``run_outlierTree`` is called on the rest ten
    times. Each result is passed to ``evaluation`` together with the original
    frame.
    """
    path = '../data/reason'
    # out = open('../out/outlierTree.csv', 'w')
    # out.write('name, correct, per\n')
    # out.close()
    for root, _, files in os.walk(path):  # root: directory being visited
        for filename in files:
            name = str(filename)
            if name.endswith('.csv') and name.startswith('c'):
                # Join with the directory actually being visited. os.walk
                # descends into sub-directories, and the old code always
                # prefixed the top-level path.
                filepath = root + "/" + name
                df_true = pd.read_csv(filepath)
                df = df_true.drop(['label', 'reason'], axis=1)
                for _ in range(10):
                    df_pre = run_outlierTree(df)  # only the columns to be judged
                    evaluation(df_true, df_pre, outpath='../out/outlierTree', name=filename[:-4])
Example #8
0
def main(model_type):
    """Train a RealNVP flow on the double-moon data and evaluate it.

    Args:
        model_type: ``macro._JOINTREALNVP`` or ``macro._CONDITIONALREALNVP``.
            Selects the flow class, the prior dimensionality and the loss.

    Raises:
        ValueError: if ``model_type`` is neither of the two supported values
            (this used to surface later as a NameError on ``model``).
    """
    if model_type == macro._JOINTREALNVP:
        prior_z = dataset.gauss_sample(n_sample=10000, dim=2 + 2)
        mask = torch.from_numpy(np.array([0, 1, 0, 1]).astype(np.float32))
        model = flow.JointRealNVP(input_dim=2 + 2,
                                  output_dim=2 + 2,
                                  hid_dim=512,
                                  mask=mask,
                                  n_layers=8)
    elif model_type == macro._CONDITIONALREALNVP:
        prior_z = dataset.gauss_sample(n_sample=10000, dim=2)
        mask = torch.from_numpy(np.array([0, 1]).astype(np.float32))
        model = flow.ConditionalRealNVP(input_dim=2 + 2,
                                        output_dim=2,
                                        hid_dim=512,
                                        mask=mask,
                                        n_layers=8)
    else:
        raise ValueError('unsupported model_type: {!r}'.format(model_type))
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    sampled_x, sampled_labels = dataset.doublemoon_sample(n_sample=10000)
    train_loader = DataLoader(TensorDataset(sampled_x, sampled_labels),
                              batch_size=64,
                              shuffle=True)

    model.train()
    for epoch in range(macro._EPOCH):
        # Reset per epoch. The accumulator used to live outside the loop, so
        # the printed "average" kept growing with every epoch.
        train_loss = 0
        for data, labels in train_loader:
            optimizer.zero_grad()
            z, log_det_j_sum = model(data, labels)

            if model_type == macro._JOINTREALNVP:
                # The trailing columns of z are compared against the labels.
                y = z[:, len(z[0]) - len(labels[0]):]
                log_prob_loss = -(prior_z.log_prob(z) + log_det_j_sum).mean()
                mse_loss = F.mse_loss(y, labels)
                loss = log_prob_loss + macro._LAMBDA * mse_loss
            else:
                loss = -(prior_z.log_prob(z) + log_det_j_sum).mean()
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        print('Epoch: {} Average loss: {:.4f}'.format(\
        epoch, train_loss / (len(train_loader.dataset)*len(z[0]))))

    eval.evaluation(model, model_type)
def annealing(n,cnt):
    """Search for a low-energy n x n jump maze by randomly perturbing single cells.

    Each iteration picks a random non-goal cell, replaces its jump value with
    a different random value in ``[1, maxrnd]`` and re-evaluates the maze.
    The move is kept when the energy does not increase, or with probability
    ``temperature * 0.5`` (temperature starts at 2.5 and is multiplied by
    0.99999 each iteration). Otherwise the maze is restored from the snapshot
    of the best maze seen so far.

    Args:
        n: side length of the maze; the goal cell is ``(n-1, n-1)``.
        cnt: number of iterations.

    Returns:
        Tuple ``(minima, maze)``: the lowest energy seen and the working maze
        at the end of the run.
        NOTE(review): the returned maze is the current working state, which
        is not necessarily the matrix that achieved ``minima``.
    """
    maze = generateMatrix(n)
    first = evaluation(maze,n)
    minima = first
    print("Init energy: %d" %(first))
    #print(maze)
    # Take a real snapshot. `backup = maze` only aliased the same matrix, so
    # the rollback below restored nothing.
    # Assumes the matrix type provides .copy() (e.g. a numpy array) - confirm.
    backup = maze.copy()

    temperature=2.5
    decay = 0.99999

    goal=[n-1,n-1]
    for i in range(cnt):
        #randomly choose a cell to change, make sure it's not goal state
        new_cell=goal
        while new_cell == goal:
            row = rd.randint(0,n-1)
            col = rd.randint(0,n-1)
            new_cell=[row,col]

        #change the jump number, make sure it's not same as before
        old_step = maze[row,col]
        new_step = old_step
        maxrnd = max(n-row-1,n-col-1,row,col)
        # NOTE(review): if the only admissible value equals old_step
        # (maxrnd == 1), this loop cannot terminate - confirm that
        # generateMatrix / n rule this out.
        while new_step == old_step:
            new_step = rd.randint(1,maxrnd)
        maze[row,col] = new_step

        #get the new energy
        nexte = evaluation(maze,n)
        if i % 1000 == 0:
            print("Current energy: %d" %(minima))
            print("temp diff %d" %(first-nexte))
            print("temp %f" %(temperature*0.5))
            #print(np.exp((first-nexte)/temperature))

        #if nexte <= first or rd.random()< np.exp((first-nexte)/temperature):
        if nexte <= first or rd.random()< temperature*0.5:
            first = nexte # minima < nexte < first, not minima keep going
            if nexte < minima: #new minima, store the matrix
                minima = nexte
                backup = maze.copy()
        else:
            # Rejected: fall back to the best snapshot. Copy it so later
            # in-place edits of `maze` cannot corrupt the snapshot.
            # NOTE(review): `first` is not reset to `minima` here - confirm
            # whether that is intended.
            maze = backup.copy()
        temperature = temperature*decay

    return (minima,maze)
Example #10
0
def main():
    """Train for ``args.epochs_number`` epochs and checkpoint whenever validation accuracy improves.

    Seeds torch (CPU and all CUDA devices) with 42, requests deterministic
    cuDNN kernels, logs the run to Weights & Biases, and after every epoch
    saves a checkpoint if ``evaluation`` returned a new best accuracy.
    """
    args = get_args()
    wandb.init()
    wandb.config.update(args)

    seed = 42
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # The switch lives on torch.backends.cudnn. Assigning to
    # `torch.backends.deterministic` only created an unused attribute and
    # left cuDNN non-deterministic.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    loaded_model = False

    [train_loader, valid_loader, model,
     optimizer] = initialize(args, loaded_model)
    scaler = torch.cuda.amp.GradScaler()

    wandb.watch(model)
    best_acc = 0
    run_avg = RunningAverage()

    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    # scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, cycle_momentum=False)

    for epoch in range(1, args.epochs_number + 1):
        run_avg.reset_train()
        run_avg.reset_val()

        train(args, model, train_loader, epoch, optimizer, scaler, run_avg)
        val_acc = evaluation(args, model, valid_loader, epoch, run_avg)

        # scheduler.step()
        if best_acc < val_acc:
            best_acc = val_acc
            save_checkpoint(model, optimizer, args, epoch)
Example #11
0
    def evaluate_model(self, test_file, output_dict, output_file):
        """
        Predict the data in ``test_file`` with the trained model and write the result to ``output_file``.

        Args:
            test_file: path of the UTF-8 text file to predict on.
            output_dict: dict providing ``"index_dict"``, ``"token_dict"``
                and ``"model"``.
            output_file: destination passed to ``print_to_file``.
        """
        index_dict = output_dict["index_dict"]
        token_dict = output_dict["token_dict"]
        model = output_dict["model"]
        # Close the input file once it has been parsed; it used to stay open
        # for the life of the process.
        # NOTE(review): assumes get_data_for_validation reads the file
        # eagerly - confirm it does not return lazy readers.
        with open(test_file, "r", encoding="utf8") as test_fh:
            te_refined_data, test_data_objs = self.cue_tr_obj.get_data_for_validation(
                test_fh, self.max_len, index_dict, token_dict)

        # Prediction
        index2tag = index_dict["index2tag"]
        test_x, _, _ = self.cue_tr_obj.prepare_training_data(
            te_refined_data, self.features_dict, index_dict, self.embed_size)

        eval_obj = evaluation.evaluation()
        test_pred = eval_obj.predict_test(model, test_x, index2tag)
        #print("Unique labels: {}".format(np.unique(test_pred) ))

        # Create new file
        negation_dict = eval_obj.tag_negation_cues(test_data_objs, test_pred)
        dp_obj = data_prep.data_preparation()
        new_obj_list = dp_obj.create_new_obj_list(test_data_objs,
                                                  negation_dict)
        dp_obj.print_to_file(new_obj_list,
                             output_file)  # test_gold_pred_cue3 is the best
Example #12
0
 def __init__(self, SIZE):
     """Create an evaluator and an empty SIZE x SIZE board (all cells 0); search depth is fixed at 3."""
     self.evaluator = evaluation(SIZE)
     # Each row is a distinct list, so cells can be set independently.
     self.board = [[0] * SIZE for _ in range(SIZE)]
     self.SIZE = SIZE
     self.maxdepth = 3
     self.gameover = 0
     self.overvalue = 0
Example #13
0
    def __init__(self, client_name):
        """Load the client's configuration and set up its paths, database engine and helpers.

        ``client_name`` is concatenated directly into the paths. The
        ``[EVALUATION]`` section of ``config.ini`` must provide the five
        weights read below.
        """
        # Client name identifying which files the processes will run on
        self.client_name = client_name
        self.config = configparser.ConfigParser()
        self.config.sections()
        config_path = str('../Datasets/' + self.client_name) + 'config.ini'
        if os.path.isfile(config_path):
            with open(config_path) as config_parser_fp:
                self.config.read_file(config_parser_fp)

        client_root = "../Datasets/" + str(self.client_name)
        self.database_path = client_root + "database/input_data/"
        self.sql_db = sql.create_engine('sqlite:///' + self.database_path +
                                        "db.sql")
        self.models_path = client_root + "database/models/"

        # Each weight is stored as a float attribute named after its key.
        evaluation_section = self.config['EVALUATION']
        for weight_name in ('precision_weight', 'recall_weight',
                            'time_weight', 'rmse_weight', 'mae_weight'):
            setattr(self, weight_name, float(evaluation_section[weight_name]))

        self.common_functions = common(self.client_name)
        self.evaluation = evaluation(self.client_name)
Example #14
0
def entity_predict(dataset_iter):
    """Tag every batch of ``dataset_iter`` with the global ``model`` and print precision/recall/F1.

    Returns:
        Tuple ``(dete_result, question_list)``: the per-example predicted tag
        rows and the matching rows looked up in ``index2word``, both
        flattened across batches.
    """
    model.eval()
    dataset_iter.init_epoch()
    gold_list, pred_list = [], []
    dete_result, question_list = [], []
    for data_batch in dataset_iter:
        #batch_size = data_batch.text.size()[1]
        predicted = torch.max(model(data_batch), 1)[1]
        answer = predicted.view(data_batch.ed.size())
        # Positions whose text index is 1 are forced to tag 1.
        answer[(data_batch.text.data == 1)] = 1
        answer = np.transpose(answer.cpu().data.numpy())

        gold_list.append(np.transpose(data_batch.ed.cpu().data.numpy()))
        pred_list.append(answer)
        dete_result.extend(answer)

        index_question = np.transpose(data_batch.text.cpu().data.numpy())
        question_list.extend(index2word[index_question])
        #for i in range(batch_size):  # If no word is detected as entity, select top 3 possible words
        #    if all([j == 1 or j == idxO for j in answer[i]]):
        #        index = list(range(i, scores.shape[0], batch_size))
        #        FindOidx = [j for j, x in enumerate(answer[i]) if x == idxO]
        #        idx_in_socres = [index[j] for j in FindOidx]
        #        subscores = scores[idx_in_socres]
        #        answer[i][torch.sort(torch.max(subscores, 1)[0], descending=True)[1][0:min(2, len(FindOidx))]] = idxI
    P, R, F = evaluation(gold_list, pred_list, index2tag, type=False)
    print("{} Precision: {:10.6f}% Recall: {:10.6f}% F1 Score: {:10.6f}%".format("Dev", 100. * P, 100. * R, 100. * F))
    return dete_result, question_list
def test_cross_dataset(config_file,test_dataset, **kwargs):
    """Evaluate a model trained on ``cfg.DATASETS.NAMES`` against a different dataset.

    The config file is merged first, then any keyword overrides; the config
    is frozen afterwards. Features are extracted for the validation loader of
    ``test_dataset`` and passed to ``evaluation``. mAP, the Rank-1/5/10 CMC
    values and the elapsed time are logged.
    """
    cfg.merge_from_file(config_file)
    if kwargs:
        # Flatten {k: v, ...} into [k, v, ...] as merge_from_list expects.
        overrides = [item for pair in kwargs.items() for item in pair]
        cfg.merge_from_list(overrides)
    cfg.freeze()

    # The source dataset is loaded only for its class count.
    PersonReID_Dataset_Downloader('./datasets',cfg.DATASETS.NAMES)
    _, _, _, num_classes = data_loader(cfg,cfg.DATASETS.NAMES)

    PersonReID_Dataset_Downloader('./datasets',test_dataset)
    _, val_loader, num_query, _ = data_loader(cfg,test_dataset)

    re_ranking=cfg.RE_RANKING

    if re_ranking:
        logger = make_logger("Reid_Baseline", cfg.OUTPUT_DIR,
                             cfg.DATASETS.NAMES+'->'+test_dataset+'_re-ranking')
        logger.info("Re-Ranking Test Results:")
    else:
        logger = make_logger("Reid_Baseline", cfg.OUTPUT_DIR,
                             cfg.DATASETS.NAMES+'->'+test_dataset)
        logger.info("Test Results:")

    device = torch.device(cfg.DEVICE)

    model = getattr(models, cfg.MODEL.NAME)(num_classes)
    model.load(cfg.OUTPUT_DIR,cfg.TEST.LOAD_EPOCH)
    model = model.eval()

    all_feats, all_pids, all_camids = [], [], []

    started = time.time()
    for batch in tqdm(val_loader, desc='Feature Extraction', leave=False):
        model.eval()
        with torch.no_grad():
            images, pids, camids = batch
            if device:
                model.to(device)
                images = images.to(device)

            feats = model(images)

        all_feats.append(feats)
        all_pids.extend(np.asarray(pids))
        all_camids.extend(np.asarray(camids))

    cmc, mAP = evaluation(all_feats,all_pids,all_camids,num_query,re_ranking)

    logger.info("mAP: {:.1%}".format(mAP))
    for rank in (1, 5, 10):
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(rank, cmc[rank - 1]))

    test_time = time.time() - started
    logger.info('Testing complete in {:.0f}m {:.0f}s'.format(test_time // 60, test_time % 60))
Example #16
0
def main():
    """Fit the v3 Bayes net on the first 80% of the data, evaluate on the rest and save the results.

    Results are written as CSV to ``../results/v3_<k_clusters>``.
    """
    X, Y, ids, Amounts = preprocess()
    split = int(math.ceil(.8 * len(X)))
    x_train, x_test = X[:split], X[split:]
    y_train, y_test = Y[:split], Y[split:]
    ids_train, ids_test = ids[:split], ids[split:]

    model = v3.bayesnet()
    scorer = evaluation(Amounts[split:])

    for rounds in range(1):
        # A single parenthesised string prints the same on Python 2 and 3.
        print('round: ' + str(rounds))

        model.fit(x_train,
                  y_train,
                  ids_train,
                  k_clusters=FLAGS.k_clusters,
                  epochs=40,
                  verbose=1)

        scorer.evaluate(model.predict(x_test, ids_test), y_test)

    results_path = "../results/" + "v3_" + str(FLAGS.k_clusters)
    scorer.get_results().to_csv(results_path, index=False)
Example #17
0
def trainStep(model,
              input_tensor,
              target_tensor,
              testLoader,
              optimizer,
              device,
              criterion=nn.CrossEntropyLoss()):
    '''
    Run one optimisation step on a single batch and evaluate the model on ``testLoader``.

    :param model: called as ``model(input_tensors, segment_id, mask)``
        (original note: "no need to use device")
    :param input_tensor: 3-tuple unpacked as ``(input_tensors, mask, segment_id)``
    :param target_tensor: targets; ``.squeeze()`` is applied before the loss
    :param testLoader: forwarded to ``evaluation``
    :param optimizer: stepped once, then its gradients are zeroed
    :param device: forwarded to ``evaluation``
    :param criterion: loss callable; the default instance is created once,
        when the function is defined, and shared by all calls
    :return: ``(model, loss, p, r, f1)`` where ``p, r, f1`` come from
        ``evaluation(model, testLoader, device)``
    '''
    input_tensors, mask, segment_id = input_tensor
    output_tensor = model(input_tensors, segment_id, mask)

    # p,r,f1=eval(output_tensor,target_tensor)
    # NOTE(review): evaluation runs between the forward pass and backward().
    # If it changes the model's train/eval mode, that persists into later
    # steps - confirm against evaluation().
    p, r, f1 = evaluation(model, testLoader, device)

    loss = criterion(output_tensor, target_tensor.squeeze())

    loss.backward()
    optimizer.step()
    # Gradients are cleared AFTER the step, so each call relies on the
    # previous call (or the caller) having left them zeroed.
    optimizer.zero_grad()

    return model, loss, p, r, f1
Example #18
0
def test(in_dir, class_dir, feat_dir):
    """Run the trained encoder on the first batch of the landmark dataset and dump its outputs.

    Args:
        in_dir: dataset location passed to ``LD``.
        class_dir: directory prefix (concatenated as-is, so it should end
            with a path separator) holding ``classifier.pth``;
            ``result.txt`` is written there.
        feat_dir: directory prefix for the per-sample feature ``.npy`` files;
            created if missing.

    Side effects:
        Writes ``<class_dir>result.txt`` with one ``label output`` line per
        sample, saves one feature file per sample and finally calls
        ``evaluation(class_dir)``.
    """
    if not os.path.exists(feat_dir):
        os.mkdir(feat_dir)

    cuda = torch.cuda.is_available()
    if cuda:
        print('cuda is available!')

    test_dataset = LD(in_dir, lmark_num=LMARK_NUM, color=1)
    test_loader = DataLoader(test_dataset, batch_size=3000)

    model = Encoder()
    if cuda:
        model.cuda()

    model.load_state_dict(torch.load(class_dir + 'classifier.pth'))
    # NOTE(review): model.eval() is never called - confirm whether Encoder
    # has layers that behave differently in training mode.

    # Only the first batch (up to 3000 samples) is processed. The builtin
    # next() replaces the Python-2-only `.next()` iterator method.
    lmark, reye, leye, mouth, label = next(iter(test_loader))
    lmark = lmark.view(lmark.size(0), -1)
    label = label.numpy()

    def _prepare(tensor):
        # Move to the GPU when available, then cast to float32.
        wrapped = Variable(tensor)
        return wrapped.cuda().float() if cuda else wrapped.float()

    with torch.no_grad():
        lmark = _prepare(lmark)
        reye = _prepare(reye)
        leye = _prepare(leye)
        mouth = _prepare(mouth)

        out, feat = model(lmark, reye, leye, mouth)
        feat = feat.cpu().data.numpy()
        out = out.view(out.size(0)).cpu().data.numpy()

    # The context manager closes the result file even if a write or save fails.
    with open(class_dir + 'result.txt', 'w') as f:
        for i in range(len(label)):
            f.write(str(label[i]) + ' ' + str(out[i]) + '\n')
            np.save(feat_dir + str(i).zfill(4) + '_' + str(int(label[i])), feat[i])
    evaluation(class_dir)
Example #19
0
    def notificationTourJoueur(self):
        """Announce whose turn it is, then print the evaluation of the current board (FEN)."""
        # A player picks an action: an even turn id means white (player1),
        # an odd one means black (player2).
        joueur_actif = self.player1 if self.turnId % 2 == 0 else self.player2
        print("C'est au tour de", joueur_actif)

        print(evaluation(self.board.fen()))
def emsemble(cross_epoch = 0,data_index=None,cut_shape=None,data_type=['MCIc','MCInc'],pre_dir='/home/anzeng/rhb/fmri_data',
         num_batches = 256*5,voxnet_point=None,test_size = 6,brain_map=[217],f_handle = None):
    """Evaluate a saved Keras model per slice and as a per-subject ensemble on train and test data.

    For 100 training samples and ``test_size`` test samples the model's
    predictions from ``get_label`` are accumulated into two running
    ``evaluation`` objects: one for the raw predictions and one for the
    single label produced by ``ensemble_label(predict, 2)``.

    Args:
        cross_epoch: not referenced in the body.
        data_index: forwarded to ``fMRI_data``.
        cut_shape: flat sequence of (start, end) index pairs; each pair
            contributes ``end - start + 1`` to ``true_shape``.
        data_type: forwarded to ``fMRI_data``.
            NOTE(review): mutable default list - safe only while nothing
            mutates it.
        pre_dir: data directory forwarded to ``fMRI_data`` as ``dir``.
        num_batches: not referenced in the body.
        voxnet_point: path passed to ``keras.models.load_model``.
        test_size: number of test samples drawn.
        brain_map: not referenced in the body.
        f_handle: optional writable handle; when truthy the ensemble train
            and test evaluations are written to it.

    Returns:
        The accumulated ensemble ``evaluation`` object for the test set.
    """
    # tf.reset_default_graph()
    keras.backend.clear_session()
    dataset = fMRI_data(data_type, data_index=data_index, varbass=False, dir=pre_dir)
    # xyz = 32
    # input_shape = [None, xyz, xyz, xyz, 1]
    # voxnet = VoxNet(input_shape=input_shape, voxnet_type='cut')

    # Extent of each cut dimension, from the inclusive (start, end) pairs.
    true_shape = []
    for x in range(0, len(cut_shape), 2):
        true_shape.append(cut_shape[x + 1] - cut_shape[x] + 1)
    # with tf.Session() as sess:
    #     sess.run(tf.global_variables_initializer())
    #     voxnet.npz_saver.restore(sess,voxnet_point)
    # load the model
    model = keras.models.load_model(voxnet_point)
    print('train_acc')
    train_fmri_evaluation = evaluation()
    train_smri_evaluation = evaluation()
    train_iter = iter(dataset.get_fmri('train')).__next__
    for i in range(100):
        img,label,_ = train_iter()
        predict,y_true = get_label(model,img,label,cut_shape,true_shape)
        predict = np.argmax(predict,axis=1)
        train_smri_evaluation += evaluation(y_predict=predict,y_true=y_true)
        # Progress report every 10 samples (skipping the first).
        if i %10 == 0 and i > 0:
            print(train_smri_evaluation)
        # Collapse the per-slice predictions into one label for the sample.
        y_predict = ensemble_label(predict,2)
        train_fmri_evaluation += evaluation(y_predict = [y_predict],y_true=[label])
    print(train_fmri_evaluation)
    print('test_acc')
    test_fmri_evaluation = evaluation()
    test_smri_evaluation = evaluation()
    test_iter = iter(dataset.get_fmri('test')).__next__
    for i in range(test_size):
        img, label,filename = test_iter()
        predict, y_true = get_label(model, img, label, cut_shape, true_shape)
        predict = np.argmax(predict,axis=1)
        test_smri_evaluation_one = evaluation(y_predict=predict, y_true=y_true)
        test_smri_evaluation += test_smri_evaluation_one
        print(test_smri_evaluation_one)
        print(test_smri_evaluation)
        y_predict = ensemble_label(predict,2)
        test_fmri_evaluation += evaluation(y_predict=[y_predict], y_true=[label])
        print(y_predict,label,test_fmri_evaluation)
        # if y_predict != label:
        #     print(filename)
        #     f_handle.write(filename+'\n')
    if f_handle:
        f_handle.write('ensemble train:\n')
        f_handle.write(str(train_fmri_evaluation) + '\n')
        f_handle.write('ensemble test:\n')
        f_handle.write(str(test_fmri_evaluation) + '\n')
    return test_fmri_evaluation
Example #21
0
def ajax_bgm_recommendation():
    """Run the GAN evaluation, store its result in ``./recommend_result.txt`` and redirect to ``mix_video_audio``."""
    #exec(compile(open('./evaluation.py', "rb").read(), './evaluation.py', 'exec'))
    #subprocess.call("evaluation.py", shell=True)
    evaluator = evaluation.evaluation()
    recommendation = evaluator.gan_evaluation()

    with open('./recommend_result.txt', 'w') as result_file:
        result_file.write(recommendation)
    return redirect(url_for('mix_video_audio'))
 def final_evaluation(self):
     """Evaluate the combined final predictions against the real labels.

     Prints the measures and the accuracy, then returns them as
     ``(eval_measures, overall_accuracy)``.
     """
     paired_labels = eval.combine_pred_real_labels(self.all_final_pred_label,
                                                   self.all_real_label)
     eval_measures, overall_accuracy = eval.evaluation(paired_labels)
     for line in ('evaluation for all model', eval_measures,
                  'accuracy', overall_accuracy):
         print(line)
     return eval_measures, overall_accuracy
 def __init__(self, path, test_size, random_state):
     """Load the image data found under ``path`` and split it into train and test sets."""
     self.process = Process(path)
     self.evaluation = evaluation()
     self.result = self.process.load_image_files_modified()

     split = train_test_split(self.result.data,
                              self.result.target,
                              test_size=test_size,
                              random_state=random_state)
     self.X_train, self.X_test, self.y_train, self.y_test = split
def lgb_cv(cv_train, cv_test, params, low_bound, topk, idx):
    """Train LightGBM on one CV fold, score the held-out fold and pickle the model.

    Args:
        cv_train: training fold, passed to ``utils.preprocess_xgb``.
        cv_test: held-out fold, passed to ``utils.preprocess_xgb``.
        params: LightGBM parameter dict. It is mutated:
            ``params['gpu_device_id']`` is set to ``idx - 1``.
        low_bound: forwarded to ``utils.faron_f1_optim``.
        topk: forwarded to ``utils.faron_f1_optim`` after ``int()``.
        idx: 1-based fold number, used for logging and the GPU id.

    Returns:
        Mean F1 score of the optimised product selection on the held-out fold.

    Side effects:
        Pickles the trained booster under ``constants.LGB_DIR``. The number of
        boosting rounds comes from the module-level ``num_rounds``.
    """
    print('CV Fold {}/5'.format(idx))
    params['gpu_device_id'] = idx - 1
    train_gid, train_feat, train_label = utils.preprocess_xgb(cv_train)
    del cv_train
    # print('| Training Data: Adding product id & aisle id & department id ...')
    # train_feat['product_id'], train_feat['aisle_id'], train_feat['department_id'] = train_gid['product_id'], train_gid['aisle_id'], train_gid['department_id']
    test_gid, test_feat, test_label = utils.preprocess_xgb(cv_test)
    del cv_test
    # print('| Test Data: Adding product id & aisle id & department id ...')
    # test_feat['product_id'], test_feat['aisle_id'], test_feat['department_id'] = test_gid['product_id'], test_gid['aisle_id'], test_gid['department_id']
    print('| Construct lgb Dataset ...')
    lgb_train = lgb.Dataset(
        train_feat, train_label, free_raw_data=True
    )  #, categorical_feature=['product_id', 'aisle_id', 'department_id'])
    del train_feat, train_label
    # lgb_test = lgb.Dataset(test_feat, test_label, free_raw_data=True)
    print('| Training ...')
    gbm = lgb.train(params,
                    lgb_train,
                    num_boost_round=num_rounds,
                    valid_sets=lgb_train)
    del lgb_train
    y_scores = gbm.predict(test_feat, num_iteration=gbm.best_iteration)
    del test_feat
    test_auc_score = roc_auc_score(test_label, y_scores)
    print('| test auc: %s' % test_auc_score)
    gc.collect()

    # Explicit copy: adding columns to a column-subset of test_gid triggers
    # pandas' chained-assignment warning and is ambiguous about which frame
    # is written.
    user_product = test_gid[['user_id', 'product_id', 'order_id']].copy()
    user_product['label'] = test_label
    user_product['score'] = y_scores
    user_product = user_product.sort_values(['user_id', 'order_id', 'score'],
                                            ascending=False)
    gold = evaluation.get_gold(user_product)
    op = user_product.copy()
    # op = utils.shing_f1_optim(op, low_bound, int(topk))
    op = utils.faron_f1_optim(op, low_bound, int(topk))
    # 'products' is a space-separated string; ids become ints, the literal
    # 'None' token is kept as a string.
    op['products'] = op['products'].apply(
        lambda x: [int(i) if i != 'None' else i for i in x.split()])
    # Left-join so every order appears even if no products were selected.
    op = pd.merge(pd.DataFrame({'order_id': user_product.order_id.unique()}),
                  op,
                  on=['order_id'],
                  how='left')

    res = evaluation.evaluation(gold, op[['order_id', 'products']])
    mf1 = res.f1score.mean()
    with open(
            constants.LGB_DIR + 'lgb_{}_{:.6f}_{:.6f}'.format(
                params['boosting_type'], test_auc_score, mf1), 'wb') as f:
        pickle.dump(gbm, f, pickle.HIGHEST_PROTOCOL)
        del gbm
    print('F1 Optimization Result: mean-f1-score {}'.format(mf1))
    del user_product, op, gold, res
    gc.collect()
    return mf1
Example #25
0
def minimax(position, stack, depth, alpha, beta, maximizingPlayer,
            calculations):
    """Alpha-beta minimax over column drops; returns the value of ``position``.

    Args:
        position: board indexed ``position[column][row]``; the maximiser
            places 1, the minimiser places -1, and 0 is written back when a
            move is undone.
        stack: per-column fill height; updated and restored around each
            trial move.
        depth: remaining search depth; at 0 ``evaluation(position)`` is
            returned.
        alpha, beta: current search window.
        maximizingPlayer: True when the side to move maximises.
        calculations: one-element list used as a visited-node counter.

    Returns:
        ``isGameOver(position) * 10000`` when the game is decided, the static
        evaluation at depth 0, otherwise the best value found.
    """
    calculations[0] += 1

    outcome = isGameOver(position)
    if outcome != 0:
        return outcome * 10000
    if depth == 0:
        return evaluation(position)

    # Columns are tried from the centre outwards.
    column_order = (3, 2, 4, 1, 5, 0, 6)

    if maximizingPlayer:
        best = -99999
        for col in column_order:
            row = stack[col]
            if row >= BOARD_HEIGHT:
                continue  # column is full

            position[col][row] = 1
            stack[col] += 1
            score = minimax(position, stack, depth - 1, alpha, beta, False,
                            calculations)
            position[col][row] = 0
            stack[col] -= 1

            best = max(best, score)
            alpha = max(alpha, score)
            if beta <= alpha:
                break  # cutoff
        return best

    best = 99999
    for col in column_order:
        row = stack[col]
        if row >= BOARD_HEIGHT:
            continue  # column is full

        position[col][row] = -1
        stack[col] += 1
        score = minimax(position, stack, depth - 1, alpha, beta, True,
                        calculations)
        position[col][row] = 0
        stack[col] -= 1

        best = min(best, score)
        beta = min(beta, score)
        if beta <= alpha:
            break  # cutoff
    return best
 def final_evaluation(self):
     """
     Overall evaluation across all models' predicted and real labels.
     @return: first: measures for each class, second: average accuracy
     """
     paired_labels = eval.combine_pred_real_labels(
         self.all_models_pred_labels, self.all_models_real_labels)
     eval_measures, overall_accuracy = eval.evaluation(paired_labels)
     for item in (eval_measures, overall_accuracy):
         print(item)
     return eval_measures, overall_accuracy
 def model_eval(self):
     """
     Evaluate this model on its test set; meant to be used after training.
     @return: the evaluation measures and the accuracy, as returned by eval.evaluation
     """
     test_output = self.get_test_output_loss()[0]
     predicted_labels = eval.predict_labels(test_output)
     paired_labels = eval.combine_pred_real_labels(predicted_labels,
                                                   self.test_y)
     eval_measures, accuracy = eval.evaluation(paired_labels)
     for item in (eval_measures, accuracy):
         print(item)
     return eval_measures, accuracy
Example #28
0
def random_walk(n, cnt, p):
    """Search for a low-energy n x n jump maze with a randomised walk.

    Starting from ``generateMatrix(n)``, each of the ``cnt`` iterations picks
    a random non-goal cell and gives it a different jump number.  The move is
    kept when it lowers the current energy, or otherwise with probability
    ``p``; a rejected move is undone.  The lowest-energy matrix seen so far
    is kept as an independent snapshot.

    Args:
        n: side length of the square maze.
        cnt: number of iterations to run.
        p: probability of accepting a move that does not lower the energy.

    Returns:
        Tuple ``(minima, best_maze)``: the lowest energy found and the
        matrix that produced it.
    """
    maze = generateMatrix(n)
    first = evaluation(maze, n)
    minima = first
    print("Init energy: %d" % (first))
    # Take a real copy: ``maze`` is mutated in place below, so a plain
    # assignment would only alias it and the "best" matrix would drift.
    best_maze = maze.copy()

    if n < 2:
        # The only cell (if any) is the goal, so there is nothing to change.
        return (minima, best_maze)

    goal = [n - 1, n - 1]
    for i in range(cnt):
        if i % 100 == 0:
            print("Current energy: %d" % (minima))

        # randomly choose a cell to change, make sure it's not goal state
        new_cell = goal
        while new_cell == goal:
            row = rd.randint(0, n - 1)
            col = rd.randint(0, n - 1)
            new_cell = [row, col]

        # change the jump number, make sure it's not same as before
        old_step = maze[row, col]
        maxrnd = max(n - row - 1, n - col - 1, row, col)
        if maxrnd == 1 and old_step == 1:
            # 1 is the only candidate and the cell already holds it; the
            # redraw loop below would never terminate, so skip this cell.
            continue
        new_step = old_step
        while new_step == old_step:
            new_step = rd.randint(1, maxrnd)
        maze[row, col] = new_step

        # get the new energy
        nexte = evaluation(maze, n)
        if nexte < first or rd.random() < p:
            first = nexte  # accepted: this is now the current energy
            if nexte < minima:  # new minima, store the matrix
                minima = nexte
                best_maze = maze.copy()
        else:
            # Rejected: undo the single change so ``maze`` stays in sync
            # with the current energy ``first``.
            maze[row, col] = old_step

    return (minima, best_maze)
def predict(dataset_iter=test_iter, dataset=test, data_name="test"):
    """Tag every batch of ``dataset_iter``, dump the tags to disk and score them.

    For each batch the highest-scoring tag per position is taken from the
    model output.  One line per question is written to a temporary file
    ``tmp<data_name>.txt`` containing the question words, predicted tags and
    gold tags, tab-separated.  After the loop, precision/recall/F1 from
    ``evaluation`` are printed, the temporary file is passed to ``convert``
    together with the line-id file and the output path, and then removed.

    Only ``args.dataset == 'EntityDetection'`` is supported; any other value
    prints a message and exits.

    Args:
        dataset_iter: batch iterator providing ``init_epoch()`` and batches
            with ``text`` and ``ed`` fields.
        dataset: unused; kept for interface compatibility.
        data_name: name used for the temporary file, the line-id file and
            the output file.
    """
    print("Dataset: {}".format(data_name))
    model.eval()
    dataset_iter.init_epoch()

    fname = "{}.txt".format(data_name)
    temp_file = 'tmp' + fname

    gold_list = []
    pred_list = []

    # ``with`` guarantees the handle is flushed and closed even when the
    # unsupported-dataset branch bails out through exit().
    with open(temp_file, 'w') as results_file:
        for data_batch in dataset_iter:
            scores = model(data_batch)
            if args.dataset != 'EntityDetection':
                print("Wrong Dataset")
                exit()

            predicted = torch.max(scores, 1)[1].view(data_batch.ed.size())
            index_tag = np.transpose(predicted.cpu().data.numpy())
            tag_array = index2tag[index_tag]

            index_question = np.transpose(data_batch.text.cpu().data.numpy())
            question_array = index2word[index_question]

            # Transposed once and reused for both the gold list and lookup.
            index_gold = np.transpose(data_batch.ed.cpu().data.numpy())
            gold_array = index2tag[index_gold]

            gold_list.append(index_gold)
            pred_list.append(index_tag)

            for question, label, gold in zip(question_array, tag_array,
                                             gold_array):
                results_file.write("{}\t{}\t{}\n".format(
                    " ".join(question), " ".join(label), " ".join(gold)))

    if args.dataset == 'EntityDetection':
        P, R, F = evaluation(gold_list, pred_list, index2tag, type=False)
        # NOTE(review): the label is hard-coded to "Dev" whatever data_name
        # is -- confirm whether data_name was intended here.
        print("{} Precision: {:10.6f}% Recall: {:10.6f}% F1 Score: {:10.6f}%".
              format("Dev", 100. * P, 100. * R, 100. * F))
    else:
        print("Wrong dataset")
        exit()

    convert(temp_file,
            os.path.join(args.data_dir, "lineids_{}.txt".format(data_name)),
            os.path.join(results_path, "query.{}".format(data_name)))
    os.remove(temp_file)
def train_neural_network(x_train, train_labels, x_test, orig_test):
    """Fit the 'neural_network_1' model and produce evaluation output.

    The model is trained for 6 epochs with batch size 32 (no validation
    split), then evaluated on the training data itself, and finally handed
    to ``evaluation.submission_nn`` with the test features.

    Args:
        x_train: train dataset, converted with ``np.array``.
        train_labels: train labels; its 'Col2' entry is used as the target.
        x_test: test dataset, converted with ``np.array``.
        orig_test: forwarded unchanged to ``evaluation.submission_nn``.
    """
    features = np.array(x_train)
    holdout_features = np.array(x_test)
    targets = np.array(train_labels['Col2'])

    net = models.make_model(params=features, model_name='neural_network_1')

    ckpt_callback, tb_callback = models.callbacks(
        model_name='nn_submission03_s_1_m1_f_2165.ckpt')

    epochs = 6
    fit_options = {
        'batch_size': 32,
        'epochs': epochs,
        'callbacks': [ckpt_callback, tb_callback],
    }
    history = net.fit(features, targets, **fit_options)

    # All diagnostics below are computed on the training data.
    evaluation.evaluation(net, features, targets)
    evaluation.plot_metrices(epochs, history, if_val=False)
    evaluation.plot_confusion_matrix(net, features, targets)
    evaluation.submission_nn(
        model=net,
        test_features=holdout_features,
        orig_test_df=orig_test,
        submission_name='nn_submission03_s_1_m1_f_2165.csv')
Example #31
0
def validate(test_loader, model, args):
    """Run the model over ``test_loader`` and score the collected outputs.

    Model outputs (moved to CPU) and targets are concatenated across all
    batches and passed as NumPy arrays to ``eva.evaluation`` together with
    the list ``[1, 2, 4, 8]`` (presumably the K values for recall@K --
    confirm against ``eva``).

    Returns:
        Tuple ``(nmi, recall)`` as produced by ``eva.evaluation``.
    """
    # switch to evaluation mode
    model.eval()

    collected_outputs = torch.Tensor()
    collected_labels = torch.LongTensor()

    with torch.no_grad():
        for batch, target in test_loader:
            if args.gpu is not None:
                batch = batch.cuda(args.gpu, non_blocking=True)

            # compute output and grow the running tensors
            batch_output = model(batch).cpu()
            collected_outputs = torch.cat((collected_outputs, batch_output), 0)
            collected_labels = torch.cat((collected_labels, target))

    nmi, recall = eva.evaluation(collected_outputs.numpy(),
                                 collected_labels.numpy(),
                                 [1, 2, 4, 8])
    return nmi, recall
Example #32
0
def minimax(position, depth, alpha, beta, maximizingPlayer):
    """Alpha-beta minimax search over ``position``.

    Cells are tried in the order given by ``child_order``; an empty cell
    holds 0, the maximizing player writes 1 and the minimizing player -1.
    The board is modified in place while searching and every trial move is
    reset to 0 before the next one.

    Returns:
        ``isGameOver(position) * 10000`` when the game is decided,
        ``evaluation(position)`` at depth 0, otherwise the best score
        reachable for the side to move.
    """
    outcome = isGameOver(position)
    if outcome != 0:
        return outcome * 10000
    if depth == 0:
        return evaluation(position)

    if not maximizingPlayer:
        lowest = 999999
        for cell in child_order:
            cx, cy = cell[0], cell[1]
            if position[cx][cy] != 0:
                continue  # occupied, not a legal move

            position[cx][cy] = -1
            score = minimax(position, depth - 1, alpha, beta, True)
            position[cx][cy] = 0

            lowest = min(lowest, score)
            beta = min(beta, score)
            if beta <= alpha:
                break  # cut-off: the maximizer already has something better
        return lowest

    highest = -999999
    for cell in child_order:
        cx, cy = cell[0], cell[1]
        if position[cx][cy] != 0:
            continue  # occupied, not a legal move

        position[cx][cy] = 1
        score = minimax(position, depth - 1, alpha, beta, False)
        position[cx][cy] = 0

        highest = max(highest, score)
        alpha = max(alpha, score)
        if beta <= alpha:
            break  # cut-off: the minimizer already has something better
    return highest
Example #33
0
 def svr_timeseries(self, x_train, y_train, x_test, y_real, kernel):
     '''
     Grid-search SVR hyper-parameters, refit the best model, report and plot it.

     Every (C, gamma, epsilon) combination taken from the three generated
     parameter sets is fitted on the training data and scored on the test
     data; combinations whose product is zero are skipped.  The combination
     with the highest R^2 (``r2_score``) is kept, the model is refitted with
     it, NMSE / DS / profit / R^2 are printed and a 2x2 figure is shown.

     Hand-picked grids used previously:
     c_set = [0.0001,0.001,0.01,0.1,1,10,100,1000]
     gamma_set = [0.0001,0.001,0.01,0.1,10,100,1000]
     epsilon_set = [0.0001,0.001,0.01,0.1,10,100,1000]

     @param x_train: training features passed to ``SVR.fit``
     @param y_train: training targets passed to ``SVR.fit``
     @param x_test: features the fitted model predicts on
     @param y_real: true targets corresponding to ``x_test``
     @param kernel: kernel forwarded to ``SVR``
     @return: None
     '''
     parameter_start = -5
     parameter_stop = 5
     count = 10
     c_set = svm.svmCal.svr.numberGenerate(self, parameter_start, parameter_stop, count)
     gamma_set = svm.svmCal.svr.numberGenerate(self, parameter_start, parameter_stop, count)
     epsilon_set = svm.svmCal.svr.numberGenerate(self, parameter_start, parameter_stop, count)
     c_min = 0
     gamma_min = 0
     epsilon_min = 0
     nmse_result = []
     ds_result = []
     doc_max = -1000 # R square
     doc_result = []
     # Assumes each generated set holds count + 1 values -- TODO confirm
     # against numberGenerate; only the progress estimate depends on it.
     loop_number = (1+count)**3
     loop_count = 0
     percent_count = 0.05
     t0 = time.time()
     for C in c_set:
         for gamma in gamma_set:
             for epsilon in epsilon_set:
                 loop_count += 1
                 if (C * gamma * epsilon != 0):
                     svr_rbf = SVR(kernel=kernel, C=C, gamma=gamma, epsilon = epsilon)
                     y_pred = svr_rbf.fit(x_train, y_train).predict(x_test)
                     nmse = evaluation.evaluation(y_real,y_pred).NMSE()
                     ds = evaluation.evaluation(y_real,y_pred).DS()
                     doc = r2_score(y_real,y_pred)
                     # Profit is only reported for the final model, so it is
                     # not computed for every grid point.
                     nmse_result.append(nmse)
                     ds_result.append(ds)
                     doc_result.append(doc)
                     if (doc > doc_max):
                         c_min = C
                         gamma_min = gamma
                         epsilon_min = epsilon
                         doc_max = doc
                 finished_percent = float(loop_count) / float(loop_number)
                 t1 = time.time()
                 if finished_percent > percent_count :
                     # Linear extrapolation of the elapsed time.
                     minutes_left = ((t1-t0) * (1.0 - finished_percent) / finished_percent) /60
                     print("%d%% %f minutes left" %(percent_count * 100, minutes_left ))
                     percent_count += 0.05

     # Refit with the best combination found and report its metrics.
     svr_rbf = SVR(kernel=kernel, C=c_min, gamma=gamma_min, epsilon = epsilon_min)
     y_pred = svr_rbf.fit(x_train, y_train).predict(x_test)
     nmse = evaluation.evaluation(y_real,y_pred).NMSE()
     ds = evaluation.evaluation(y_real,y_pred).DS()
     profit = ljCao.profit.profitLjCao(y_real,y_pred).Profit()
     profit_time = ljCao.profit.profitLjCao(y_real,y_pred).ProfitTimeSeries()
     doc = r2_score(y_real,y_pred)

     print('MAX DS = %f' %ds)
     print('Hit rate = %f' %(float(ds) / float(len(y_pred))))
     print('NMSE = %f' %nmse)
     print('Profit = %f' %profit)
     print('DOC = %f' %doc)
     print("C = %f, gamma = %f, epsilon = %f" %(c_min,gamma_min,epsilon_min))

     # Top-left: real vs. predicted values.
     x = range(len(y_real))
     plt.figure()
     plt.subplot2grid((2,2),(0, 0))
     plt.scatter(x, y_real, c='k', label='data')
     plt.scatter(x, y_pred, c='r', label='RBF model')
     plt.xlabel('data')
     plt.ylabel('target')
     plt.title('Support Vector Regression')
     # Top-right: profit time series of the final model.
     plt.subplot2grid((2,2),(0, 1))
     x = range(len(profit_time))
     plt.plot(x, profit_time, c='g', label='profit')
     plt.xlabel('day')
     plt.ylabel('profit (times)')
     # Bottom-left: NMSE of every evaluated grid point, in search order.
     plt.subplot2grid((2,2),(1, 0))
     x = range(len(nmse_result))
     plt.plot(x, nmse_result, c='g', label='NMSE')
     plt.xlabel('Times')
     plt.ylabel('NMSE')
     # Bottom-right axes is created but left empty.
     plt.subplot2grid((2,2),(1, 1))
     plt.show()
Example #34
0
user_item.generateCandidatesWithWeights(sc)
gc.collect()
os.rename("subalg/user_item/output/part-00000","subalg/user_item/output/user_item_results.txt")

print("\nStarting item-item logic")
item_item.generateCandidatesWithWeights(sc)  # pass in sc, expects a file to have been written
gc.collect()

print("\n\nDone processing data, begin Logistic Regression...")
lr.runLogisticRegression(sc)

print("Starting evaluation...")
resultLoc = 'logistic_regression/output/final_output.txt'
solLoc = 'data/solution.csv'

print(evaluation.evaluation(resultLoc, solLoc))

### Postprocessing - Mainly should be to delete files created on disk ###
print("Postprocessing - Cleaning up")
# Each file is removed independently so that one missing file does not
# prevent the remaining ones from being deleted.  The user_item results live
# under subalg/user_item/output (see the os.rename above).
cleanup_failed = False
for temp_path in (
    'subalg/item_item/output/item_item_results.txt',
    'subalg/user_user/output/user_user_results.txt',
    'subalg/user_item/output/user_item_results.txt',
    'logistic_regression/output/input_for_lr.txt',
):
    try:
        os.remove(temp_path)
    except OSError:
        cleanup_failed = True

if cleanup_failed:
    print("Something went wrong with removing temporary files, you may need to manually delete them.")

print("Exiting spark...")

Example #35
0
            if len(resultmerchant) != 0:
                str = ''
                for mer in resultmerchant:
                    str = str + mer + ':'
                str = str[0:len(str)-1]
                result.append(str)
                allresult.append(result)
import csv

# Write the collected prediction rows.  Binary mode is what the Python 2 csv
# module expects; the ``with`` block closes the file even if writing fails.
with open('/home/wanghao/Document/tianchi/trainset/trainresult.csv','wb') as outfile:
    writer = csv.writer(outfile)
    # The rows to write are the accumulated results, not the file object.
    writer.writerows(allresult)

# evaluate the result
# NOTE: the name ``eval`` shadows the builtin; kept because later code in
# this script may refer to it.
eval = evaluation()
truefile = '/home/wanghao/Document/tianchi/dataset/train11'
predictfile = '/home/wanghao/Document/tianchi/trainset/trainresult.csv'
merchantfile = '/home/wanghao/Document/tianchi/tianchi_dataset/ijcai2016_merchant_info'
eval.getS_true(truefile)
eval.getS_predict(predictfile)
eval.get_MerchantBudget(merchantfile)
f1 = eval.comp_f1_score()

# Single pre-formatted argument: prints the same text on Python 2 and 3.
print("This train F1 score is  %s" % (f1,))





Example #36
0
def logistic_reg(train_dataSet, train_labels, test_dataSet, test_labels,lamda):
    """Train an L2-regularised logistic regression with SGD and print accuracy.

    Runs 100 epochs of stochastic gradient descent with learning rate
    ``1 / (epoch + 1)`` over a freshly shuffled sample order.  After every
    epoch the regularised mean logistic loss on the training set is computed
    and the parameters with the lowest loss are remembered.  Those
    parameters are then used to classify both data sets (-1 when the score
    is negative, 1 otherwise) and the results of ``evaluation.evaluation``
    are printed.

    Args:
        train_dataSet: training samples; each sample must have 2 features.
        train_labels: training labels (the predictions are -1/1, so the
            labels are presumably -1/1 as well -- confirm against caller).
        test_dataSet: test samples, same layout as ``train_dataSet``.
        test_labels: test labels.
        lamda: L2 regularisation strength.
    """
    MaxIteration = 100

    train_sample_num = len(train_labels)
    test_sample_num = len(test_labels)
    feature_num = 2

    # A real float sentinel; comparing a string with numbers only worked by
    # accident on Python 2 and raises on Python 3.
    min_loss = float('inf')
    train_predict_cat = numpy.zeros(train_sample_num)
    test_predict_cat = numpy.zeros(test_sample_num)

    w = numpy.zeros(feature_num)
    b = 0
    min_w = numpy.zeros(feature_num)
    min_b = 0

    # shuffle() needs a mutable sequence; range() is not one on Python 3.
    shuffle_order = list(range(train_sample_num))

    # start training
    for iteration in range(MaxIteration):

        # Float division: with integers this is 0 from the second epoch on
        # under Python 2, which silently stops all learning.
        learn_rate = 1.0 / (iteration + 1)
        shuffle(shuffle_order)

        # stochastic gradient descent
        for t in shuffle_order:
            margin = numpy.multiply(
                numpy.add(numpy.inner(w, train_dataSet[t]), b),
                train_labels[t])
            # learn_rate * (gradient magnitude of the logistic loss)
            step = learn_rate / (1 + numpy.exp(margin))
            w = numpy.add((1 - lamda * learn_rate) * w,
                          numpy.multiply(train_labels[t] * step, train_dataSet[t]))
            # ``step`` already contains the learning rate; it must not be
            # applied a second time for the bias.
            b += train_labels[t] * step

        # calculate loss over every training sample (indexed by i, not by
        # the last SGD index)
        temp_loss = 0
        for i in range(train_sample_num):
            margin = numpy.multiply(
                numpy.add(numpy.inner(w, train_dataSet[i]), b),
                train_labels[i])
            # log(1 + exp(-margin)), computed without overflow
            temp_loss += numpy.logaddexp(0.0, -margin)
        square = lamda * numpy.sum(numpy.square(w)) / 2.0
        update_loss = temp_loss / train_sample_num + square

        # record minimum loss
        if min_loss > update_loss:
            min_loss = update_loss
            min_w = w.copy()
            min_b = b

    # Single pre-formatted arguments print identically on Python 2 and 3.
    print('min loss %s' % (min_loss,))
    print("RESULT: w: " + str(min_w) + " b: " + str(min_b))

    # predict training set
    for i in range(train_sample_num):
        if (numpy.inner(train_dataSet[i], min_w) + min_b) < 0:
            train_predict_cat[i] = -1
        else:
            train_predict_cat[i] = 1
    print('Accuracy for training dataset:  %s'
          % (evaluation.evaluation(train_predict_cat, train_labels),))

    # predict test set
    for i in range(test_sample_num):
        if (numpy.inner(test_dataSet[i], min_w) + min_b) < 0:
            test_predict_cat[i] = -1
        else:
            test_predict_cat[i] = 1
    print('Accuracy for test dataset:  %s'
          % (evaluation.evaluation(test_predict_cat, test_labels),))
Example #37
0
 def svr_timeseries(self, x_train, y_train, x_test, y_real, kernel):
     '''
     Grid-search SVR hyper-parameters, refit the best model, report and plot it.

     Every (C, gamma, epsilon) combination taken from the three generated
     parameter sets is fitted on the training data and scored on the test
     data; combinations whose product is zero are skipped.  The combination
     with the highest R^2 (``r2_score``) is kept, the model is refitted with
     it, DS / NMSE / R^2 / MAE are printed and two figures are shown.
     Profit reporting is disabled in this variant.

     Hand-picked grids used previously:
     c_set = [0.0001,0.001,0.01,0.1,1,10,100,1000]
     gamma_set = [0.0001,0.001,0.01,0.1,10,100,1000]
     epsilon_set = [0.0001,0.001,0.01,0.1,10,100,1000]

     @param x_train: training features passed to ``SVR.fit``
     @param y_train: training targets passed to ``SVR.fit``
     @param x_test: features the fitted model predicts on
     @param y_real: true targets corresponding to ``x_test``
     @param kernel: kernel forwarded to ``SVR``
     @return: None
     '''
     parameter_start = -4
     parameter_stop = 4
     count = 10.0
     c_set = self.numberGenerate(parameter_start, parameter_stop, count)
     gamma_set = self.numberGenerate(parameter_start, parameter_stop, count)
     epsilon_set = self.numberGenerate(parameter_start, parameter_stop, count)
     # Narrower ranges tried earlier:
     #c_set = self.numberGenerate(-2, 0, count)
     #gamma_set = self.numberGenerate(1, 2, count)
     #epsilon_set = self.numberGenerate(-2, 0, count)
     # Single parenthesised argument: same output on Python 2 and 3.
     print(c_set)
     c_min = 0
     gamma_min = 0
     epsilon_min = 0
     nmse_result = []
     ds_result = []
     mae_result = []
     doc_max = -1000 # R square
     doc_result = []
     # Assumes each generated set holds count + 1 values -- TODO confirm
     # against numberGenerate; only the progress estimate depends on it.
     loop_number = (1+count)**3
     loop_count = 0
     percent_count = 0.05
     t0 = time.time()
     #=======================================================================
     # Fixed parameters from an earlier run, to skip the search:
     # c_set = [0.088914]
     # gamma_set = [7.924466]
     # epsilon_set = [0.019905]
     #=======================================================================
     for C in c_set:
         for gamma in gamma_set:
             for epsilon in epsilon_set:
                 loop_count += 1
                 if (C * gamma * epsilon != 0):
                     svr_rbf = SVR(kernel=kernel, C=C, gamma=gamma, epsilon = epsilon)
                     y_pred = svr_rbf.fit(x_train, y_train).predict(x_test)

                     nmse = evaluation.evaluation(y_real,y_pred).NMSE()
                     ds = evaluation.evaluation(y_real,y_pred).DS()
                     mae = evaluation.evaluation(y_real, y_pred).MAE()
                     doc = r2_score(y_real,y_pred)
                     nmse_result.append(nmse)
                     ds_result.append(ds)
                     doc_result.append(doc)
                     mae_result.append(mae)
                     if (doc > doc_max):
                         c_min = C
                         gamma_min = gamma
                         epsilon_min = epsilon
                         doc_max = doc
                 finished_percent = float(loop_count) / float(loop_number)
                 t1 = time.time()
                 if finished_percent > percent_count :
                     # Linear extrapolation of the elapsed time.
                     minutes_left = ((t1-t0) * (1.0 - finished_percent) / finished_percent) /60
                     print("%d%% %f minutes left" %(percent_count * 100, minutes_left ))
                     percent_count += 0.05

     # Refit with the best combination found and report its metrics.
     svr_rbf = SVR(kernel=kernel, C=c_min, gamma=gamma_min, epsilon = epsilon_min)
     y_pred = svr_rbf.fit(x_train, y_train).predict(x_test)
     nmse = evaluation.evaluation(y_real,y_pred).NMSE()
     ds = evaluation.evaluation(y_real,y_pred).DS()
     mae = evaluation.evaluation(y_real,y_pred).MAE()
     doc = r2_score(y_real,y_pred)

     print('MAX DS = %f' %ds)
     print('Hit rate = %f' %(float(ds) / float(len(y_pred))))
     print('NMSE = %f' %nmse)
     print('DOC = %f' %doc)
     print("MAE = %f" %mae)
     print("C = %f, gamma = %f, epsilon = %f" %(c_min,gamma_min,epsilon_min))

     # Top-left: real vs. predicted values.
     x = range(len(y_real))
     plt.figure()
     plt.subplot2grid((2,2),(0, 0))
     plt.scatter(x, y_real, c='k', label='data')
     plt.scatter(x, y_pred, c='r', label='RBF model')
     plt.xlabel('data')
     plt.ylabel('target')
     plt.title('Support Vector Regression')
     # Bottom-left: NMSE of every evaluated grid point, in search order.
     plt.subplot2grid((2,2),(1, 0))
     x = range(len(nmse_result))
     plt.plot(x, nmse_result, c='g', label='NMSE')
     plt.xlabel('Times')
     plt.ylabel('NMSE')
     # Bottom-right axes is created but left empty.
     plt.subplot2grid((2,2),(1, 1))
     plt.show()
     # Second figure: residuals against the prediction.
     plt.scatter(y_pred, y_real - y_pred)
     plt.xlabel('Prediction lowest price in 2nd day')
     plt.ylabel("(Real - Prediction) lowest price in 2nd day")
     plt.show()
def _fetch_labelled_tweets(cursor, tablename, label_column, order, limit):
    """Return up to ``limit`` tweets whose ``label_column`` is 'y', or None on failure."""
    # NOTE(review): the table name is concatenated into the statement (SQL
    # identifiers cannot be bound as parameters), so ``tablename`` must only
    # ever come from trusted code, never from user input.
    query = ("SELECT tweet FROM " + tablename + " WHERE " + label_column +
             "='y' ORDER BY tid " + order + " LIMIT " + str(limit))
    try:
        cursor.execute(query)
        return cursor.fetchall()
    except Exception as exc:
        print("Select Error -> %s" % (exc,))
        return None


def _build_feature_set(traffic_rows, nontraffic_rows, stop_words):
    """Preprocess, label and feature-extract two groups of fetched tweet rows."""
    cleaned = [preprocessor().preprocess(row[0], stop_words) for row in traffic_rows]
    traffic_tweets = add_label(cleaned, 'traffic')

    cleaned = [preprocessor().preprocess(row[0], stop_words) for row in nontraffic_rows]
    nontraffic_tweets = add_label(cleaned, 'nontraffic')

    combined_tweets = traffic_tweets + nontraffic_tweets
    return [(features_extractor(item[0]), item[1]) for item in combined_tweets]


def trainClassifier(conn, cursor, tablename, test_tweet, enable_evaluation):
    """Train the Naive Bayes classifier, save it and classify ``test_tweet``.

    The first 681 traffic and 681 non-traffic tweets (ascending ``tid``) of
    ``tablename`` form the training set.  The trained classifier is saved as
    ``naive_bayes.pickle`` under ``~/nltk_data/classifiers`` and the
    classification of ``test_tweet`` is printed.  When ``enable_evaluation``
    is ``'test'``, the last 375 tweets of each class (descending ``tid``)
    are additionally fetched and passed to ``evaluation``.

    Errors are reported on stdout and never raised.  If any SELECT fails,
    nothing is trained.

    Args:
        conn: database connection; unused, kept for interface compatibility.
        cursor: database cursor used for the SELECT statements.
        tablename: table holding the labelled tweets (trusted input only).
        test_tweet: raw tweet text to classify after training.
        enable_evaluation: pass ``'test'`` to also evaluate the classifier.
    """
    # Stop-word lookup from the stop_words table is currently disabled, so
    # an empty list is handed to the preprocessor.
    stop_words = []
    evaluate = enable_evaluation == 'test'

    # Fetch the labelled tweets for the train set
    ttweets = _fetch_labelled_tweets(cursor, tablename, 'ptraffic', 'ASC', 681)
    nttweets = _fetch_labelled_tweets(cursor, tablename, 'ntraffic', 'ASC', 681)

    # If the user chose to evaluate the classifier fetch more labelled
    # tweets for testing
    ttweets_test = nttweets_test = None
    if evaluate:
        ttweets_test = _fetch_labelled_tweets(cursor, tablename, 'ptraffic', 'DESC', 375)
        nttweets_test = _fetch_labelled_tweets(cursor, tablename, 'ntraffic', 'DESC', 375)

    # A failed SELECT has already been reported; stop here instead of
    # running into a NameError further down.
    if ttweets is None or nttweets is None:
        return
    if evaluate and (ttweets_test is None or nttweets_test is None):
        return

    try:
        train_set = _build_feature_set(ttweets, nttweets, stop_words)

        if evaluate:
            test_set = _build_feature_set(ttweets_test, nttweets_test, stop_words)

        # Train our classifier using the training set
        classifier = nltk.NaiveBayesClassifier.train(train_set)

        # Save the classifier in a .pickle file
        name = 'naive_bayes.pickle'
        fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), name)
        dump_classifier(classifier, fname)

        # Classify the tweet
        test_tweet1 = preprocessor().preprocess(test_tweet, stop_words)
        test = features_extractor(test_tweet1)
        proba = classifier.prob_classify(test)
        print("\nThe tweet '%s' is about: %s with probability: %s\n" % (test_tweet, classifier.classify(test), proba.prob('traffic')))

        # If the user chose to evaluate the classifier apply the evaluation
        # techniques
        if evaluate:
            evaluation(test_set, classifier)

    except Exception as exc:
        # Deliberately best-effort: report the problem and carry on.
        print("Error -> %s" % (exc,))