Code Example #1
def proxyVote(b, e):
    # have the first producer account cast its own vote, then register it as a proxy
    vote(firstProducer, firstProducer + 1)
    proxy = accounts[firstProducer]['name']
    # config['cleos']['path'] is prepended to the subcommand, so it is expected to end with a space
    retry(config['cleos']['path'] + 'system regproxy ' + proxy)
    sleep(1.0)
    # point voters b .. e-1 at the proxy
    for i in range(b, e):
        voter = accounts[i]['name']
        retry(config['cleos']['path'] + 'system voteproducer proxy ' + voter + ' ' + proxy)
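The `vote` and `retry` helpers come from the surrounding boot script and are not shown on this page. A minimal sketch of a `retry` that reruns a shell command until it exits cleanly (the name, signature, and retry limit are assumptions, not the script's actual code):

import subprocess
import time

def retry(command, max_attempts=5, delay=1.0):
    # Hypothetical helper: rerun a shell command until it exits 0.
    for attempt in range(max_attempts):
        if subprocess.call(command, shell=True) == 0:
            return
        time.sleep(delay)
    raise RuntimeError('command failed after {} attempts: {}'.format(max_attempts, command))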
Code Example #2
File: knn.py  Project: zhiyzuo/kNN4Spark
    def predict(self, other_data, _intv=None):
        '''
            Returns a list of (index, prediction) pairs.
            @param
            other_data should also be an RDD object;
                        it does NOT carry a class label for each sample;
                        each item is stored as (index, features)

            _intv restricts prediction to a subset of test indices
        '''
        from utils import cdist, vote

        # use _intv when splitting one test set into multiple chunks
        if _intv is None:
            length = other_data.count()
            _intv = range(length)

        # Create pairs: each test point is associated with a subgroup of train data
        pairs = other_data.cartesian(self.data_feature)
        predictions = []
        for test_idx in _intv:
            dist_label_tuple_list = []
            # get the pairs for this test index; collect() so we can loop locally
            idx_pairs = pairs.filter(
                lambda pair: pair[0][0] == test_idx).collect()
            # loop through each (test point, train subgroup) pair
            for idx_p in idx_pairs:
                # find the train subgroup's index
                train_subgroup_idx = idx_p[1][0]
                # and the class labels of that subgroup
                C = self.data_label.filter(
                    lambda pair: pair[0] == train_subgroup_idx).collect()[0]
                # keep the k nearest (distance, label) tuples from this subgroup
                dist_label_tuple_list.extend(cdist(idx_p[0], idx_p[1], C, self.k))
            predictions.append((test_idx, vote(dist_label_tuple_list, self.k)))
            del idx_pairs

        return predictions
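`cdist` and `vote` are project-local helpers from kNN4Spark's utils module. A plausible sketch of this `vote`, assuming it keeps the k nearest (distance, label) tuples and returns the majority label (an assumption about the real implementation):

from collections import Counter

def vote(dist_label_tuples, k):
    # Hypothetical kNN vote: keep the k smallest distances, majority label wins.
    nearest = sorted(dist_label_tuples, key=lambda t: t[0])[:k]
    return Counter(label for _, label in nearest).most_common(1)[0][0]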
Code Example #3
# imports below are assumed: LOF/PCA/KNN from pyod, SUOD from the suod package;
# get_data and vote are project-local helpers not shown here
import mlflow
import mlflow.sklearn
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.pca import PCA
from sklearn.metrics import roc_auc_score
from suod.models.base import SUOD


def train():
    dataset = get_data(1000, 10, 100)  # 1000 normal samples, 10 anomalies, 100 features
    contamination = 0.01
    with mlflow.start_run():
        base_estimators = [
            LOF(n_neighbors=5, contamination=contamination),
            LOF(n_neighbors=15, contamination=contamination),
            LOF(n_neighbors=25, contamination=contamination),
            PCA(contamination=contamination),
            KNN(n_neighbors=5, contamination=contamination),
            KNN(n_neighbors=15, contamination=contamination),
            KNN(n_neighbors=25, contamination=contamination)]
        model = SUOD(base_estimators=base_estimators, n_jobs=6,
                     rp_flag_global=True,
                     bps_flag=True,
                     approx_flag_global=False,
                     contamination=contamination)
        model.fit(dataset)
        model.approximate(dataset)
        predicted_labels = model.predict(dataset)
        voted_labels = vote(predicted_labels)
        true_labels = [0] * 1000 + [1] * 10
        # roc_auc_score expects the ground truth first, then the scores
        auc_score = roc_auc_score(true_labels, voted_labels)
        print("The resulting area under the ROC curve is {}".format(auc_score))
        mlflow.log_metric("auc_score", auc_score)
        mlflow.sklearn.log_model(model, "anomaly_model", conda_env="conda.yaml")
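`vote` is project-local and not shown. Assuming `predict` yields one 0/1 label per base estimator per sample, a minimal majority-vote sketch (the shape and the 0.5 threshold are assumptions):

import numpy as np

def vote(predicted_labels):
    # Hypothetical: predicted_labels has shape (n_samples, n_estimators);
    # flag a sample as anomalous when more than half the estimators agree.
    labels = np.asarray(predicted_labels)
    return (labels.mean(axis=1) > 0.5).astype(int)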
Code Example #4
File: score_test.py  Project: blackbak/Socure
# assumed imports: requests and numpy; get_data, vote, and args are project-local
import numpy as np
import requests


def main():
    test_data = get_data(args.n_normal, args.n_anomaly, 100)
    http_data = test_data.to_json(orient="split")
    headers = {
        'Content-Type': 'application/json',
    }
    # score against a locally served MLflow model (e.g. `mlflow models serve -p 1234`)
    response = requests.post("http://localhost:1234/invocations",
                             headers=headers,
                             data=http_data)
    predictions = np.array(response.json())
    voted_labels = vote(predictions)
    print("The predictions of the anomaly ensemble model through voting are:")
    print(voted_labels)
Code Example #5
    def test_combo(self, combo, mode='test'):
        assert mode in ['dev', 'test']

        if mode == 'test':
            input_path = self.test_path
            gold_data = self.test_gold
        else:
            input_path = self.dev_path
            gold_data = self.dev_gold

        preds = [self.get_tgt_from_model(model_name) for model_name in combo]
        voted = utils.vote(preds)
        acc = utils.evaluate(gold_data, voted)
        return acc
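`utils.vote` here combines the per-model prediction lists; a sketch assuming a per-instance majority vote, with ties broken by first occurrence:

from collections import Counter

def vote(preds):
    # Hypothetical: preds is one prediction list per model, all equal length;
    # return the most common prediction for each instance.
    return [Counter(inst).most_common(1)[0][0] for inst in zip(*preds)]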
Code Example #6
  for k in range(len(kernel)):
    svm.append(SVC(kernel=kernel[k]))
    # fit once, then score on both splits (the original refit the SVM for each score)
    svm[k].fit(Xtr, ytr)
    score_train[k] = svm[k].score(Xtr, ytr)
    score_test[k] = svm[k].score(Xte, yte)
    print("{} frames, training accuracy: {} @ {} kernel".format(nf, score_train[k], kernel[k]))
    print("{} frames, testing accuracy: {} @ {} kernel".format(nf, score_test[k], kernel[k]))

  ind_max = np.argmax(score_test)
  best_kernel[i] = kernel[ind_max]
  best_predictor.append(svm[ind_max])
  high_score_train[i] = score_train[ind_max]
  high_score_test[i] = score_test[ind_max]
  # Predictions on all frames
  classes_test = svm[ind_max].predict(Xte)
  # Vote prediction for trials, even weight
  pred_even[i] = utils.vote(classes_test, nf, vote_opt="even")
  assert pred_even[i].shape == test_labels.shape
  # Calculate even prediction accuracy
  acc_even[i] = np.sum(pred_even[i] == test_labels, dtype=np.float32) / num_examples_test
  # Vote prediction for trials, discount weight
  pred_disc[i] = utils.vote(classes_test, nf, vote_opt="disc")
  # Calculate discounted prediction accuracy
  acc_disc[i] = np.sum(pred_disc[i] == test_labels, dtype=np.float32) / num_examples_test
  # Vote prediction for trials, logarithmic weight
  pred_logr[i] = utils.vote(classes_test, nf, vote_opt="logr")
  # Calculate logarithmic prediction accuracy
  acc_logr[i] = np.sum(pred_logr[i] == test_labels, dtype=np.float32) / num_examples_test

# Find the best predictor across weighting schemes
pred_accs = np.array([acc_even, acc_disc, acc_logr])
ind = np.unravel_index(np.argmax(pred_accs, axis=None), pred_accs.shape)
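`utils.vote` here reduces per-frame SVM predictions to one label per trial. A sketch under the assumptions that each trial spans nf consecutive frames and that the "disc" and "logr" options weight later frames more heavily (both assumptions about the real helper):

import numpy as np

def vote(frame_preds, nf, vote_opt="even"):
    # Hypothetical: reshape into (n_trials, nf) and take a weighted vote per trial.
    frames = np.asarray(frame_preds).reshape(-1, nf)
    if vote_opt == "even":
        weights = np.ones(nf)
    elif vote_opt == "disc":
        weights = np.arange(1, nf + 1, dtype=float)   # linearly increasing
    elif vote_opt == "logr":
        weights = np.log(np.arange(2, nf + 2))        # logarithmically increasing
    else:
        raise ValueError(vote_opt)
    preds = np.empty(frames.shape[0], dtype=frames.dtype)
    for t, trial in enumerate(frames):
        classes = np.unique(trial)
        scores = [weights[trial == c].sum() for c in classes]
        preds[t] = classes[np.argmax(scores)]
    return preds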
Code Example #7
    indte = range(Xte.shape[0])  # index of test examples
    predictions = classifier.predict(input_fn=test_input_fn)
    clste = np.zeros(yte.shape).astype(int)
    correct_sum = 0
    for pred, ind in zip(predictions, indte):
        clste[ind] = pred["classes"]
        probability = pred["probabilities"][clste[ind]]
        print("Prediction is {} {:.1f}%, expected {}".format(
            clste[ind], 100 * probability, yte[ind]))
        if clste[ind] == yte[ind]:
            correct_sum += 1
    acc_te = correct_sum / float(len(indte))  # true division even on Python 2
    assert abs(acc_te - high_score_test[i]) < 1e-4

    # Vote prediction for trials, even weight
    pred_even[i] = utils.vote(clste, nf, vote_opt="even")
    assert pred_even[i].shape == test_labels.shape
    # Calculate even prediction accuracy
    acc_even[i] = np.sum(pred_even[i] == test_labels,
                         dtype=np.float32) / num_examples_test
    # Vote prediction for trials, discount weight
    pred_disc[i] = utils.vote(clste, nf, vote_opt="disc")
    # Calculate discounted prediction accuracy
    acc_disc[i] = np.sum(pred_disc[i] == test_labels,
                         dtype=np.float32) / num_examples_test
    # Vote prediction for trials, logarithmic weight
    pred_logr[i] = utils.vote(clste, nf, vote_opt="logr")
    # Calculate logarithmic prediction accuracy
    acc_logr[i] = np.sum(pred_logr[i] == test_labels,
                         dtype=np.float32) / num_examples_test
    # Times up
    end_t = time.time()
Code Example #8
                label = logits.argmax(dim=1).tolist()
                result_list.extend(label)

        fold_result.append(result_list)
        fold_logits_result.append(result_logits_list)
        # torch.cuda.empty_cache()

    return fold_result, fold_logits_result


if __name__ == "__main__":
    print('正在预处理...')
    fold_all, test_bert_list = utils.main()
    if do_trian:
        train(fold_all)

    # 测试
    if do_test:
        test_dataset = layers.Test_Dataset(test_bert_list)
        test_dataloader = DataLoader(dataset=test_dataset,
                                     batch_size=BATCH_SIZE,
                                     shuffle=False)
        fold_list, fold_logits_list = test(
            test_dataloader)  # fold_logits_list:(5, 5000, 2)
        vote_result = utils.vote(fold_list)
        vote_result_list, fold_logits_sum_list = utils.vote_logits(
            fold_logits_list)
        utils.write_csv(vote_result)
        utils.write_csv2(vote_result_list, fold_logits_sum_list)
        print('测试结果保存成功,请提交')
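`utils.vote` and `utils.vote_logits` are project helpers. A sketch of `vote_logits` assuming it sums the per-fold logits and takes the per-sample argmax (both return values are assumptions matched to the call site above):

import numpy as np

def vote_logits(fold_logits_list):
    # Hypothetical: fold_logits_list has shape (n_folds, n_samples, n_classes).
    logits_sum = np.asarray(fold_logits_list).sum(axis=0)
    return logits_sum.argmax(axis=1).tolist(), logits_sum.tolist()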
Code Example #9
    def predict(self):
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))

        variable_to_save = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        saver = tf.train.Saver(variable_to_save)
        saver.restore(sess, self.para.modelLoadPath)

        if self.para.dataset == 'SICK':
            s1, s2, score, slen1, slen2, idx = self.datas.getTestSet()
            sc = np.reshape(score, (-1, 1))
            feedDatas = [s1, s2, sc, slen1, slen2]
            _loss, _prob, _y, _prob_mse, _mse, _pr, _a1, _a2 = sess.run(
                [self.tensorDict['loss'], self.tensorDict['prob'], self.tensorDict['y'],
                 self.tensorDict['prob_mse'], self.tensorDict['mse'], self.tensorDict['pearson_r'],
                 self.tensorDict['sent1_annotation'], self.tensorDict['sent2_annotation']],
                feed_dict={placeholder: feedData for placeholder, feedData in zip(self.placehodlers, feedDatas)})
            print('=======================test phase========================')
            print('test set loss: \t' + str(_loss))
            print('test set prob_MSE: \t' + str(_prob_mse))
            print('test set score_MSE: \t' + str(_mse))
            print('test set pearson_r: \t', _pr)
            print('test set spearman_rho: \t', utils.spearman_rho(_y, sc))

            utils.analysisBatchMatrixDependency(_a1, slen1)  # measure redundancy of the annotation matrix

            if self.para.modelType == 6:
                # inspect the annotation matrix on a few random samples
                for i in range(10):
                    iid = random.randint(0, len(s1) - 1)  # randint is inclusive on both ends
                    sent1 = self.datas.displaySent(s1[iid], slen1[iid])
                    sent2 = self.datas.displaySent(s2[iid], slen2[iid])
                    annotation1 = np.squeeze(np.transpose(_a1[iid, :slen1[iid], :]))
                    annotation2 = np.squeeze(np.transpose(_a2[iid, :slen2[iid], :]))
                    utils.displayAttentionMat(sent1, annotation1, sent2, annotation2)

        elif self.para.dataset == 'WikiQA':
            # test set
            s1, s2, score, slen1, slen2, idx = self.datas.getTestSet()
            sc = np.reshape(score, (-1, 1))
            feedDatas = [s1, s2, sc, slen1, slen2]
            _loss, _prob_pos, _annotation = sess.run(
                [self.tensorDict['loss'], self.tensorDict['prob_of_positive'], self.tensorDict['sent2_annotation']],
                feed_dict={placeholder: feedData for placeholder, feedData in zip(self.placehodlers, feedDatas)})
            MRR, MAP = self.datas.evaluateOn(_prob_pos, 'test')
            print('test set loss: \t%f' % _loss)
            print('test set MRR: \t%f' % MRR)
            print('test set MAP: \t%f' % MAP)

            utils.analysisBatchMatrixDependency(_annotation, slen2)

            # randomly inspect some questions
            for i in range(10):
                iid = random.randint(1, 100)
                self.datas.displayQuestion(_prob_pos, iid, dataset='test')
                input('Press Enter to continue...')

        elif self.para.dataset == 'LBA':
            # self.datas.inspectSentByLabel('test', 'Q')  # debug use
            # inspect all incorrectly classified samples (label_set='all')
            s1, s2, score, slen1, slen2, evalSet_label, ref_label, L = self.datas.getEvalSet('both', label_set='all')

            sc = np.reshape(score, (-1, 1))
            labelMap = self.datas.digitLabel
            # M[i][j]: number of samples predicted as category j whose true label is i;
            # original labels are sorted lexicographically
            M = np.zeros((len(labelMap), len(labelMap)), dtype=int)
            for k in range(len(evalSet_label)):
                feedDatas = [s1[k * L:(k + 1) * L], s2[k * L:(k + 1) * L], sc[k * L:(k + 1) * L],
                             slen1[k * L:(k + 1) * L], slen2[k * L:(k + 1) * L]]
                prob_list = []
                # each evaluation sample expands to L sentence pairs;
                # process these L records in small batches since L is large
                for batch_idx in range(L // 500 + 1):
                    batch_datas = [fd[batch_idx * 500:(batch_idx + 1) * 500] for fd in feedDatas]
                    _, _prob_pos = sess.run(
                        [self.tensorDict['loss'], self.tensorDict['prob_of_positive']],
                        feed_dict={placeholder: feedData for placeholder, feedData in zip(self.placehodlers, batch_datas)})
                    prob_list.append(_prob_pos)
                _prob_pos = np.concatenate(prob_list, axis=0)
                pred, rk = utils.vote(ref_label[k * L:(k + 1) * L], _prob_pos, top_k=15)
                true_label = labelMap[evalSet_label[k]]
                pred_label = labelMap[pred]
                M[true_label][pred_label] += 1
                if k % (len(evalSet_label) // 100) == 0:
                    print('progress: %f' % (1.0 * k / len(evalSet_label)))
                    print(M)
            print('confusion matrix:\t')
            print(np.array2string(M))
            print('overall accuracy: %f' % (1.0 * np.sum(np.diag(M)) / np.sum(M)))
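`utils.vote` in the LBA branch receives the reference labels of the L candidate pairs plus their positive-class probabilities. A sketch assuming it majority-votes the labels of the top_k most probable pairs and returns the winning label with its vote count (the meaning of the second return value, rk, is an assumption):

from collections import Counter
import numpy as np

def vote(ref_labels, probs, top_k=15):
    # Hypothetical: rank pairs by probability, vote among the top_k labels.
    order = np.argsort(np.asarray(probs).ravel())[::-1][:top_k]
    label, count = Counter(ref_labels[i] for i in order).most_common(1)[0]
    return label, count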
Code Example #10
    def select_new_data(self):
        self.logger.log('general', 'vote and select')
        preds = []

        # write next batch of extra data into a file
        input_path = f'{self.exp_dir}/tmp/extra_data.txt'
        self.data_iterator.get_next(input_path, self.cfg['num_input_data'])
        src = self.task.get_src_from_file(input_path)

        for model_name in self.best_combo:
            tool = model_name.split('.')[2]
            model_path = self.model_pool[model_name]

            output_path = f'{self.exp_dir}/tmp/dev.{model_name}.txt'
            self.predict(tool, model_path, input_path, output_path)

            tgt = self.task.get_tgt_from_file(output_path)
            preds.append(tgt)

        n = len(self.data_pool)
        self.data_pool[f'd{n}'] = f'{self.exp_dir}/data/d{n}.txt'

        # get agreement rate for each sentence

        # to simulate single-model self-training
        if self.cfg.get('self_training', False):
            model_name = list(self.model_pool.keys())[-1]
            output_path = f'{self.exp_dir}/tmp/dev.{model_name}.txt'
            agreements = self.task.get_confidence_from_file(output_path)
        else:
            agreements = []
            for inst_preds in zip(*preds):
                agree = utils.get_agreement(inst_preds)
                # agree = utils.ngram_agreement(sent_preds, 3).mean() # agreement ratio for the sentence
                agreements.append(agree)

        # majority vote for each sentence (a bit redundant, but fine)
        voted = utils.vote(preds)

        print('src', len(src))
        print('voted', len(voted))
        print('preds', len(preds))
        print('agreement', len(agreements))

        # select the instances with high agreement and add as new training data on top of d0
        copyfile(self.data_pool['d0'], self.data_pool[f'd{n}'])

        # ALTERNATIVE:
        # select the instances with high agreement and add as new training data on top of the previous data version
        # copyfile(self.data_pool[f'd{n-1}'], self.data_pool[f'd{n}'])

        num_selected = 0
        with open(self.data_pool[f'd{n}'], 'a') as out:
            for agree, s, t in sorted(
                    zip(agreements, src,
                        voted), reverse=True)[:self.cfg['num_output_data']]:
                if agree > self.cfg['min_agreement']:
                    num_selected += 1
                    out.write(f'{s}\t{t}\n')
                    # remember in extra data
                    self.data_iterator.selected.add(s)

        self.logger.log(
            'general',
            f"selected {num_selected} / {self.cfg['num_input_data']} instances"
        )

        return f'd{n}'
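`utils.get_agreement` is not shown; a sketch assuming it returns the fraction of models whose prediction matches the majority prediction for the instance:

from collections import Counter

def get_agreement(inst_preds):
    # Hypothetical: share of models matching the most common prediction.
    _, count = Counter(inst_preds).most_common(1)[0]
    return count / len(inst_preds)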
Code Example #11
import toml
from utils import vote, sleep, listProducers

config = toml.load('./config.toml')

if __name__ == '__main__':
    # have the first num_voters accounts vote, then print the producer ranking
    vote(0, 0 + config['general']['num_voters'])
    sleep(1)
    listProducers()
Code Example #12
File: main.py  Project: likunlun0618/DAT264x
    for fold_idx in range(opt['fold']):
        print('fold %d:' % fold_idx)

        # prepare paths
        output = os.path.join(opt['out_dir'], str(fold_idx))
        train_file = os.path.join(opt['label_dir'], 'train_%d.csv' % fold_idx)
        valid_file = os.path.join(opt['label_dir'], 'valid_%d.csv' % fold_idx)

        # train
        acc, model = train(opt['train_dir'], train_file, valid_file, output,
                           opt['train_bs'], opt['num_workers'],
                           opt['num_epochs'], opt['max_N'], opt['lr_list'],
                           opt['augment'], opt)
        acc_list.append(acc)

        # save this fold's test predictions
        result, _ = final_test(model, opt['test_dir'], opt['test_file'])
        result_list.append(result)
        write_csv(os.path.join(output, 'result.csv'), result)

        print('')

    # print the average accuracy over the validation folds
    print('total accuracy:', sum(acc_list) / opt['fold'])

    # vote across folds
    final_result = vote(result_list)

    # save the voted result
    write_csv(os.path.join(opt['out_dir'], 'final_result.csv'), final_result)