Example #1 — File: RCAE.py, Project: kiminh/AMAD
def run(args):
	"""Train the RCAE model on the dataset referenced by ``args.input``.

	Args:
		args: parsed command-line arguments. Must provide ``.input`` (dataset
			path) and ``.categorical`` (bool selecting the loader), plus
			whatever ``Config`` consumes (``epoch``, ``batch_size``,
			``hidden_dim``, ...).

	Side effects:
		Builds a fresh TF graph/session, trains for ``config.epoch`` epochs,
		and prints the running average loss every 100 batches.
	"""
	# load configuration
	config = Config(args)
	# load data
	path = args.input
	if args.categorical:
		training_data, training_label, testing_data, testing_label = ld.load_categorical(path)
	else:
		# NOTE(fix): original called ld.load_numberic; the loader is spelled
		# load_numeric everywhere else in this project (ALOCC.py, IsoForest)
		# — confirm the ld module exposes load_numeric.
		training_data, training_label, testing_data, testing_label = ld.load_numeric(path)
	instance_dim = len(training_data[0])

	with tf.Graph().as_default(), tf.Session() as sess:
		model = RCAE(instance_dim,
						config.hidden_dim)

		sess.run(tf.global_variables_initializer())

		for epoch in range(config.epoch):
			# training: iterate over full mini-batches only (tail is dropped)
			batch_num = len(training_data) // config.batch_size
			for i in range(batch_num):
				curr_batch = training_data[i * config.batch_size:(i + 1) * config.batch_size]
				feed_dict = {model.data: curr_batch}
				# Run one step of discriminator and generator training and
				# fetch the average loss for reporting.
				result = sess.run((model.D_train, model.G_train, model.average_loss), feed_dict=feed_dict)
				loss_threshold = result[2]
				if i % 100 == 0:
					print("In epoch %d and batch %d, average loss: %.4f " % (epoch, i, loss_threshold))
Example #2 — File: ALOCC.py, Project: kiminh/AMAD
def run(args):
	"""Train ALOCC and evaluate it at instance and block granularity per epoch.

	Args:
		args: parsed command-line arguments. Must provide ``.input``,
			``.categorical``, ``.instance_output`` and ``.block_output``
			(report file paths), plus whatever ``Config`` consumes
			(``epoch``, ``batch_size``, ``hidden_dim``, ``alpha``, ``beta``,
			``gamma``, ``learning_rate``, ``threshold_scale``,
			``block_size``, ``block_ratio``).

	Side effects:
		Trains the model, prints evaluation results each epoch, and
		(re)writes the instance- and block-level report files each epoch.
	"""
	# load configuration
	config = Config(args)
	# load data
	path = args.input
	if args.categorical:
		training_data, training_label, testing_data, testing_label = ld.load_categorical(path)
	else:
		training_data, training_label, testing_data, testing_label = ld.load_numeric(path)
	instance_dim = len(training_data[0])

	# Shuffle testing data and labels with the same seed so they stay aligned.
	randnum = random.randint(0, 100)
	random.seed(randnum)
	random.shuffle(testing_data)
	random.seed(randnum)
	random.shuffle(testing_label)

	with tf.Graph().as_default(), tf.Session() as sess:
		model = ALOCC(instance_dim,
						config.hidden_dim,
						config.alpha,
						config.beta,
						config.gamma,
						config.learning_rate)

		sess.run(tf.global_variables_initializer())

		for epoch in range(config.epoch):
			# ---- training: full mini-batches only (tail is dropped) ----
			batch_num = len(training_data) // config.batch_size
			for i in range(batch_num):
				curr_batch = training_data[i * config.batch_size:(i + 1) * config.batch_size]
				feed_dict = {model.data: curr_batch}
				result = sess.run((model.D_train, model.G_train, model.average_loss), feed_dict=feed_dict)
				loss_threshold = result[2]
				if i % 100 == 0:
					print("In epoch %d and batch %d, average loss: %.4f " % (epoch, i, loss_threshold))

			# ---- testing after each epoch ----
			# Instance level: reconstruction loss below the scaled threshold
			# (taken from the last training batch) predicts "normal" (1).
			loss_threshold = loss_threshold * config.threshold_scale
			feed_dict = {model.data: testing_data}
			testing_data_loss = sess.run(model.total_loss, feed_dict=feed_dict)
			individual_pred = [1 if loss < loss_threshold else 0 for loss in testing_data_loss]
			instance_eval = eval(testing_label, individual_pred)

			# Write per-instance loss to file; the context manager guarantees
			# the handle closes even if a write fails (original leaked on error).
			with open(args.instance_output, 'w') as bw:
				bw.write("true pred\n")
				for i in range(len(testing_data_loss)):
					bw.write(str(testing_label[i]) + " " + str(testing_data_loss[i]) + "\n")

			# Block level: a block counts as anomalous (0) when fewer than
			# block_size * block_ratio of its instances are normal.
			pred_block = []
			true_block = []
			testing_block_num = len(testing_data) // config.block_size
			for i in range(testing_block_num):
				pred_sum = np.sum(individual_pred[i * config.block_size:(i + 1) * config.block_size])
				true_sum = np.sum(testing_label[i * config.block_size:(i + 1) * config.block_size])
				pred_block.append(0 if pred_sum < config.block_size * config.block_ratio else 1)
				true_block.append(0 if true_sum < config.block_size * config.block_ratio else 1)
			block_eval = eval(true_block, pred_block)

			# Write block-level mean loss to file.
			with open(args.block_output, 'w') as bw:
				bw.write("true pred\n")
				for i in range(testing_block_num):
					# NOTE: true_block / pred_block are deliberately reused as
					# scalars here, after the list-based evaluation above
					# (preserves the original code's behavior).
					true_block = "1"
					pred_block = np.mean(testing_data_loss[i * config.block_size:(i + 1) * config.block_size])
					true_sum = np.sum(testing_label[i * config.block_size:(i + 1) * config.block_size])
					if true_sum < config.block_size * config.block_ratio:
						true_block = "0"
					bw.write(true_block + " " + str(pred_block) + "\n")

			print("instance level evaluation: ", instance_eval)
			print("block level evaluation: ", block_eval)
Example #3
def IsoForest(args):
    """Isolation-Forest baseline: fit on training data, evaluate on test data.

    Args:
        args: parsed command-line arguments. Must provide ``.input``,
            ``.categorical``, ``.instance_output``, ``.block_output``,
            ``.block_size`` and ``.block_ratio``.

    Side effects:
        Prints instance- and block-level evaluation results and writes
        per-instance / per-block score reports to the given output paths.
    """
    # load data
    path = args.input
    if args.categorical:
        training_data, training_label, testing_data, testing_label = ld.load_categorical(
            path)
    else:
        training_data, training_label, testing_data, testing_label = ld.load_numeric(
            path)

    # Shuffle testing data and labels with the same seed so they stay aligned.
    randnum = random.randint(0, 100)
    random.seed(randnum)
    random.shuffle(testing_data)
    random.seed(randnum)
    random.shuffle(testing_label)

    clf = IsolationForest(max_samples=100)
    clf.fit(training_data)
    label_pred = clf.predict(testing_data)
    # IsolationForest marks anomalies as -1; map them to 0, keep normals as-is.
    individual_pred = [0 if p == -1 else p for p in label_pred]

    instance_eval = eval(testing_label, individual_pred)

    score = clf.score_samples(testing_data)
    print("data", testing_label, label_pred, score)
    # Write per-instance anomaly score; the context manager guarantees the
    # handle closes even if a write fails (original leaked on error).
    with open(args.instance_output, 'w') as bw:
        bw.write("true pred\n")
        for i in range(len(score)):
            bw.write(str(testing_label[i]) + " " + str(score[i]) + "\n")

    # Block level: a block counts as anomalous (0) when fewer than
    # block_size * block_ratio of its instances are normal.
    pred_block = []
    true_block = []
    testing_block_num = len(testing_data) // args.block_size
    for i in range(testing_block_num):
        pred_sum = np.sum(individual_pred[i * args.block_size:(i + 1) *
                                          args.block_size])
        true_sum = np.sum(testing_label[i * args.block_size:(i + 1) *
                                        args.block_size])
        pred_block.append(0 if pred_sum < args.block_size * args.block_ratio else 1)
        true_block.append(0 if true_sum < args.block_size * args.block_ratio else 1)
    block_eval = eval(true_block, pred_block)

    # Write block-level mean score to file.
    with open(args.block_output, 'w') as bw:
        bw.write("true pred\n")
        for i in range(testing_block_num):
            # NOTE: true_block / pred_block are deliberately reused as scalars
            # here, after the list-based evaluation above (preserves the
            # original code's behavior).
            true_block = "1"
            pred_block = np.mean(score[i * args.block_size:(i + 1) *
                                       args.block_size])
            true_sum = np.sum(testing_label[i * args.block_size:(i + 1) *
                                            args.block_size])
            if true_sum < args.block_size * args.block_ratio:
                true_block = "0"
            bw.write(true_block + " " + str(pred_block) + "\n")

    print("instance level evaluation: ", instance_eval)
    print("block level evaluation: ", block_eval)