Esempio n. 1
0
import time
import copy
import cPickle as pickle

# Experiment settings. num_edge is interpolated into the input file name below.
p_val, budget, num_edge = -0.8, 5, 10000

# Outlier counts 10, 20, ..., 90 and a repeat count; neither is consumed in
# the visible lines (presumably used by an experiment loop further down).
num_outliers = range(10, 91, 10)
repeat = 20

# Starts empty; nothing in the visible lines fills it.
running_times = {}

# Read the space-delimited edge list 'Data/dblp_<num_edge>.txt'.
# NOTE(review): `pd` is not imported in the visible lines -- confirm that
# `import pandas as pd` exists elsewhere in the file.
data = pd.read_csv('Data/dblp_%d.txt' % num_edge, delimiter=' ')

# Hand the raw frame to data_transform.read_data (defined outside this view).
# The code below relies on the result exposing SOURCE, DESTINATION, WEIGHT,
# LIFETIME and IAT_VAR columns.
data = data_transform.read_data(data)
users = data.groupby('SOURCE')  # one group per distinct SOURCE value
IDs = map(int, users.groups.keys())  # group keys converted to int
destinations = data.groupby('DESTINATION')  # not used in the visible lines

# Per-SOURCE aggregates, each converted to a plain list and passed through
# fix_zero_error (defined elsewhere; presumably guards against zero values --
# TODO confirm).
AMOUNT = fix_zero_error(users['WEIGHT'].sum().values.tolist())  # sum of WEIGHT
DEST = fix_zero_error(users['DESTINATION'].nunique().values.tolist())  # distinct DESTINATION count
LIFE = fix_zero_error(users['LIFETIME'].first().values.tolist())  # first LIFETIME in each group
# NOTE(review): the name suggests incoming edges, but this counts WEIGHT rows
# per SOURCE group -- confirm the intended direction.
IN_EDGE = fix_zero_error(users['WEIGHT'].count().values.tolist())
IAT_VAR = fix_zero_error(users['IAT_VAR'].first().values.tolist())  # first IAT_VAR in each group

# Length of the per-SOURCE list; equals the number of SOURCE groups assuming
# fix_zero_error preserves length (TODO confirm).
num_nodes = len(AMOUNT)

# generate_pairs, continuous_features and discrete_features are not defined in
# the visible lines. The second argument is the concatenation of both lists.
feature_pairs = generate_pairs(continuous_features,
                               continuous_features + discrete_features)
Esempio n. 2
0
import sys
import datetime as dt
import ranklist
import time
from plotSpot import plotSpot
from data_transform import read_data
from matplotlib.backends.backend_pdf import PdfPages
from math import log
from helper import *
from system import *
from plot_functions import *
from iForest import iForest

# parse_cmdline is not defined in the visible lines (presumably provided by
# one of the star imports); its two return values are unpacked here.
P_val, Budget = parse_cmdline()  # P_val and Budget

data = read_data()  # from data_transform.py
# The bare triple-quoted strings in this script are expression statements used
# as section banners; they have no runtime effect.
""" Plot Generator Helper Data """
cprint("Generating Plot Helper Data")
enable_warnings()
users = data.groupby('SOURCE')  # one group per distinct SOURCE value
IDs = map(int, users.groups.keys())  # group keys converted to int
destinations = data.groupby('DESTINATION')  # not used in the visible lines
print_ok("Plot Helpers Generated")
""" Scatter Plots """
scatter_plots = 0  # Count of the number of scatter plots generated
# scatter_show is not defined in the visible lines (presumably a flag supplied
# by a star import -- TODO confirm).
if scatter_show:
    cprint("Generating Scatter Plots")
    enable_warnings()
    # The next line is a bare list literal: it is evaluated and discarded, so
    # it only serves as a reminder of the feature names.
    ['AMOUNT', 'DEST', 'LIFE', 'IN_EDGE', 'AMT_VAR', 'IAT_VAR']
    # Per-SOURCE aggregates passed through fix_zero_error (defined elsewhere).
    AMOUNT = fix_zero_error(users['WEIGHT'].sum().values.tolist())  # sum of WEIGHT
    DEST = fix_zero_error(users['DESTINATION'].nunique().values.tolist())  # distinct DESTINATION count
Esempio n. 3
0
from matplotlib.backends.backend_pdf import PdfPages
from math import log
from helper import *
from system import *
from plot_functions import *
from iForest import *
import cPickle as pickle
from sklearn.ensemble import IsolationForest


# NOTE(review): datafile is not referenced in the visible lines; read_data()
# below is called without arguments -- confirm where this name is consumed.
datafile = 'dblp_100000.txt'
# Feature names; only 'DEST', 'EDGES_OUT' and 'LIFE' are populated in INFO
# within the visible lines.
continuous_features = ['DEST', 'EDGES_OUT', 'LIFE',
					   'MEAN_IAT', 'IAT_VAR_MEAN', 'IAT_q50']


# read_data, cprint, enable_warnings, print_ok and fix_zero_error are not
# defined in the visible lines (presumably provided by the star imports).
data = read_data()
cprint("Generating Plot Helper Data")
enable_warnings()
users = data.groupby('SOURCE')  # one group per distinct SOURCE value
destinations = data.groupby('DESTINATION')  # only referenced by the disabled lines below
print_ok("Plot Helpers Generated")

# Maps a feature name to its list of per-SOURCE values.
INFO = {}

# Disabled: distinct SOURCE count per DESTINATION group.
# INFO['SRC'] = fix_zero_error(destinations['SOURCE'].nunique().values.tolist())
INFO['DEST'] = fix_zero_error(users['DESTINATION'].nunique().values.tolist())  # distinct DESTINATION count per SOURCE
INFO['LIFE'] = fix_zero_error(users['LIFETIME'].first().values.tolist())  # first LIFETIME in each SOURCE group
# Disabled: WEIGHT row count per DESTINATION group.
# INFO['EDGES_IN'] = fix_zero_error(destinations['WEIGHT'].count(
# ).values.tolist())
INFO['EDGES_OUT'] = fix_zero_error(users['WEIGHT'].count().values.tolist())  # WEIGHT row count per SOURCE
# Progress marker written with a Python 2 print statement.
print 'till edges out'