import time
import copy
import cPickle as pickle

# Experiment configuration.
p_val, budget, num_edge = -0.8, 5, 10000
num_outliers = range(10, 91, 10)  # 10, 20, ..., 90
repeat = 20
running_times = {}

# read file
# The edge list is space-delimited; the file name encodes the edge count.
data = pd.read_csv('Data/dblp_%d.txt' % num_edge, delimiter=' ')
# transform
data = data_transform.read_data(data)

# Group the edge rows by each endpoint column.
users = data.groupby('SOURCE')
# list(...) keeps IDs a concrete list even where map() returns a lazy iterator.
IDs = list(map(int, users.groups.keys()))
destinations = data.groupby('DESTINATION')

# Per-SOURCE feature lists. Each one is passed through fix_zero_error
# (helper defined elsewhere; presumably guards against zero values -- confirm).
# Sum of WEIGHT over each source's rows.
AMOUNT = fix_zero_error(users['WEIGHT'].sum().values.tolist())
# Number of distinct DESTINATION values per source.
DEST = fix_zero_error(users['DESTINATION'].nunique().values.tolist())
# First LIFETIME value in each source group.
LIFE = fix_zero_error(users['LIFETIME'].first().values.tolist())
# Count of WEIGHT entries per source group.
# NOTE(review): this is counted per SOURCE despite the IN_ prefix -- confirm
# the name matches the intended edge direction.
IN_EDGE = fix_zero_error(users['WEIGHT'].count().values.tolist())
# First IAT_VAR value in each source group.
IAT_VAR = fix_zero_error(users['IAT_VAR'].first().values.tolist())

num_nodes = len(AMOUNT)

# Pair every continuous feature against the full (continuous + discrete) set.
feature_pairs = generate_pairs(
    continuous_features,
    continuous_features + discrete_features,
)
import sys
import datetime as dt
import ranklist
import time
from plotSpot import plotSpot
from data_transform import read_data
from matplotlib.backends.backend_pdf import PdfPages
from math import log
from helper import *
from system import *
from plot_functions import *
from iForest import iForest

P_val, Budget = parse_cmdline()  # P_val and Budget
data = read_data()  # from data_transform.py

# ---- Plot Generator Helper Data ----
cprint("Generating Plot Helper Data")
enable_warnings()
# Group the edge rows by each endpoint column.
users = data.groupby('SOURCE')
# list(...) keeps IDs a concrete list even where map() returns a lazy iterator.
IDs = list(map(int, users.groups.keys()))
destinations = data.groupby('DESTINATION')
print_ok("Plot Helpers Generated")

# ---- Scatter Plots ----
scatter_plots = 0  # Count of the number of scatter plots generated
# NOTE(review): the nesting of the statements below was reconstructed; they
# are assumed to belong to the scatter_show branch -- confirm.
if scatter_show:
    cprint("Generating Scatter Plots")
    enable_warnings()
    # Feature names: ['AMOUNT', 'DEST', 'LIFE', 'IN_EDGE', 'AMT_VAR', 'IAT_VAR']
    # Sum of WEIGHT over each source's rows.
    AMOUNT = fix_zero_error(users['WEIGHT'].sum().values.tolist())
    # Number of distinct DESTINATION values per source.
    DEST = fix_zero_error(users['DESTINATION'].nunique().values.tolist())
from matplotlib.backends.backend_pdf import PdfPages
from math import log
from helper import *
from system import *
from plot_functions import *
from iForest import *
import cPickle as pickle
from sklearn.ensemble import IsolationForest

# NOTE(review): datafile is not passed to read_data() below -- confirm how
# the input file is actually selected.
datafile = 'dblp_100000.txt'
continuous_features = [
    'DEST',
    'EDGES_OUT',
    'LIFE',
    'MEAN_IAT',
    'IAT_VAR_MEAN',
    'IAT_q50',
]

data = read_data()

cprint("Generating Plot Helper Data")
enable_warnings()
# Group the edge rows by each endpoint column.
users = data.groupby('SOURCE')
destinations = data.groupby('DESTINATION')
print_ok("Plot Helpers Generated")

# Feature name -> list of per-group values, each passed through
# fix_zero_error (helper defined elsewhere).
INFO = {}
# INFO['SRC'] = fix_zero_error(destinations['SOURCE'].nunique().values.tolist())
# Number of distinct DESTINATION values per source.
INFO['DEST'] = fix_zero_error(users['DESTINATION'].nunique().values.tolist())
# First LIFETIME value in each source group.
INFO['LIFE'] = fix_zero_error(users['LIFETIME'].first().values.tolist())
# INFO['EDGES_IN'] = fix_zero_error(destinations['WEIGHT'].count(
# ).values.tolist())
# Count of WEIGHT entries per source group.
INFO['EDGES_OUT'] = fix_zero_error(users['WEIGHT'].count().values.tolist())
# Progress marker; the parenthesised form prints the same text on Python 2
# and also parses on Python 3.
print('till edges out')