def compare_2ts(data, args):
    """Standardize, plot and correlate the trailing columns of several frames.

    The last two column labels of ``data[0]`` are taken as the working
    columns. Those columns are replaced, in every element of ``data``, by the
    result of ``standartization_ts``; the elements are modified in place.
    All of ``data`` is then handed to ``IO.plot_2ts`` and the ``.corr`` of the
    final working column between ``data[0]`` and ``data[1]`` is printed.

    Args:
        data: Indexable collection of frame-like objects exposing ``columns``
            (presumably pandas DataFrames -- confirm against the caller).
            At least two elements are required for the correlation step.
        args: Unused in this function.
    """
    palette = ['blue', 'red', 'black', 'green']
    lab = data[0].columns[-2:]

    # Overwrite the working columns of each frame with standardized values.
    for frame in data:
        frame[lab] = standartization_ts(frame[lab])

    plot_cfg = iocfg(x=data, label=lab, grid=True, figsize=(20, 9), color=palette)
    IO.plot_2ts(plot_cfg)

    # lab[1] is the second of the two trailing column labels.
    target = lab[1]
    print(data[0][target].corr(data[1][target]))
def ips_distribution(data, args):
    """Count occurrences of each client IP, export the counts and plot them.

    Steps, in order:
      1. Tally the values of ``data["clientip"]`` with ``Counter`` and build
         an ``IP``/``Count`` table sorted by ``Count`` in descending order.
      2. Pass the table (rows with missing values dropped) to ``IO.write``
         with ``"_ips.csv"``, and a one-column table of the non-missing IPs
         (column named ``clientip``) with ``"_ipflist.csv"``.
      3. Run the table through ``IP.convert``, drop rows with missing values,
         plot ``Count`` against ``IP`` via ``IO.plot`` and call
         ``IO.savefig`` with ``"_ips.eps"``.

    Args:
        data: Mapping/frame that can be indexed by ``"clientip"``.
        args: Options object; ``args.verbose`` enables printing of the input
            and of the count table, and ``args`` is forwarded to the ``IO``
            and ``IP`` helpers.
    """
    ip_column = "clientip"

    if args.verbose:
        print(data)

    tally = Counter(data[ip_column])
    counts = pd.DataFrame(list(tally.items()), columns=['IP', 'Count'])
    df = counts.sort_values(by='Count', ascending=False, na_position='last')

    IO.write(df.dropna(axis='rows'), args, "_ips.csv")
    ip_only = pd.DataFrame({ip_column: df['IP'].dropna()})
    IO.write(ip_only, args, "_ipflist.csv")

    logging.debug("Df size is %d items.", len(df.index))
    if args.verbose:
        print(df)

    converted = IP.convert(df, args).dropna(axis='rows')
    plot_cfg = iocfg(
        x=converted['IP'],
        y=converted['Count'],
        markersize=0.1,
        color='b',
        grid=True,
        figsize=(20, 9),
        title="IPs",
    )
    IO.plot(plot_cfg)
    IO.savefig(args, "_ips.eps")
def averages_check(_ip_avg, args): logging.debug("Plot average_freq_interval") # plot average spectre: _ip_avg = _ip_avg.drop(['noname'], axis=1) means, sd = average_freq_interval(_ip_avg, args) spd = find_spectre_deviation(_ip_avg, means, sd, args) for i in range(int(args.state['time']['avgTop'])): if args.state['time']['avgType'] == 'reverse': idx = len(spd.index) - i - 1 else: idx = i args.output = "_".join(["ms", spd['ip'][idx], args.state['time']['avgType'], args.tb[0], args.tb[1]]) par = iocfg( x = means.index, y=means, addition_y=sd, figsize = (20, 9), grid=True, title="Mean spectre from {} to {} freq (by {} points, {} sample)".format(high_b, low_b, len(_ip_avg.columns), args.round), compared = _ip_avg[spd['ip'][idx]] ) IO.plot_line_err(par) IO.savefig(args, ".eps") args.output = "_".join(["ip", args.tb[0], args.tb[1]]) IO.Time.write_pretty_txt(pd.DataFrame.from_dict({"IP": ip, "Freq": freqs}), args) par = iocfg(x = iplist[iplist.columns[1]][:i], y = iplist[iplist.columns[2]][:i], title = "IP freq distribution", figsize = (12, 12), addition_x = idx, addition_y = iplist[iplist.columns[2]][idx] ) IO.plot_line(par) IO.savefig(args, "_ip_distr.eps")
def plot_entropy_distance(self, ds, dist):
    """Export an entropy series and its distance series, optionally plotting.

    ``self._args.output`` is set to ``ds.index[0].time()`` followed by an
    underscore. When ``self._args.state['io']['epsOut']`` equals ``'yes'``
    both series are drawn together with ``IO.plot_general_ts`` and the figure
    is saved through ``IO.savefig``. Both series are always passed to
    ``IO.write``.

    Args:
        ds: Entropy series; its first index entry must provide ``time()``
            (presumably a timestamp -- confirm).
        dist: Distance series plotted and written alongside ``ds``.
    """
    opts = self._args
    opts.output = "{}_".format(ds.index[0].time())
    etype = opts.state['math']['entropyType']

    eps_wanted = opts.state['io']['epsOut'] == 'yes'
    if eps_wanted:
        title = "{} entropy for {}".format(etype, opts.field)
        plot_cfg = iocfg(
            x=[ds, dist],
            grid=True,
            title=title,
            figsize=(20, 9),
            color=['blue', 'red'],
        )
        IO.plot_general_ts(plot_cfg, opts)
        IO.savefig(opts, "{}_entropy.eps".format(etype))

    # The CSV exports happen regardless of the EPS switch.
    exports = (
        (ds, "{}_entropy.csv"),
        (dist, "{}_entropy_dist.csv"),
    )
    for series, name_template in exports:
        IO.write(series, opts, name_template.format(etype))
def groupby_ops(data, args):
    """Plot and export the per-category median of a field.

    ``data`` is grouped by ``args.category`` and the median of ``args.field``
    is taken per group. Each category value is parsed as an IPv4 address and
    converted to an integer (column ``IP``), and the number of input rows
    carrying that category value is attached (column ``Count``). The medians
    are plotted against ``IP`` through ``IO.plot`` and the table, sorted by
    ``args.field`` in descending order, is passed to ``IO.write`` with
    ``"_groupby.csv"``.

    Args:
        data: Frame supporting ``groupby`` and column lookup (presumably a
            pandas DataFrame -- confirm against the caller).
        args: Options object providing ``category`` and ``field``; forwarded
            to ``IO.write``.

    Raises:
        ipaddress.AddressValueError: If a category value is not a valid IPv4
            address.
    """
    occurrences = Counter(data[args.category])

    medians = data.groupby(args.category)[args.field].median().to_frame()
    medians = medians.reset_index()

    n_groups = len(medians.index)
    keys = medians[args.category]
    ip_as_int = []
    hits = []
    # NOTE(review): `col` starts as a list sized to the groups but is rebound
    # to a single value on every pass, so only the colour of the last group
    # reaches the plot. Possibly `col[i] = ...` was intended -- confirm what
    # IO.plot accepts before changing it.
    col = [None] * n_groups
    for row in range(n_groups):
        key = keys[row]
        ip_as_int.append(int(ipaddress.IPv4Address(key)))
        hits.append(occurrences[key])
        col = color_assign(hits[row])

    extra = pd.DataFrame({'IP': ip_as_int, 'Count': hits})
    medians = medians.join(extra)

    plot_cfg = iocfg(
        x=medians['IP'],
        y=medians[args.field],
        markersize=1.5,
        color=col,
        grid=True,
        figsize=(13, 13),
    )
    IO.plot(plot_cfg)

    ranked = medians.sort_values(by=[args.field], ascending=False)
    IO.write(ranked, args, "_groupby.csv")