def net_based_multi_merge(nets, datas, gamma):
    """Build one network over the concatenated data and pull node lists over.

    All frames in ``datas`` are appended into a single frame, a fresh network
    is created from it via ``f_create_network``, and then, for every anchor in
    the new network, the node lists that any of the original ``nets`` held for
    that anchor (and for each of its members) are gathered into one merged
    mapping.  Each member itself is appended to its anchor's list as well.

    Returns a ``defaultdict(list)`` mapping anchor -> merged node list.
    """
    combined = datas[0]
    for frame in datas[1:]:
        combined = combined.append(frame)
    fresh_net = f_create_network(gamma=gamma, data=combined)

    merged = defaultdict(list)
    for anchor, members in fresh_net.items():
        bucket = merged[anchor]
        # carry over whatever the source nets stored under the anchor itself
        for source in nets:
            bucket.extend(source.get(anchor, []))
        # then the entries stored under each member, plus the member itself
        for member in members:
            for source in nets:
                bucket.extend(source.get(member, []))
            bucket.append(member)
    return merged
def merge_voting_nets(nets, datas, gamma):
    """Rebuild a network over the combined data and transfer vote vectors.

    All frames in ``datas`` are appended into one frame and a new network is
    created from it with ``f_create_network``.  For every anchor of the new
    network, the vote vector is seeded from the first original net that knows
    the anchor, then the vote vectors of each member (taken from the first
    original net that knows that member) are added element-wise with
    ``np.add``.

    Returns a dict mapping anchor -> list of summed votes.  Anchors unknown
    to every original net and with no known members are omitted.
    """
    combined = datas[0]
    for frame in datas[1:]:
        combined = combined.append(frame)
    new_net = f_create_network(gamma=gamma, data=combined)

    # transfer the "votes" from original networks to just created new anchors
    targ = dict()
    for anchor, members in new_net.items():
        for net in nets:
            if anchor in net:
                # copy instead of aliasing, so the source net's vote list is
                # never shared with (or mutated through) the merged result
                targ[anchor] = list(net[anchor])
                break
        for member in members:
            for net in nets:
                if member in net:
                    if anchor in targ:
                        targ[anchor] = list(np.add(targ[anchor], net[member]))
                    else:
                        # the anchor itself had no votes in any original net:
                        # seed the tally from the first member that does
                        # (the previous code raised KeyError in this case)
                        targ[anchor] = list(net[member])
                    break
    return targ
# CLI: partition number, output path and gamma for the network build.
parser.add_argument('--p', help='partition number', type=int)
parser.add_argument('--output', help='output path', required=True)
parser.add_argument('--gamma', help='gamma', default=.65, type=float)
args = parser.parse_args()

path = setup_path(args=args)
setup_logging(path=path, parser=parser)
setup_turi()

# Normalize gamma into (0, 1]: callers may pass e.g. 65 or 6.5 meaning 0.65.
# A loop generalizes the old two-step division (which only handled <= 100).
gamma = args.gamma
while gamma > 1.0:
    gamma = gamma / 10.0

mw = load_functions_partition(directory=args.functions, name=args.p)

logging.info(f"Starting network calculation for gamma={gamma}")
net = f_create_network(data=mw, gamma=gamma)
# Nets are keyed by the *raw* CLI gamma so output files match the invocation.
save_nets({args.gamma: [net]}, f"{args.gamma}-{args.p}-tc-nets", directory=path)
logging.info(f"Network with {len(net)} anchors saved")

anchors = get_anchor_coords(net=net, data=mw)
pp = os.path.join(path, f"anchors-{args.p}")
anchors.save(pp, format='binary')
logging.info(f"Anchor coords saved in {pp}")
# Split the training frame into 4 partitions and subsample each by 'apk'.
# NOTE(review): the trailing `for d in true_dicts:` loop continues beyond this
# chunk — its body is not visible here.
parts = partition_ndframe(nd=train, n_parts=4)
sparts = [subsamp.filter_by(values=part, column_name='apk') for part in parts]
ftrain = subsamp.filter_by(values=train, column_name='apk')

# Label lookup: apk id -> integer malware label (from the encoded CSV).
labels = pd.read_csv('../data/labels_encoded.csv', index_col=0)
classifier = lambda x: int(labels.loc[x]['malware_label'])

nets = dict()
intervals = 18
# for gamma in tqdm([x * 1/intervals for x in range(0, intervals+1)]):
gamma = 0.65  # fixed gamma; the commented loop above swept [0, 1] instead
print(f"Current {gamma=}")
for p in range(0, len(parts)):
    # Build the "origin" network from partition p only, then stream the
    # remaining partitions against its anchors.
    print(f"Creating origin network {p=}")
    origin_net = f_create_network(data=sparts[p], gamma=gamma)
    origin_anchors = get_anchor_coords(net=origin_net, data=sparts[p])
    print('Creating streamed networks')
    start = time.time()
    # Nearest-neighbour assignment of every other partition's apks to the
    # origin anchors; `origin_anchors.append(spar)` presumably concatenates
    # frames (SFrame-style, non-mutating) — TODO confirm.
    neigh = [tc_based_nn(net=origin_net, apks=list(par), data=origin_anchors.append(spar)) for par, spar in zip(parts[0:p]+parts[p+1:len(parts)], sparts[0:p]+sparts[p+1:len(sparts)])]
    # Group each result by nearest anchor ('nn') collecting distinct apks,
    # then convert to plain dicts: anchor -> list of assigned apks.
    dicts = [net.groupby(key_column_names='nn', operations={'nodes': agg.DISTINCT('apk')}) for net in neigh]
    true_dicts = [{row['nn']: row['nodes'] for row in nep} for nep in dicts]
    end = time.time()
    print(f"\tElapsed: {end-start}")
    print('Merging networks')
    start = time.time()
    # Fold every streamed assignment into a copy of the origin network.
    merged = origin_net.copy()
    for d in true_dicts: