import argparse
import logging
import os

from utils import setup_path, setup_logging, setup_turi
# load_functions_partition, f_create_network and get_anchor_coords are
# project helpers; they are assumed to live in grapm alongside save_nets
# (the only grapm import confirmed elsewhere in this repo).
from grapm import (load_functions_partition, f_create_network,
                   get_anchor_coords, save_nets)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Calculate network for a partition')
    parser.add_argument('--functions', help='name of the functions directory',
                        required=True)
    parser.add_argument('--p', help='partition number', type=int)
    parser.add_argument('--output', help='output path', required=True)
    parser.add_argument('--gamma', help='gamma', default=.65, type=float)
    args = parser.parse_args()

    path = setup_path(args=args)
    setup_logging(path=path, parser=parser)
    setup_turi()

    # Normalise gamma so that e.g. 65 or 6.5 both end up as 0.65.
    gamma = args.gamma
    if gamma > 10:
        gamma = gamma / 10.0
    if gamma > 1.0:
        gamma = gamma / 10.0

    mw = load_functions_partition(directory=args.functions, name=args.p)

    logging.info(f"Starting network calculation for gamma={gamma}")
    net = f_create_network(data=mw, gamma=gamma)
    save_nets({args.gamma: [net]}, f"{args.gamma}-{args.p}-tc-nets", directory=path)
    logging.info(f"Network with {len(net)} anchors saved")

    anchors = get_anchor_coords(net=net, data=mw)
    pp = os.path.join(path, f"anchors-{args.p}")
    anchors.save(pp, format='binary')
    logging.info(f"Anchor coords saved in {pp}")
# Fragment: assumes `parts`/`sparts` (apk partitions and their SFrames),
# `train`, `classifier` and the `nets` dict are defined earlier in the
# script, along with the usual imports (time, agg) and grapm helpers.
# for gamma in tqdm([x * 1/intervals for x in range(0, intervals+1)]):
gamma = 0.65
print(f"Current {gamma=}")
for p in range(0, len(parts)):
    print(f"Creating origin network {p=}")
    origin_net = f_create_network(data=sparts[p], gamma=gamma)
    origin_anchors = get_anchor_coords(net=origin_net, data=sparts[p])

    print('Creating streamed networks')
    start = time.time()
    # Stream every partition except p against the origin network.
    neigh = [tc_based_nn(net=origin_net, apks=list(par),
                         data=origin_anchors.append(spar))
             for par, spar in zip(parts[0:p] + parts[p+1:len(parts)],
                                  sparts[0:p] + sparts[p+1:len(sparts)])]
    dicts = [net.groupby(key_column_names='nn',
                         operations={'nodes': agg.DISTINCT('apk')})
             for net in neigh]
    true_dicts = [{row['nn']: row['nodes'] for row in nep} for nep in dicts]
    end = time.time()
    print(f"\tElapsed: {end-start}")

    print('Merging networks')
    start = time.time()
    merged = origin_net.copy()
    for d in true_dicts:
        merged = naive_merge(merged, d)
    end = time.time()
    print(f"\tElapsed: {end-start}")

    voting = convert_to_voting(merged, classifier)
    nets[gamma] = voting

    # save nets:
    save_nets(nets=nets, name=f"{len(train)}-{p}-stable-stream-nets")
import argparse
import logging
import pickle

import pandas as pd

from utils import setup_path, setup_logging, setup_turi
from grapm import convert_to_voting, save_nets

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Convert to voting network')
    parser.add_argument('--net', help='networks directory', required=True)
    parser.add_argument('--labels', help='apk labels', required=True)
    parser.add_argument('--output', help='output path', required=True)
    args = parser.parse_args()

    path = setup_path(args)
    setup_logging(path=path, parser=parser)
    setup_turi()

    logging.info(f"Reading labels from {args.labels}")
    labels = pd.read_csv(args.labels, index_col=0)
    classifier = lambda x: int(labels.loc[x]['malware_label'])

    # e.g. 0.86-0-tc-nets.pickle
    with open(f"{args.net}", 'rb') as f:
        net = pickle.load(f)
    gamma = list(net.keys())[0]
    net = list(net.values())[0][0]

    voting = convert_to_voting(net, classifier)
    # args.net already includes the path, so we trick save_nets with directory=''
    voting_path = args.net.replace('.pickle', '-voting')
    save_nets({gamma: [voting]}, voting_path, directory='')
    logging.info(f"Voting network with {len(voting)} anchors saved")
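# The merge scripts below call a load_net() helper that is not shown in this
# section. A minimal sketch, assuming the {gamma: [net, ...]} pickle layout
# that save_nets writes and that the unpacking in the script above mirrors:
import pickle

def load_net(path):
    """Load a pickled {gamma: [net, ...]} file; return (gamma, first net)."""
    with open(path, 'rb') as f:
        nets = pickle.load(f)
    gamma = list(nets.keys())[0]
    return gamma, list(nets.values())[0][0]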
# Fragment: assumes args (--nets, --p1, --p2, --origin), path and gamma were
# set up earlier in the script.
if gamma > 1.0:
    gamma = gamma / 10.0

logging.info(f"Loading networks {gamma}")
networks = list()
for i in range(args.p1, args.p2 + 1):
    p = os.path.join(args.nets, f"{gamma}-streamed-{i}.pickle")
    g2, net = load_net(p)
    networks.append(net)
    if g2 != gamma:
        logging.warning(
            f"Found different gamma in network file {i}: {gamma}!={g2}")
        gamma = g2

sizes = [len(net) for net in networks]
logging.info(f"Network sizes: {sizes}")

origin_net = networks[args.origin]
del networks[args.origin]

logging.info(
    f"Starting to naive merge {len(networks)} nets with gamma={gamma}")
merged = origin_net.copy()
for d in networks:
    merged = naive_merge(merged, d)

save_nets({gamma: [merged]}, f"merged-{gamma}-{args.origin}-tc-nets",
          directory=path)
logging.info(f"Saved network with {len(merged)} anchors")
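# naive_merge() is another helper not defined in this section. A minimal
# sketch, assuming each net is a dict of anchor -> list of member apks (the
# shape produced by the groupby/true_dicts conversion in the streaming
# scripts, and by random_net below); it unions the member lists anchor by
# anchor:
def naive_merge(base, other):
    merged = dict(base)
    for anchor, nodes in other.items():
        # Union the membership of anchors present in both nets.
        merged[anchor] = list(set(merged.get(anchor, [])) | set(nodes))
    return merged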
print(f"Current {gamma=}") print("Creating origin network") origin_net = f_create_network(data=sparts[0], gamma=gamma) origin_anchors = get_anchor_coords(net=origin_net, data=sparts[0]) print('Creating streamed networks') start = time.time() neigh = [tc_based_nn(net=origin_net, apks=list(par), data=origin_anchors.append(spar)) for par, spar in zip(parts[1:], sparts[1:])] dicts = [net.groupby(key_column_names='nn', operations={'nodes': agg.DISTINCT('apk')}) for net in neigh] true_dicts = [{row['nn']: row['nodes'] for row in nep} for nep in dicts] end = time.time() print(f"\tElapsed: {end-start}") print('Merging networks') start = time.time() merged = origin_net.copy() for d in true_dicts: merged = naive_merge(merged, d) end = time.time() print(f"\tElapsed: {end-start}") voting = convert_to_voting(merged, classifier) save_nets(nets={gamma: [true_dicts, origin_net]}, name=f"{gamma}-stream-singleaggregating", directory=ww) nets[gamma] = [merged, voting] # save nets: save_nets(nets=nets, name=f"{len(train)}-stream-nets")
# Fragment: assumes args (--nets, --p1, --p2, --origin), path and gamma were
# set up earlier in the script.
if gamma > 1.0:
    gamma = gamma / 10.0

logging.info(f"Loading networks {gamma}")
networks = list()
anchors = list()
for i in range(args.p1, args.p2 + 1):
    # e.g. 0.85-0-tc-nets-voting.pickle
    with open(
            os.path.join(args.nets, f"{gamma}-{i}-tc-nets-voting.pickle"),
            'rb') as f:
        net = pickle.load(f)
    networks.append(list(net.values())[0][0])
    g2 = list(net.keys())[0]
    if g2 != gamma:
        logging.warning(
            f"Found different gamma in network file {i}: {gamma}!={g2}")
        gamma = g2
    # e.g. anchors-0.85-5
    anchorpath = os.path.join(args.nets, f"anchors-{gamma}-{i}")
    an = tc.load_sframe(anchorpath)
    anchors.append(an)

logging.info(f"Starting to merge {len(networks)} nets with gamma={gamma}")
r = merge_voting_nets(nets=networks, datas=anchors, gamma=gamma)
save_nets({gamma: [r]}, f"merged-{gamma}-{args.origin}-voting",
          directory=path)
logging.info(f"Saved network with {len(r)} anchors")
import argparse
import logging
import os

import numpy as np

from utils import setup_path, setup_logging, setup_turi
# load_functions_partition, get_anchor_coords and save_nets are assumed to
# come from grapm, as elsewhere in this repo.
from grapm import load_functions_partition, get_anchor_coords, save_nets


def random_net(apks, size):
    """Pick `size` random apks as anchors of a network with empty member lists."""
    return {a: [] for a in np.random.choice(a=apks, size=size, replace=False)}


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Calculate random set from a partition')
    parser.add_argument('--functions', help='name of the functions directory',
                        required=True)
    parser.add_argument('--output', help='output path', required=True)
    # parser.add_argument('--size', help='size', default=120, type=int)
    parser.add_argument('--list', nargs='+', help='Sizes list', required=True)
    args = parser.parse_args()

    path = setup_path(args=args)
    setup_logging(path=path, parser=parser)
    setup_turi()

    mw = load_functions_partition(directory=args.functions, name='')
    apks = mw['apk'].unique()

    for size in args.list:
        size = int(size)
        logging.info(f"Starting network creation for size={size}")
        net = random_net(apks=apks, size=size)
        save_nets({size: [net]}, f"{size}-random-nets", directory=path)
        logging.info(f"Network with {len(net)} anchors saved")

        anchors = get_anchor_coords(net=net, data=mw)
        pp = os.path.join(path, f"anchors-{size}")
        anchors.save(pp, format='binary')
        logging.info(f"Anchor coords saved in {pp}")
import argparse
import logging

import turicreate as tc
from turicreate import aggregate as agg

from utils import setup_path, setup_logging, setup_turi
# load_net, tc_based_nn and load_functions_partition are assumed to come
# from grapm, as elsewhere in this repo.
from grapm import load_functions_partition, load_net, tc_based_nn, save_nets

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Stream a partition through an origin network')
    # --net, --anchors and --functions are referenced below; their
    # add_argument lines were truncated, so the help texts are reconstructed.
    parser.add_argument('--net', help='origin network pickle', required=True)
    parser.add_argument('--anchors', help='origin anchors SFrame', required=True)
    parser.add_argument('--functions', help='name of the functions directory',
                        required=True)
    parser.add_argument('--p', help='partition number', type=int, required=True)
    parser.add_argument('--output', help='output path', required=True)
    args = parser.parse_args()

    path = setup_path(args=args)
    setup_logging(path=path, parser=parser)
    setup_turi()

    logging.info(f"Loading origin network {args.net} & {args.anchors}")
    gamma, net = load_net(args.net)
    an = tc.load_sframe(args.anchors)
    mw = load_functions_partition(directory=args.functions, name=args.p)

    logging.info('Nearest neighbour search')
    neigh = tc_based_nn(net=net, anchors=an, partition=mw)

    logging.info('Conversion')
    dicted = neigh.groupby(key_column_names='nn',
                           operations={'nodes': agg.DISTINCT('apk')})
    true_dicts = {row['nn']: row['nodes'] for row in dicted}

    logging.info('Saving')
    save_nets({gamma: [true_dicts]}, f"{gamma}-streamed-{args.p}",
              directory=path)
    logging.info(f"Saved network with {len(true_dicts)} anchors")
    # probably also save the origin network, but I don't want to do it 15 times...
    save_nets({gamma: [net]}, f"{gamma}-streamed-0", directory=path)
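# tc_based_nn() is a project helper whose body is not shown in this section.
# A minimal sketch of the keyword form used in the streaming script above,
# assuming `anchors` is an SFrame of anchor feature rows labelled by 'apk'
# and `partition` an SFrame of streamed apk rows; it tags every streamed apk
# with its nearest anchor ('nn') using Turi Create's nearest-neighbours model:
import turicreate as tc

def tc_based_nn(net, anchors, partition):
    model = tc.nearest_neighbors.create(anchors, label='apk')
    res = model.query(partition, label='apk', k=1)
    # 'query_label' is the streamed apk, 'reference_label' its nearest anchor;
    # rename to the 'apk'/'nn' columns the groupby above expects.
    return res.rename({'query_label': 'apk', 'reference_label': 'nn'})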