Example #1
    parser.add_argument('--p', help='partition number', type=int)
    parser.add_argument('--output', help='output path', required=True)
    parser.add_argument('--gamma', help='gamma', default=.65, type=float)
    args = parser.parse_args()

    path = setup_path(args=args)
    setup_logging(path=path, parser=parser)

    setup_turi()

    gamma = args.gamma
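    # Normalize gamma: a value passed as e.g. 65 or 6.5 is scaled down to 0.65.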
    if gamma > 10:
        gamma = gamma / 10.0

    if gamma > 1.0:
        gamma = gamma / 10.0

    mw = load_functions_partition(directory=args.functions, name=args.p)
    logging.info(f"Stargng network calculation for gamma={gamma}")

    net = f_create_network(data=mw, gamma=gamma)
    save_nets({args.gamma: [net]},
              f"{args.gamma}-{args.p}-tc-nets",
              directory=path)
    logging.info(f"Network with {len(net)} anchors saved ")

    anchors = get_anchor_coords(net=net, data=mw)
    pp = os.path.join(path, f"anchors-{args.p}")
    anchors.save(pp, format='binary')
    logging.info(f"Anchor cords saved in {pp}")
Example #2
#    for gamma in tqdm([x * 1/intervals for x in range(0, intervals+1)]):
    gamma = 0.65
    print(f"Current {gamma=}")
   
    for p in range(0, len(parts)):
        print(f"Creating origin network {p=}")
        origin_net = f_create_network(data=sparts[p], gamma=gamma)
        origin_anchors = get_anchor_coords(net=origin_net, data=sparts[p])

        print('Creating streamed networks')
        start = time.time()
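        # Leave partition p out as the origin: for every other partition, find
        # each APK's nearest anchor in the origin network.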
        neigh = [tc_based_nn(net=origin_net, apks=list(par), data=origin_anchors.append(spar))
                 for par, spar in zip(parts[:p] + parts[p+1:], sparts[:p] + sparts[p+1:])]
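        # Group the matches by anchor ('nn') and collect the distinct APKs
        # assigned to each anchor as plain dictionaries.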
        dicts = [net.groupby(key_column_names='nn', operations={'nodes': agg.DISTINCT('apk')}) for net in neigh]
        true_dicts = [{row['nn']: row['nodes'] for row in nep} for nep in dicts]
        end = time.time()
        print(f"\tElapsed: {end-start}")

        print('Merging networks')
        start = time.time()
        merged = origin_net.copy()
        for d in true_dicts:
            merged = naive_merge(merged, d)
        end = time.time()
        print(f"\tElapsed: {end-start}")

        voting = convert_to_voting(merged, classifier)
        nets[gamma] = voting
        # save nets:
        save_nets(nets=nets, name=f"{len(train)}-{p}-stable-stream-nets")
Example #3
import argparse
import logging
import pickle

import pandas as pd

from utils import setup_path, setup_logging, setup_turi
from grapm import convert_to_voting, save_nets

if __name__=="__main__":
    parser = argparse.ArgumentParser(description='Convert to voting network')
    parser.add_argument('--net', help='networks directory', required=True)
    parser.add_argument('--labels', help='apk labels', required=True)
    parser.add_argument('--output', help='output path', required=True)
    args = parser.parse_args()

    path = setup_path(args)
    setup_logging(path=path, parser=parser)

    setup_turi()

    logging.info(f"Reading labels from {args.labels}")
    labels = pd.read_csv(args.labels, index_col=0)
    classifier = lambda x: int(labels.loc[x]['malware_label'])

    # e.g. 0.86-0-tc-nets.pickle: the pickle maps gamma -> [network]
    with open(f"{args.net}", 'rb') as f:
        net = pickle.load(f)
    
    gamma = list(net.keys())[0]
    net = list(net.values())[0][0]
    voting = convert_to_voting(net, classifier)
    # args.net already includes its path, so pass directory='' to avoid prefixing it again
    voting_path = args.net.replace('.pickle', '-voting')
    save_nets({gamma: [voting]}, voting_path,  directory='')
    logging.info(f"Voting network with {len(voting)} anchors saved ")
Example #4
    if gamma > 1.0:
        gamma = gamma / 10.0

    logging.info(f"Loading networks {gamma}")
    networks = list()
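    # Load the streamed networks for partitions p1..p2 and check that they all share the same gamma.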
    for i in range(args.p1, args.p2 + 1):
        p = os.path.join(args.nets, f"{gamma}-streamed-{i}.pickle")
        g2, net = load_net(p)
        networks.append(net)
        if g2 != gamma:
            logging.warning(
                f"Found different gamma in network file {i}: {gamma}!={g2}")
            gamma = g2

    sizes = [len(net) for net in networks]
    logging.info(f"Network sizes: {sizes}")

    origin_net = networks[args.origin]
    del networks[args.origin]

    logging.info(
        f"Starting to naive merge {len(networks)} nets with gamma={gamma}")
    merged = origin_net.copy()
    for d in networks:
        merged = naive_merge(merged, d)

    save_nets({gamma: [merged]},
              f"merged-{gamma}-{args.origin}-tc-nets",
              directory=path)
    logging.info(f"Saved network with {len(merged)}")
Example #5
        print(f"Current {gamma=}")
        print("Creating origin network")
        
        origin_net = f_create_network(data=sparts[0], gamma=gamma)
        origin_anchors = get_anchor_coords(net=origin_net, data=sparts[0])

        print('Creating streamed networks')
        start = time.time()
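        # Partition 0 is the origin; stream every remaining partition against its anchors.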
        neigh = [tc_based_nn(net=origin_net, apks=list(par), data=origin_anchors.append(spar))
                 for par, spar in zip(parts[1:], sparts[1:])]
        dicts = [net.groupby(key_column_names='nn', operations={'nodes': agg.DISTINCT('apk')}) for net in neigh]
        true_dicts = [{row['nn']: row['nodes'] for row in nep} for nep in dicts]
        end = time.time()
        print(f"\tElapsed: {end-start}")

        print('Merging networks')
        start = time.time()
        merged = origin_net.copy()
        for d in true_dicts:
            merged = naive_merge(merged, d)
        end = time.time()
        print(f"\tElapsed: {end-start}")

        voting = convert_to_voting(merged, classifier)
        
        save_nets(nets={gamma: [true_dicts, origin_net]},
                  name=f"{gamma}-stream-singleaggregating",
                  directory=ww)
        nets[gamma] = [merged, voting]
        
        
    # save nets:
    save_nets(nets=nets, name=f"{len(train)}-stream-nets")
Example #6
    if gamma > 1.0:
        gamma = gamma / 10.0

    logging.info(f"Loading networks {gamma}")
    networks = list()
    anchors = list()

    for i in range(args.p1, args.p2 + 1):
        # e.g. 0.85-0-tc-nets-voting.pickle: the pickle maps gamma -> [voting network]
        with open(
                os.path.join(args.nets, f"{gamma}-{i}-tc-nets-voting.pickle"),
                'rb') as f:
            net = pickle.load(f)
        networks.append(list(net.values())[0][0])
        g2 = list(net.keys())[0]
        if g2 != gamma:
            logging.warning(
                f"Found different gamman in network file {i}: {gamma}!={g2}")
            gamma = g2
        # e.g. anchors-0.85-5: anchor coordinates stored as a Turi Create SFrame
        anchorpath = os.path.join(args.nets, f"anchors-{gamma}-{i}")
        an = tc.load_sframe(anchorpath)
        anchors.append(an)

    logging.info(f"Starting to merge {len(networks)} nets with gamma={gamma}")
    r = merge_voting_nets(nets=networks, datas=anchors, gamma=gamma)
    save_nets({gamma: [r]},
              f"merged-{gamma}-{args.origin}-voting",
              directory=path)
    logging.info(f"Saved network with {len(r)}")
Example #7
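    # Pick `size` APKs uniformly at random (without replacement) as anchors with empty node lists.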
    return {a:[] for a in np.random.choice(a=apks, size=size, replace=False)}



if __name__=="__main__":
    parser = argparse.ArgumentParser(description='Calculate random set from a partition')
    parser.add_argument('--functions', help='name of the functions directory', required=True)
    parser.add_argument('--output', help='output path', required=True)
    #parser.add_argument('--size', help='size', default=120, type=int)
    parser.add_argument('--list', nargs='+', help='Sizes list', required=True)
    args = parser.parse_args()
   
    path = setup_path(args=args)
    setup_logging(path=path, parser=parser)
    setup_turi()

    mw = load_functions_partition(directory=args.functions, name='')
    apks = mw['apk'].unique()

    for size in args.list:
        size = int(size)
        logging.info(f"Stargng network creation for size={size}")
        net = random_net(apks=apks, size=size)
        save_nets({size: [net]}, f"{size}-random-nets",  directory=path)
        logging.info(f"Network with {len(net)} anchors saved ")

        anchors = get_anchor_coords(net=net, data=mw)
        pp = os.path.join(path, f"anchors-{size}")
        anchors.save(pp, format='binary')
        logging.info(f"Anchor cords saved in {pp}")
Example #8
                        help='partition number',
                        type=int,
                        required=True)
    parser.add_argument('--output', help='output path', required=True)
    args = parser.parse_args()

    path = setup_path(args=args)
    setup_logging(path=path, parser=parser)
    setup_turi()

    logging.info(f"Loading origin network {args.net} & {args.anchors}")
    gamma, net = load_net(args.net)
    an = tc.load_sframe(args.anchors)

    mw = load_functions_partition(directory=args.functions, name=args.p)

    logging.info('Nearest neighbour search')
    neigh = tc_based_nn(net=net, anchors=an, partition=mw)
    logging.info('Conversion')
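    # Group neighbour matches by anchor ('nn') and collect the distinct APKs that fall under each one.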
    dicted = neigh.groupby(key_column_names='nn',
                           operations={'nodes': agg.DISTINCT('apk')})
    true_dicts = {row['nn']: row['nodes'] for row in dicted}

    logging.info('Saving')
    save_nets({gamma: [true_dicts]},
              f"{gamma}-streamed-{args.p}",
              directory=path)
    logging.info(f"Saved network with {len(true_dicts)}")
    # Also save the origin network (ideally this would happen once, not once per partition run).
    save_nets({gamma: [net]}, f"{gamma}-streamed-0", directory=path)