Beispiel #1
0
def net_based_multi_merge(nets, datas, gamma):
    """Build a fresh network over the concatenated datasets and merge the
    node lists of every input network into the new anchors.

    For each anchor of the new network, collects (in order) the nodes each
    original net stored under the anchor itself, then for every member of
    the anchor, the nodes each original net stored under that member,
    followed by the member itself.

    Returns a ``defaultdict(list)`` mapping new anchor -> merged node list.
    """
    # Concatenate all data partitions into one frame.
    combined = datas[0]
    for extra in datas[1:]:
        combined = combined.append(extra)

    fresh_net = f_create_network(gamma=gamma, data=combined)
    merged = defaultdict(list)

    for anchor, members in fresh_net.items():
        # Nodes the original networks attached directly to this anchor.
        merged[anchor].extend(
            node for net in nets for node in net.get(anchor, [])
        )

        # Nodes the original networks attached to each member, then the
        # member itself.
        for member in members:
            merged[anchor].extend(
                node for net in nets for node in net.get(member, [])
            )
            merged[anchor].append(member)

    return merged
Beispiel #2
0
def merge_voting_nets(nets, datas, gamma):
    """Re-anchor voting networks onto a network built from the combined data.

    Builds a new network over the concatenation of ``datas`` and transfers
    the vote vectors of the original ``nets`` onto the new anchors: each
    anchor is seeded from the first net that knows it, then the votes of
    each of its members (taken from the first net containing that member)
    are added element-wise.

    NOTE(review): if an anchor appears in no input net but one of its
    members does, ``targ[anchor]`` is read before being seeded and a
    ``KeyError`` is raised — preserved from the original behavior.
    """
    # Concatenate all data partitions into one frame.
    combined = datas[0]
    for extra in datas[1:]:
        combined = combined.append(extra)

    fresh = f_create_network(gamma=gamma, data=combined)

    # Transfer the "votes" from the original networks to the new anchors.
    targ = {}
    for anchor, members in fresh.items():
        # Seed from the first original net that already has this anchor.
        for net in nets:
            if anchor in net:
                targ[anchor] = net.get(anchor)
                break

        # Add in the votes of each member, again first-match wins.
        for member in members:
            for net in nets:
                if member in net:
                    targ[anchor] = list(np.add(targ[anchor], net[member]))
                    break

    return targ
Beispiel #3
0
    parser.add_argument('--p', help='partition number', type=int)
    parser.add_argument('--output', help='output path', required=True)
    parser.add_argument('--gamma', help='gamma', default=.65, type=float)
    args = parser.parse_args()

    path = setup_path(args=args)
    setup_logging(path=path, parser=parser)

    setup_turi()

    # Normalize gamma into (0, 1]: an input given as e.g. 65 becomes 6.5
    # after the first step and 0.65 after the second; 6.5 becomes 0.65.
    gamma = args.gamma
    if gamma > 10:
        gamma = gamma / 10.0

    if gamma > 1.0:
        gamma = gamma / 10.0

    mw = load_functions_partition(directory=args.functions, name=args.p)
    # Fixed typo in the log message ("Stargng" -> "Starting").
    logging.info(f"Starting network calculation for gamma={gamma}")

    # Build the network for this partition and persist it, keyed by the
    # raw (un-normalized) gamma from the command line.
    net = f_create_network(data=mw, gamma=gamma)
    save_nets({args.gamma: [net]},
              f"{args.gamma}-{args.p}-tc-nets",
              directory=path)
    logging.info(f"Network with {len(net)} anchors saved ")

    # Compute and save the anchor coordinates alongside the network.
    anchors = get_anchor_coords(net=net, data=mw)
    pp = os.path.join(path, f"anchors-{args.p}")
    anchors.save(pp, format='binary')
    logging.info(f"Anchor cords saved in {pp}")
Beispiel #4
0
    # Partition the training set into 4 parts and build the matching
    # subsampled frames: one per partition plus the full training frame.
    parts = partition_ndframe(nd=train, n_parts=4)
    sparts = [subsamp.filter_by(values=part, column_name='apk') for part in parts]
    ftrain = subsamp.filter_by(values=train, column_name='apk')

    # Ground-truth labels; presumably indexed by apk id — TODO confirm
    # against the producer of labels_encoded.csv.
    labels = pd.read_csv('../data/labels_encoded.csv', index_col=0)
    classifier = lambda x: int(labels.loc[x]['malware_label'])

    nets = dict()
    intervals = 18
    # Single fixed gamma; the commented-out loop below swept [0, 1].
#    for gamma in tqdm([x * 1/intervals for x in range(0, intervals+1)]):
    gamma = 0.65
    print(f"Current {gamma=}")

    # For each partition p: build an "origin" network on p, then stream
    # every other partition through it via nearest-neighbour matching.
    for p in range(0, len(parts)):
        print(f"Creating origin network {p=}")
        origin_net = f_create_network(data=sparts[p], gamma=gamma)
        origin_anchors = get_anchor_coords(net=origin_net, data=sparts[p])

        print('Creating streamed networks')
        start = time.time()
        # For every partition except p, match its apks to the nearest
        # origin anchor; then group matched apks by their anchor ('nn').
        neigh = [tc_based_nn(net=origin_net, apks=list(par), data=origin_anchors.append(spar)) 
            for par, spar in zip(parts[0:p]+parts[p+1:len(parts)], sparts[0:p]+sparts[p+1:len(sparts)])]
        dicts = [net.groupby(key_column_names='nn', operations={'nodes': agg.DISTINCT('apk')}) for net in neigh]
        # Convert each grouped SFrame into a plain dict: anchor -> apk list.
        true_dicts = [{row['nn']: row['nodes'] for row in nep} for nep in dicts]
        end = time.time()
        print(f"\tElapsed: {end-start}")

        # Fold the streamed assignments back into a copy of the origin net.
        print('Merging networks')
        start = time.time()
        merged = origin_net.copy()
        for d in true_dicts: