def size_count(packets):
    # Packet rows are [time, size, direction]; direction == -1 marks
    # incoming traffic.
    outgoing = []
    incoming = []
    for p in packets:
        if p[2] == -1:
            incoming.append(p)
        else:
            outgoing.append(p)

    # On first use, create each distribution file with just a header row,
    # then re-read it.
    try:
        out_size_list = su.csv_numpy('stats/adapt_out_distribution_size.csv')
    except IOError:
        with open('stats/adapt_out_distribution_size.csv', 'a') as build:
            writer = csv.writer(build)
            writer.writerow(['size', 'count'])
        out_size_list = su.csv_numpy('stats/adapt_out_distribution_size.csv')
    try:
        in_size_list = su.csv_numpy('stats/adapt_in_distribution_size.csv')
    except IOError:
        with open('stats/adapt_in_distribution_size.csv', 'a') as build:
            writer = csv.writer(build)
            writer.writerow(['size', 'count'])
        in_size_list = su.csv_numpy('stats/adapt_in_distribution_size.csv')

    for p in outgoing:
        for s in out_size_list:
            if p[1] == s[0]:
                s[1] = s[1] + 1
                break
        else:  # size not seen before: start a new bucket
            out_size_list.append([p[1], 1])

    db_df = pd.DataFrame(out_size_list, columns=['size', 'count'])
    db_df.to_csv('stats/adapt_out_distribution_size.csv', index=False)

    for p in incoming:
        for s in in_size_list:
            if p[1] == s[0]:
                s[1] = s[1] + 1
                break
        else:  # size not seen before: start a new bucket
            in_size_list.append([p[1], 1])
    db_df = pd.DataFrame(in_size_list, columns=['size', 'count'])
    db_df.to_csv('stats/adapt_in_distribution_size.csv', index=False)
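# Minimal usage sketch (assumes the imports these examples rely on
# throughout: csv, pandas as pd and the project utility module `su`).
# The packet values below are made up for illustration.
example_packets = [[0.000, 1500, -1], [0.004, 587, 1], [0.009, 1500, -1]]
size_count(example_packets)  # updates stats/adapt_*_distribution_size.csv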
Example #2
def main(opts):
    # duplex_path = "half_duplex/Announce_Happy_Valentines_Day_29__0218._HD.csv"
    # out_interval = 0.5
    # in_interval = 1
    # size = 1500

    duplex_path = opts.hdPath
    size = float(opts.size)
    out_interval = float(opts.oInterval)
    in_interval = float(opts.iInterval)

    pf = Path(duplex_path)

    trace_name = pf.name[0:-7]  # strip the '_HD.csv' suffix
    duplex_list = su.csv_numpy(duplex_path)
    index = divide_list(duplex_list)
    outgoing_list = duplex_list[0:index + 1]
    # same_outgoing = outgoing_process_buflo_method(outgoing_list, 1500, out_interval, size)

    same_outgoing = outgoing_process(trace_name, out_interval)
    outgoing_end = same_outgoing[-1][0]

    incoming_list = duplex_list[index + 1:len(duplex_list)]
    logk_incoming = incoming_process(trace_name, incoming_list, in_interval,
                                     size, outgoing_end)
    logk_list = same_outgoing + logk_incoming

    logk_df = pd.DataFrame(
        logk_list, columns=['time', 'size', 'direction', 'overhead', 'type'])
    logk_df.to_csv("logk_list/" + trace_name + '_logk_.csv', index=False)

    print('logk of ' + trace_name + ' is finished')
Example #3
def outgoing_process(trace_name, interval):
    outgoing_stats = su.csv_numpy("stats/filter_stats.csv")
    # sort by outgoing-packet count; the last entry is the dataset-wide
    # maximum that every trace is padded up to
    outgoing_stats.sort(key=su.sort_by_fourth)
    out_num = outgoing_stats[-1][3]
    size = 1500
    original_num = 0
    remainder = 0
    time = 0
    for p in outgoing_stats:
        if p[0] == trace_name:
            original_num = p[3]
            remainder = p[4]
            break
    out_list = []
    for i in range(int(original_num) + 1):
        time = i * interval
        packet = [time, size, 1, 0, 'original']
        if i == original_num:
            # the last real packet is padded up to the fixed size
            packet = [time, size, 1, size - remainder, 'padded']
        out_list.append(packet)
    # dummy packets continue at the same fixed interval until the trace
    # reaches out_num outgoing packets (matching incoming_process below)
    for i in range(int(out_num - original_num - 1)):
        time = time + interval
        packet = [time, size, 1, size, 'dummy']
        out_list.append(packet)

    return out_list
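# Usage sketch with a hypothetical trace name (requires
# stats/filter_stats.csv): the result is a constant-rate stream of
# fixed-size packets, 'original'/'padded' entries for the trace's real
# packets followed by 'dummy' entries up to the dataset-wide maximum.
out_list = outgoing_process('Announce_Happy_Valentines_Day_1__0218.', 0.5)
# out_list[0]  -> [0.0, 1500, 1, 0, 'original']
# out_list[-1] -> [<last time>, 1500, 1, 1500, 'dummy']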
def main(opts):
    csv_path = opts.csvPath

    packets = su.csv_numpy(csv_path)
    pf = Path(csv_path)
    trace_name = pf.name[0:-4]
    size_count(packets)
    interval_count(packets)
    print(csv_path + ' is finished')
Example #5
def incoming_process(trace_name, incoming_list, interval, size, start_time):
    # note: incoming_list is not used below; the packet counts come from
    # the precomputed stats files instead
    distribution = su.csv_numpy("stats/logk_distribution.csv")
    stats = su.csv_numpy("stats/filter_stats.csv")

    log_in_num = 0
    remainder = 0
    duplex_in_num = 0
    time = 0

    for p in stats:
        if p[0] == trace_name:
            duplex_in_num = p[5]
            remainder = p[6]
            break
    for p in distribution:
        if su.same_name(p[0], trace_name):
            if p[-1] == '':
                added_num = 0
            else:
                added_num = int(float(p[-1]))
            log_in_num = added_num + p[-3]

            break
    # original_end = packets[-1][1]
    incoming = []
    added_num = log_in_num - duplex_in_num - 1
    index = 0
    for i in range(int(duplex_in_num) + 1):
        time = start_time + interval * (i + 1)
        # index = start_index + i + 1
        direction = -1
        original_packet = [time, size, direction, 0, 'original']
        if i == duplex_in_num:
            original_packet = [
                time, size, direction, size - remainder, 'padded'
            ]
        incoming.append(original_packet)
    for i in range(int(added_num)):
        time = time + interval
        # index = index + i + 1
        direction = -1
        dummy_packet = [time, size, direction, size, 'dummy']
        incoming.append(dummy_packet)
    return incoming
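# Usage sketch (hypothetical values): incoming packets are scheduled
# strictly after the last outgoing packet, one every `interval` seconds,
# and padded with dummies up to the log-k target count taken from
# stats/logk_distribution.csv.
incoming = incoming_process('some_trace.', [], 1.0, 1500, 12.5)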
def logk_process(filter_in_stats):
    filter_in_stats.sort(key=su.sort_by_name)
    same_traces = []
    for i, p in enumerate(filter_in_stats):
        if i == 0:
            same_traces.append(p)
            continue
        if i == len(filter_in_stats) - 1:
            same_traces.append(p)
            same_traces.sort(key=su.sort_by_third)
            with open("stats/logk_analysis.csv", 'a') as _in:
                writer = csv.writer(_in)
                writer.writerow(same_traces[-1])
            # traces_df = pd.DataFrame(same_traces)
            # traces_df.to_csv('stats' + filter_in_stats[i - 1][0] + '.csv')
            continue
        if su.same_name(p[0], filter_in_stats[i - 1][0]):
            same_traces.append(p)
        else:
            same_traces.sort(key=su.sort_by_third)
            with open("stats/logk_analysis.csv", 'a') as _in:
                writer = csv.writer(_in)
                writer.writerow(same_traces[-1])

            same_traces = []
            same_traces.append(p)

    filter_in_stats = su.csv_numpy("stats/logk_analysis.csv")
    filter_in_stats.sort(key=su.sort_by_third)

    l = len(filter_in_stats)
    k = int(np.ceil(l / 2))
    start = 0
    logk_list = []

    # repeatedly take the first half of the remaining traces as one
    # anonymity set, so group sizes shrink as l/2, l/4, ... (rounded up)
    while k >= 1:
        if start + k <= len(filter_in_stats):
            print("index({},{})".format(start, start + k - 1))
            if start == len(filter_in_stats) - 2:
                # merge the final two traces so no trace is left alone
                anonymity_list = filter_in_stats[start:(start + k + 1)]
            else:
                anonymity_list = filter_in_stats[start:(start + k)]
            logk = add_dummy(anonymity_list)
            start = start + k
            logk_list = logk_list + logk

        else:
            break
        k = int(np.ceil((l - start) / 2))
        print(len(logk_list))
    echo_df2 = pd.DataFrame(logk_list,
                            columns=[
                                'name', 'original_in_num', 'duplex_num',
                                'remainder', 'padded_num'
                            ])
    echo_df2.to_csv("stats/logk_distribution.csv", index=False)
Example #8
def main(opts):
    # csv_path = 'csv/gamma/1/Announce_Happy_Valentines_Day_??_Google_0_.csv'
    # folder = 'test'
    # eps = 0.005
    csv_path = opts.csvPath
    folder = opts.folder
    eps = float(opts.eps)
    packets = su.csv_numpy(csv_path)
    pf = Path(csv_path)
    trace_name = pf.name[0:-4]
    distribution_generator(packets, trace_name, folder, eps)
    print(csv_path + ' is finished')
Example #9
def main(opts):
    # csv_path = "/home/lhp/PycharmProjects/pcap_csv/csv/Announce_Happy_Valentines_Day_1__0218.csv"
    # half_duplex_path = "/home/lhp/PycharmProjects/pcap_csv/half_duplex/"
    csv_path = opts.csvPath
    half_duplex_path = opts.duplexPath
    pf = Path(csv_path)
    # [0:-3] strips only 'csv', keeping the trailing '.': the resulting
    # '..._HD.csv' file names carry that extra dot, as the other examples
    # here expect
    trace_name = pf.name[0:-3]

    packet_list = su.csv_numpy(csv_path)

    duplex_list = half_duplex(packet_list, trace_name)

    duplex_df = pd.DataFrame(duplex_list,
                             columns=['time', 'size', 'direction'])
    duplex_df.to_csv(half_duplex_path + trace_name + "_HD.csv", index=False)
    print("Half_duplex of " + trace_name + " is finished")
def main():
    # incoming_stats_path = "/home/lhp/PycharmProjects/pcap_csv/stats/incoming_number.csv"
    # outgoing_stats_path = "/home/lhp/PycharmProjects/pcap_csv/stats/outgoing_number.csv"
    #
    # in_stats = csv_numpy(incoming_stats_path)
    # out_stats = csv_numpy(outgoing_stats_path)
    # in_stats.sort(key=sort_by_second)

    stats_path = "stats/stats.csv"
    stats = su.csv_numpy(stats_path)
    for p in stats:
        num = int(p[3]) + int(p[5])
        p.append(num)

    stats_out = outgoing_process(stats)

    incoming_process(stats_out)
def main(opts):

    csv_path = opts.csvPath
    folder = opts.folder

    eps = float(opts.eps)

    dst = '/home/lhp/PycharmProjects/2019_spring_data/optionB/' + str(
        eps) + '/' + folder

    if not os.path.isdir(dst):
        os.makedirs(dst)

    packets = su.csv_numpy(csv_path)
    pf = Path(csv_path)
    trace_name = pf.name[0:-4]
    distribution_generator(packets, trace_name, folder, eps)
    print(csv_path + ' is finished')
def distribution_generator(packets, trace_name, folder, eps):

    in_size_path = 'stats/distribution_gamma/adapt_in_distribution_size.csv'
    in_interval_path = 'stats/distribution_gamma/adapt_in_distribution_interval.csv'
    # the original snippet used out_size_list/out_interval_list below
    # without defining them; assumed analogous paths for the outgoing
    # distributions
    out_size_path = 'stats/distribution_gamma/adapt_out_distribution_size.csv'
    out_interval_path = 'stats/distribution_gamma/adapt_out_distribution_interval.csv'
    in_size_list = su.csv_numpy(in_size_path)
    in_interval_list = su.csv_numpy(in_interval_path)
    out_size_list = su.csv_numpy(out_size_path)
    out_interval_list = su.csv_numpy(out_interval_path)

    in_size_list.sort(key=su.sort_by_second, reverse=True)
    in_interval_list.sort(key=su.sort_by_second, reverse=True)
    in_size_list = in_size_list[0:100]
    calculate_ratio(in_size_list)
    calculate_ratio(in_interval_list)

    ori_end = packets[-1][0]

    ori_size = 0

    outgoing = []
    incoming = []
    for p in packets:
        if p[2] == -1:
            incoming.append(p)
        else:
            # outgoing sizes are zeroed: only incoming traffic is
            # obfuscated in this variant
            outgoing.append([p[0], 0, p[2]])
    for p in incoming:
        ori_size = ori_size + p[1]
    (outgoing_ap, outgoing_lap, out_ap_overhead, out_lap_overhead,
     out_ap_et_overhead, out_lap_et_overhead, out_unfinished, out_proc_q,
     positive_1) = fill_gap_lap(outgoing, out_size_list, out_interval_list,
                                eps)

    (incoming_ap, incoming_lap, in_ap_overhead, in_lap_overhead,
     in_ap_et_overhead, in_lap_et_overhead, in_unfinished, in_proc_q,
     positive_2) = fill_gap_lap(incoming, in_size_list, in_interval_list, eps)

    outgoing_lap.pop(0)
    incoming_lap.pop(0)
    outgoing_ap.pop(0)
    incoming_ap.pop(0)
    buffer_list = list(out_proc_q.queue) + list(in_proc_q.queue)
    buffer_list.sort(key=su.sort_by_name)
    # # buffer_list.append([positive_1 + positive_2])
    buffer_list.append([len(incoming_lap) + len(outgoing_lap)])
    # the combined list above is discarded; only the incoming process
    # queue is written out
    buffer_list = list(in_proc_q.queue)
    try:
        buffer_df = pd.DataFrame(buffer_list,
                                 columns=[
                                     'buffered_time', 'buffered_index', 'size',
                                     'cleaned_time', 'cleaned_index',
                                     'dummy_n', 'real_n'
                                 ])
        buffer_df.to_csv(
            '/home/lhp/PycharmProjects/2019_spring_data/optionB/' + str(eps) +
            '/' + folder + '/' + trace_name + 'buffer.csv',
            index=False)
    except (AssertionError, ValueError):
        # a column-count mismatch raised AssertionError in old pandas and
        # raises ValueError in current versions
        print('no proc queue!!!')

    ap_list = outgoing_ap + incoming_ap
    ap_list.sort(key=su.sort_by_name)
    real_ap_overhead = out_ap_overhead + in_ap_overhead
    real_lap_overhead = out_lap_overhead + in_lap_overhead
    et_ap_overhead = out_ap_et_overhead + in_ap_et_overhead
    et_lap_overhead = out_lap_et_overhead + in_lap_et_overhead
    ap_overall_overhead = real_ap_overhead + et_ap_overhead
    lap_overall_overhead = real_lap_overhead + et_lap_overhead
    unfinished = out_unfinished or in_unfinished

    # the incoming-only figures below override the combined ones computed
    # above, consistent with leaving outgoing traffic unobfuscated
    real_ap_overhead = in_ap_overhead
    real_lap_overhead = in_lap_overhead
    et_ap_overhead = in_ap_et_overhead
    et_lap_overhead = in_lap_et_overhead
    ap_overall_overhead = real_ap_overhead + et_ap_overhead
    lap_overall_overhead = real_lap_overhead + et_lap_overhead
    unfinished = in_unfinished

    # # # ap_df = pd.DataFrame(ap_list, columns=['time', 'size', 'direction', 'type'])
    # # # ap_df.to_csv('obf_data/adapt_list/'+ folder + '/' + trace_name + '_ap.csv', index=False)
    # #
    lap_list = incoming_lap + outgoing
    lap_list.sort(key=su.sort_by_name)
    info_stat(eps, trace_name, ori_size, real_ap_overhead, et_ap_overhead,
              ap_overall_overhead, real_lap_overhead, et_lap_overhead,
              lap_overall_overhead, ori_end, lap_list[-1][0] - ori_end,
              unfinished)
    lap_df = pd.DataFrame(lap_list,
                          columns=['time', 'size', 'direction', 'type'])

    if not os.path.isdir('obf_data/lap_list/' + str(eps) + '/' + folder):
        os.makedirs('obf_data/lap_list/' + str(eps) + '/' + folder)
    lap_df.to_csv('obf_data/lap_list/' + str(eps) + '/' + folder + '/' +
                  trace_name + 'lap.csv',
                  index=False)
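# Usage sketch with hypothetical arguments (folder and eps taken from
# the commented defaults in these examples); writes the obfuscated trace
# under obf_data/lap_list/0.005/test/.
packets = su.csv_numpy('csv/gamma/1/some_trace.csv')  # hypothetical path
distribution_generator(packets, 'some_trace', 'test', 0.005)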
def interval_count(packets):

    # histogram buckets as [upper_bound_seconds, count]; a gap is counted
    # in the first bucket whose bound covers it, with 100000 as the
    # catch-all
    init_out_list = [[0.00001, 0], [0.00005, 0], [0.0001, 0], [0.0005, 0],
                     [0.001, 0], [0.003, 0], [0.005, 0], [0.01, 0], [0.012, 0],
                     [0.014, 0], [0.016, 0], [0.018, 0], [0.02, 0], [0.025, 0],
                     [0.03, 0], [0.05, 0], [0.1, 0], [0.5, 0], [1, 0],
                     [100000, 0]]

    init_in_list = [[0.00001, 0], [0.0001, 0], [0.00013, 0], [0.00015, 0],
                    [0.00017, 0], [0.0002, 0], [0.00025, 0], [0.0003, 0],
                    [0.0005, 0], [0.001, 0], [0.005, 0], [0.01, 0], [0.03, 0],
                    [0.05, 0], [0.07, 0], [0.1, 0], [0.5, 0], [1.0, 0],
                    [2.0, 0], [100000, 0]]

    outgoing = []
    incoming = []
    for p in packets:
        if p[2] == -1:
            incoming.append(p)
        else:
            outgoing.append(p)

    try:
        out_interval_list = su.csv_numpy(
            'stats/adapt_out_distribution_interval.csv')
    except IOError:
        with open('stats/adapt_out_distribution_interval.csv', 'a') as build:
            writer = csv.writer(build)
            writer.writerow(['interval', 'count'])
            for p in init_out_list:
                writer.writerow(p)
        out_interval_list = su.csv_numpy(
            'stats/adapt_out_distribution_interval.csv')

    try:
        in_interval_list = su.csv_numpy(
            'stats/adapt_in_distribution_interval.csv')
    except IOError:
        with open('stats/adapt_in_distribution_interval.csv', 'a') as build:
            writer = csv.writer(build)
            writer.writerow(['interval', 'count'])
            for p in init_in_list:
                writer.writerow(p)
        in_interval_list = su.csv_numpy(
            'stats/adapt_in_distribution_interval.csv')

    for i, p in enumerate(outgoing):
        if i == 0:
            continue
        out_interval = p[0] - outgoing[i - 1][0]
        for k in out_interval_list:
            if out_interval <= k[0]:
                k[1] += 1
                break
    db_df = pd.DataFrame(out_interval_list, columns=['interval', 'count'])
    db_df.to_csv('stats/adapt_out_distribution_interval.csv', index=False)

    for i, p in enumerate(incoming):
        if i == 0:
            continue
        in_interval = p[0] - incoming[i - 1][0]
        for k in in_interval_list:
            if in_interval <= k[0]:
                k[1] += 1
                break
    db_df = pd.DataFrame(in_interval_list, columns=['interval', 'count'])
    db_df.to_csv('stats/adapt_in_distribution_interval.csv', index=False)
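# Sketch: the 0.002 s gap between the two outgoing packets below lands
# in the 0.003 bucket of the outgoing histogram; the lone incoming
# packet produces no gap.
interval_count([[0.000, 1500, 1], [0.002, 1500, 1], [0.010, 1500, -1]])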
Example #15
def main(opts):
    # csv_path = 'csv/April/Announce_Happy_Valentines_Day_??_Google_0_.csv'
    # # csv_path = opts.csvPath
    # packets = su.csv_numpy(csv_path)
    # feature_chi(packets)

    features_title = [
        'name', 'total_time', 'total_num', 'total_bytes', 'burst_num',
        'in_burst_num', 'in_burst_ratio', 'out_burst_num', 'out_burst_ratio',
        'in_burst_bandwidth', 'out_burst_bandwidth', 'max_in_bandwidth',
        'max_out_bandwidth', 'incoming_num', 'in_num_ratio', 'outgoing_num',
        'out_num_ratio', 'incoming_bytes', 'in_bytes_ratio', 'outgoing_bytes',
        'out_bytes_ratio'
    ]

    path = 'csv/April'

    # note: this appends a fresh header row on every run
    with open('stats/feature2.csv', 'a') as title:
        writer = csv.writer(title)
        writer.writerow(features_title)
    m = 0
    files = os.listdir(path)
    query_dict = {}
    for f in files:

        m += 1
        src = path + '/' + f
        packets = su.csv_numpy(src)
        trace_name = su.extract_name(f)
        features_cal(trace_name, packets)
        # (total_time, total_num, total_bytes, burst_num,
        # in_burst_num, in_burst_ratio,
        # out_burst_num, out_burst_ratio,
        # in_burst_bandwidth, out_burst_bandwidth,
        # max_in_bandwidth, max_out_bandwidth,
        # incoming_num, in_num_ratio,
        # outgoing_num, out_num_ratio,
        # incoming_bytes, in_bytes_ratio,
        # outgoing_bytes, out_bytes_ratio) = features_cal(trace_name, packets)

        # if trace_name not in query_dict:
        #     q = query(f,total_time, total_num, total_bytes, burst_num,
        #         in_burst_num, in_burst_ratio,
        #         out_burst_num, out_burst_ratio,
        #         in_burst_bandwidth, out_burst_bandwidth,
        #         max_in_bandwidth, max_out_bandwidth,
        #         incoming_num, in_num_ratio,
        #         outgoing_num, out_num_ratio,
        #         incoming_bytes, in_bytes_ratio,
        #         outgoing_bytes, out_bytes_ratio)
        #
        #     new_query = {trace_name:q}
        #     query_dict.update(new_query)
        # else:
        #     query_dict[trace_name].total_time = (total_time + query_dict[trace_name].total_time)/2
        #     query_dict[trace_name].total_num = (total_num + query_dict[trace_name].total_num)/2
        #     query_dict[trace_name].total_bytes = (total_bytes + query_dict[trace_name].total_bytes) / 2
        #     query_dict[trace_name].burst_num = (burst_num + query_dict[trace_name].burst_num) / 2
        #     query_dict[trace_name].in_burst_num = (in_burst_num + query_dict[trace_name].in_burst_num) / 2
        #     query_dict[trace_name].in_burst_ratio = (in_burst_ratio + query_dict[trace_name].in_burst_ratio) / 2
        #     query_dict[trace_name].out_burst_num = (out_burst_num + query_dict[trace_name].out_burst_num) / 2
        #     query_dict[trace_name].out_burst_ratio = (out_burst_ratio + query_dict[trace_name].out_burst_ratio) / 2
        #     query_dict[trace_name].out_burst_bandwidth = (out_burst_bandwidth + query_dict[trace_name].out_burst_bandwidth) / 2
        #     query_dict[trace_name].in_burst_bandwidth = (in_burst_bandwidth + query_dict[trace_name].in_burst_bandwidth) / 2
        #     query_dict[trace_name].max_in_bandwidth = (max_in_bandwidth + query_dict[trace_name].max_in_bandwidth) / 2
        #     query_dict[trace_name].max_out_bandwidth = (max_out_bandwidth + query_dict[trace_name].max_out_bandwidth) / 2
        #     query_dict[trace_name].incoming_num = (incoming_num + query_dict[trace_name].incoming_num) / 2
        #
        #     query_dict[trace_name].in_num_ratio = (in_num_ratio + query_dict[trace_name].in_num_ratio) / 2
        #     query_dict[trace_name].outgoing_num = (outgoing_num + query_dict[trace_name].outgoing_num) / 2
        #     query_dict[trace_name].out_num_ratio = (out_num_ratio + query_dict[trace_name].out_num_ratio) / 2
        #     query_dict[trace_name].incoming_bytes = (incoming_bytes + query_dict[trace_name].incoming_bytes) / 2
        #     query_dict[trace_name].in_bytes_ratio = (in_bytes_ratio + query_dict[trace_name].in_bytes_ratio) / 2
        #     query_dict[trace_name].out_bytes_ratio = (out_bytes_ratio + query_dict[trace_name].out_bytes_ratio) / 2
        #     query_dict[trace_name].outgoing_bytes = (outgoing_bytes + query_dict[trace_name].outgoing_bytes) / 2

        if m % 100 == 0:
            print(m)