Ejemplo n.º 1
0
def _cfg_adjacency_as_bytes(name_prefix):
    """Load one CFG file and flatten its adjacency matrix into a byte string.

    Reads ``<config.FEA_DIR>/<name_prefix>_cfg.txt`` as a networkx adjacency
    list, builds the dense float adjacency matrix, and joins its entries
    row by row with ``b','``.

    Args:
        name_prefix: string prepended to ``'_cfg.txt'`` to form the file name.

    Returns:
        bytes: the comma-separated matrix entries.
    """
    graph_cfg = nx.read_adjlist(os.path.join(config.FEA_DIR, name_prefix + '_cfg.txt'))
    # to_numpy_array yields a plain 2-D ndarray; to_numpy_matrix (and the
    # np.array(...) wrapper it needed) was removed in networkx 3.0.
    adj_arr = nx.to_numpy_array(graph_cfg, dtype=float)
    # np.bytes_ is the same type the old np.string_ alias pointed at; the
    # alias itself was removed in NumPy 2.0.
    adj_str = adj_arr.astype(np.bytes_)
    return b','.join(itertools.chain.from_iterable(adj_str))


def generate_cfg_pair(pair_list):
    """Serialize the CFG adjacency matrices for both members of each pair.

    Args:
        pair_list: iterable of pairs; ``pair[0]`` and ``pair[1]`` are the
            file-name prefixes of the two CFG files to load.

    Returns:
        tuple: ``(cfgs_1, cfgs_2)``, two lists of byte strings aligned with
        ``pair_list`` (first members and second members respectively).
    """
    cfgs_1 = []
    cfgs_2 = []
    logging.info('generate cfg pair...')
    for pair in tqdm(pair_list):
        cfgs_1.append(_cfg_adjacency_as_bytes(pair[0]))
        cfgs_2.append(_cfg_adjacency_as_bytes(pair[1]))
    return cfgs_1, cfgs_2
Ejemplo n.º 2
0
def gen_tfrecord_and_save_i2v_funcsim(save_file, pair_list, label_list):
    """Build the i2v_funcsim dataset for ``pair_list`` and write it as a TFRecord.

    One ``tf.train.Example`` is written per sample with the keys ``label``,
    ``cfg_1``, ``cfg_2``, ``dfg_1``, ``dfg_2``, ``fea_1``, ``fea_2``,
    ``num_1``, ``num_2`` and ``max``. The ``max`` field carries the same
    value (the dataset-wide ``max_size``) in every record.

    Args:
        save_file: path of the TFRecord file to create.
        pair_list: pairs handed to ``construct_learning_dataset_i2v_funcsim``.
        label_list: one integer label per pair.
    """
    cfgs_1, cfgs_2, dfgs_1, dfgs_2, feas_1, feas_2, nums_1, nums_2, max_size = \
        construct_learning_dataset_i2v_funcsim(pair_list)
    # Every record stores the same integer, so a scalar is enough; this
    # replaces a constant array built with np.linspace(max_size, max_size, n).
    max_nodes = int(max_size)

    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    samples = zip(label_list, cfgs_1, cfgs_2, dfgs_1, dfgs_2,
                  feas_1, feas_2, nums_1, nums_2)

    # The context manager closes the writer even if building or writing a
    # record raises part-way through.
    with tf.python_io.TFRecordWriter(save_file) as writer:
        logging.info('generate tfrecord and save to {}'.format(save_file))
        # zip() has no len(), so give tqdm the expected count explicitly
        # (assumes the per-sample lists are as long as label_list).
        for label, cfg_1, cfg_2, dfg_1, dfg_2, fea_1, fea_2, num_1, num_2 \
                in tqdm(samples, total=len(label_list)):
            feature = {
                'label': _int64_feature(label),
                'cfg_1': _bytes_feature(cfg_1),
                'cfg_2': _bytes_feature(cfg_2),
                'dfg_1': _bytes_feature(dfg_1),
                'dfg_2': _bytes_feature(dfg_2),
                'fea_1': _bytes_feature(fea_1),
                'fea_2': _bytes_feature(fea_2),
                'num_1': _int64_feature(num_1),
                'num_2': _int64_feature(num_2),
                'max': _int64_feature(max_nodes),
            }
            example_proto = tf.train.Example(
                features=tf.train.Features(feature=feature))
            writer.write(example_proto.SerializeToString())
Ejemplo n.º 3
0
def generate_feature_pair(pair_list, flag):
    """Collect per-function features and node counts for every pair.

    Args:
        pair_list: iterable of pairs; ``pair[0]`` and ``pair[1]`` are each
            passed to ``__generate_feature_func``.
        flag: selects the ``(left, right)`` bounds forwarded to
            ``__generate_feature_func``:
            0: gemini     -> (1, 8); also used for any unlisted value.
            1: vulseeker  -> (8, 16).
            2: i2v_***    -> (1, config.WORD2VEC_EMBEDDING_SIZE + 1).

    Returns:
        tuple: ``(feas_1, feas_2, max_nodes, nums_1, nums_2)`` where the
        ``feas_*`` lists hold the features of the first/second pair members,
        ``nums_*`` are numpy arrays of the matching node counts, and
        ``max_nodes`` is the largest node count over both sides.
    """
    if flag == 1:
        bounds = (8, 16)
    elif flag == 2:
        bounds = (1, config.WORD2VEC_EMBEDDING_SIZE + 1)
    else:
        bounds = (1, 8)
    lower, upper = bounds

    feas_1, feas_2 = [], []
    counts_1, counts_2 = [], []
    logging.info('generate feature pair...')
    for pair in tqdm(pair_list):
        first_count, first_fea = __generate_feature_func(pair[0], lower, upper, flag)
        feas_1.append(first_fea)
        counts_1.append(first_count)

        second_count, second_fea = __generate_feature_func(pair[1], lower, upper, flag)
        feas_2.append(second_fea)
        counts_2.append(second_count)

    # The maximum does not depend on order, so the two sides can simply be
    # concatenated instead of tracked in a separate interleaved list.
    largest = np.max(counts_1 + counts_2)
    return feas_1, feas_2, largest, np.array(counts_1), np.array(counts_2)
Ejemplo n.º 4
0
def generate_cfg_dfg_pair(pair_list):
    """Generate the CFG and DFG of both members of every pair.

    Args:
        pair_list: iterable of pairs; ``pair[0]`` and ``pair[1]`` are each
            passed to ``__generate_cfg_dfg_func``, which returns a
            ``(cfg, dfg)`` tuple.

    Returns:
        tuple: ``(cfgs_1, cfgs_2, dfgs_1, dfgs_2)``, four lists aligned
        with ``pair_list``.
    """
    cfgs_1, cfgs_2, dfgs_1, dfgs_2 = [], [], [], []
    logging.info('generate cfg & dfg pair...')
    for pair in tqdm(pair_list):
        first_cfg, first_dfg = __generate_cfg_dfg_func(pair[0])
        second_cfg, second_dfg = __generate_cfg_dfg_func(pair[1])

        cfgs_1.append(first_cfg)
        cfgs_2.append(second_cfg)
        dfgs_1.append(first_dfg)
        dfgs_2.append(second_dfg)
    return cfgs_1, cfgs_2, dfgs_1, dfgs_2
Ejemplo n.º 5
0
        feature = {
            'label': tf.train.Feature(int64_list = tf.train.Int64List(value=[item1])),
            'cfg_1': tf.train.Feature(bytes_list = tf.train.BytesList(value=[item2])),
            'cfg_2': tf.train.Feature(bytes_list = tf.train.BytesList(value=[item3])),
            'dfg_1': tf.train.Feature(bytes_list = tf.train.BytesList(value=[item4])),
            'dfg_2': tf.train.Feature(bytes_list = tf.train.BytesList(value=[item5])),
            'fea_1': tf.train.Feature(bytes_list = tf.train.BytesList(value=[item6])),
            'fea_2': tf.train.Feature(bytes_list = tf.train.BytesList(value=[item7])),
            'num_1': tf.train.Feature(int64_list = tf.train.Int64List(value=[item8])),
            'num_2': tf.train.Feature(int64_list = tf.train.Int64List(value=[item9])),
            'max': tf.train.Feature(int64_list = tf.train.Int64List(value=[item10])),
        }
        features = tf.train.Features(feature = feature)
        example_proto = tf.train.Example(features = features)
        serialized = example_proto.SerializeToString()
        writer.write(serialized)
    writer.close()

# Make sure the output locations for all three TFRecord files exist.
test_and_create_dirs(
    config.TFRECORD_TRAIN,
    config.TFRECORD_VALID,
    config.TFRECORD_TEST,
)

train_pair, train_label, valid_pair, valid_label, test_pair, test_label = load_dataset()

# One row per split: (log message, output path, pairs, labels).
_tfrecord_jobs = (
    ('generate tfrecord: i2v_funcsim train...', config.TFRECORD_TRAIN, train_pair, train_label),
    ('generate tfrecord: i2v_funcsim valid...', config.TFRECORD_VALID, valid_pair, valid_label),
    ('generate tfrecord: i2v_funcsim test...', config.TFRECORD_TEST, test_pair, test_label),
)
for _message, _record_path, _pairs, _labels in _tfrecord_jobs:
    logging.info(_message)
    gen_tfrecord_and_save_i2v_funcsim(_record_path, _pairs, _labels)

Ejemplo n.º 6
0
def calculate_auc(labels, predicts):
    """Compute, log and return the area under the ROC curve.

    Args:
        labels: ground-truth labels; ``1`` is treated as the positive class.
        predicts: scores passed to ``roc_curve`` alongside ``labels``.

    Returns:
        The value returned by ``auc`` for the resulting ROC curve.
    """
    # The thresholds returned by roc_curve are not needed here.
    false_pos_rate, true_pos_rate, _ = roc_curve(labels, predicts, pos_label=1)
    area = auc(false_pos_rate, true_pos_rate)
    logging.info("auc : {}".format(area))
    return area
Ejemplo n.º 7
0
            tr_acc = compute_accuracy(predict, y)

            statis_train_batch_loss.append(loss_value)
            statis_train_batch_acc.append(tr_acc)

            avg_loss += loss_value
            avg_acc += tr_acc
            epoch_avg_loss += loss_value
            epoch_avg_acc += tr_acc

            if (i + 1) % display_step == 0 or i == 0:
                if i > 0:
                    avg_loss /= display_step
                    avg_acc /= display_step
                msg = 'iter: {}, avg_loss: {}, avg_acc: {}'
                logging.info(msg.format(i + 1, avg_loss, avg_acc))
                avg_loss = 0
                avg_acc = 0
        statis_train_epoch_loss.append(epoch_avg_loss / num_iter)
        statis_train_epoch_acc.append(epoch_avg_acc / num_iter)

        if epoch % snapshot == 0:
            total_labels = []
            total_predicts = []
            avg_loss = 0.
            avg_acc = 0.
            valid_start_time = time.time()
            for i in tqdm(range(int(valid_num / B))):
                true_batch_valid = sess.run(list(batch_valid))
                y, cfgs_1, dfgs_1, cfgs_2, dfgs_2, feature_1, feature_2, v_num_1, v_num_2 \
                    = get_batch(*true_batch_valid)