Code example #1
File: data_load.py Project: h0wl/BrowserFuzzing
def get_batch_gen(maxlen1, maxlen2, vocab_fpath, batch_size, shuffle=False):
    '''Gets training / evaluation mini-batches from a hard-coded sample.
    maxlen1: source sent maximum length. scalar.
    maxlen2: target sent maximum length. scalar.
    vocab_fpath: string. vocabulary file path.
    batch_size: scalar
    shuffle: boolean

    Returns
    batches
    num_batches: number of mini-batches
    num_samples
    '''
    # Obtain the data and filter it; sents1 = list[str]
    original = """
    ▁function ( b ) ▁{ ▁var ▁c ▁= ▁b . browser . supports Class List (), ▁a ▁= ▁b . dom ; ▁a . addClass ▁= ▁function ( b , ▁e ) ▁{ ▁if ▁( c ) ▁return ▁b . classList . add ( e ); ▁a . hasClass ( b , ▁e ) ▁|| ▁( b . className ▁+= ▁" ▁" ▁+ ▁e ); ▁}; ▁a . removeClass ▁= ▁function ( a , ▁b ) ▁{ ▁if ▁( c ) ▁return ▁a . classList . remove ( b ); ▁a . className ▁= ▁a . className . replace (
    """

    sents1 = [original.strip()]
    sents2 = [original.strip()]

    # Returns a tf.data.Dataset
    batches = input_fn(sents1,
                       sents2,
                       vocab_fpath,
                       batch_size,
                       shuffle=shuffle)

    # Compute the number of batches from the total sample count and batch_size
    num_batches = calc_num_batches(len(sents1), batch_size)

    return batches, num_batches, len(sents1)
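
Every example on this page calls a calc_num_batches helper that the excerpts do not show. A minimal sketch, assuming it is plain ceiling division (a final partial batch still counts as one batch):

def calc_num_batches(total_num, batch_size):
    # Ceiling division without math.ceil: round total_num / batch_size up.
    return (total_num + batch_size - 1) // batch_size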
Code example #2
def get_batch(fpath1,
              fpath2,
              maxlen1,
              maxlen2,
              vocab_fpath,
              batch_size,
              shuffle=False):
    '''Gets training / evaluation mini-batches
    fpath1: source file path. string.
    fpath2: target file path. string.
    maxlen1: source sent maximum length. scalar.
    maxlen2: target sent maximum length. scalar.
    vocab_fpath: string. vocabulary file path.
    batch_size: scalar
    shuffle: boolean

    Returns
    batches
    num_batches: number of mini-batches
    num_samples
    '''
    sents1, sents2 = load_data(fpath1, fpath2, maxlen1, maxlen2)
    batches = input_fn(sents1,
                       sents2,
                       vocab_fpath,
                       batch_size,
                       shuffle=shuffle)
    num_batches = calc_num_batches(len(sents1), batch_size)
    return batches, num_batches, len(sents1)
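
The input_fn that these snippets delegate to is not shown either. The sketch below is one plausible shape for it, not any of these repositories' actual code: it assumes whitespace-tokenized sentences and a vocabulary file with one token per line, and it pads each batch to its longest sequence.

import tensorflow as tf

def input_fn(sents1, sents2, vocab_fpath, batch_size, shuffle=False):
    # Map each token to its line number in the vocabulary file.
    with open(vocab_fpath, encoding="utf-8") as f:
        token2idx = {tok.strip(): i for i, tok in enumerate(f)}

    def gen():
        # Encode each sentence pair as two variable-length id sequences;
        # unknown tokens fall back to id 0.
        for s1, s2 in zip(sents1, sents2):
            yield ([token2idx.get(t, 0) for t in s1.split()],
                   [token2idx.get(t, 0) for t in s2.split()])

    ds = tf.data.Dataset.from_generator(
        gen,
        output_signature=(tf.TensorSpec(shape=(None,), dtype=tf.int32),
                          tf.TensorSpec(shape=(None,), dtype=tf.int32)))
    if shuffle:
        ds = ds.shuffle(buffer_size=128 * batch_size)
    # Pad every batch to the length of its longest sequence.
    return ds.padded_batch(batch_size, padded_shapes=([None], [None]))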
Code example #3
File: data_load.py Project: yugenlgy/DGCNN
def get_batch(fpath,
              maxlen1,
              maxlen2,
              vocab_fpath,
              batch_size,
              gpu_nums,
              shuffle=False):
    """
  Gets training / evaluation mini-batches
    fpath1: source file path. string.
    fpath2: target file path. string.
    maxlen1: source sent maximum length. scalar.
    maxlen2: target sent maximum length. scalar.
    vocab_fpath: string. vocabulary file path.
    batch_size: scalar
    shuffle: boolean

    Returns
    batches
    num_batches: number of mini-batches
    num_samples
    """
    questions, evidences, labels = _load_data(fpath, maxlen1, maxlen2)
    batches = _input_fn(questions,
                        evidences,
                        labels,
                        vocab_fpath,
                        batch_size,
                        gpu_nums,
                        maxlen1,
                        maxlen2,
                        shuffle=shuffle)
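    # One training step feeds every GPU, so num_batches is computed
    # against the effective global batch of batch_size * gpu_nums.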
    num_batches = calc_num_batches(len(questions), batch_size * gpu_nums)
    return batches, num_batches, len(questions)
Code example #4
def get_batch(fpath1,
              fpath2,
              maxlen1,
              maxlen2,
              vocab_fpath,
              batch_size,
              shuffle=False):
    '''Gets training / evaluation mini-batches
    fpath1: source file path. string.
    fpath2: target file path. string.
    maxlen1: source sent maximum length. scalar.
    maxlen2: target sent maximum length. scalar.
    vocab_fpath: string. vocabulary file path.
    batch_size: scalar
    shuffle: boolean
    '''
    sents1, sents2 = load_data(fpath1, fpath2, maxlen1, maxlen2)
    # input_fn() returns the dataset generator object `batches`
    batches = input_fn(sents1,
                       sents2,
                       vocab_fpath,
                       batch_size,
                       shuffle=shuffle)
    # Compute the required number of batches from the total sample count
    # and batch_size; len(sents1) is returned as the total sample count.
    num_batches = calc_num_batches(len(sents1), batch_size)
    return batches, num_batches, len(sents1)
Code example #5
def get_batch(csv_path, batch_size, vocabs=vocabs, shuffle=True):
    df = pd.read_csv(csv_path)
    epitopes = df.epitope.apply(str).tolist()
    cdr3s = df.cdr3.apply(str).tolist()
    batches = input_fn(epitopes, cdr3s, vocabs, batch_size, shuffle=shuffle)
    num_batches = calc_num_batches(len(epitopes), batch_size)

    return batches, num_batches, len(epitopes)
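
A hypothetical call (the file name and batch size are made up for illustration; the CSV just needs epitope and cdr3 columns):

batches, num_batches, num_samples = get_batch("train.csv", batch_size=64)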
Code example #6
def get_batch_for_inference(epitopes, batch_size, vocabs=vocabs, shuffle=True):
    """
    epitopes: list of str
    """
    batches = input_fn(epitopes, epitopes, vocabs, batch_size, shuffle=shuffle)
    num_batches = calc_num_batches(len(epitopes), batch_size)

    return batches, num_batches, len(epitopes)
Code example #7
File: data_load.py Project: Traeyee/DickLearning
def get_batch(fpath,
              task_type,
              input_indices,
              vocabs,
              context,
              batch_size,
              shuffle=False):
    """More standarlized, recommended"""
    instances = load_data(fpath)
    batches = input_fn(instances, task_type, input_indices, vocabs, context,
                       batch_size, shuffle)
    num_batches = calc_num_batches(len(instances), batch_size)
    return batches, num_batches, len(instances)
Code example #8
def get_batch(fpath1,
              fpath2,
              maxlen1,
              maxlen2,
              vocab_fpath,
              batch_size,
              shuffle=False):
    sents1, sents2 = load_data(fpath1, fpath2, maxlen1, maxlen2)
    batches = input_fn(sents1,
                       sents2,
                       vocab_fpath,
                       batch_size,
                       shuffle=shuffle)
    num_batches = calc_num_batches(len(sents1), batch_size)
    return batches, num_batches, len(sents1)
Code example #9
File: data_load.py Project: Traeyee/DickLearning
def get_batch_sim(fpath,
                  maxlen1,
                  maxlen2,
                  vocab_fpath,
                  batch_size,
                  shuffle=False):
    """Dssm style task"""
    sents1, sents2, scores = load_data2(fpath, maxlen1, maxlen2)
    batches = input_fn_sim(sents1,
                           sents2,
                           scores,
                           vocab_fpath,
                           batch_size,
                           shuffle=shuffle)
    num_batches = calc_num_batches(len(sents1), batch_size)
    return batches, num_batches, len(sents1)
Code example #10
File: data_load.py Project: qsong4/toy_uda
def get_batch_sup(features, batch_size, shuffle=True):

    inputs_a, a_lens, related_labels = features

    instance_len = len(inputs_a)
    num_batches = calc_num_batches(instance_len, batch_size)

    if shuffle:
        indices = np.random.permutation(np.arange(instance_len))
        inputs_a = inputs_a[indices]
        a_lens = a_lens[indices]
        related_labels = related_labels[indices]

    for i in range(num_batches):
        start_id = i * batch_size
        end_id = min((i + 1) * batch_size, instance_len)
        yield (inputs_a[start_id:end_id], a_lens[start_id:end_id],
               related_labels[start_id:end_id])
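
A minimal, hypothetical driver for this generator, assuming the three feature arrays are NumPy arrays of equal length and that calc_num_batches is the ceiling-division helper sketched earlier:

import numpy as np

inputs_a = np.random.randint(0, 100, size=(10, 7))  # 10 token-id rows
a_lens = np.full(10, 7)                              # their lengths
related_labels = np.random.randint(0, 2, size=10)    # binary labels

for inputs, lens, labels in get_batch_sup((inputs_a, a_lens, related_labels),
                                          batch_size=4):
    print(inputs.shape, lens.shape, labels.shape)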
Code example #11
File: data_load.py Project: qsong4/toy_uda
def get_batch_unsup(features, batch_size, shuffle=True):
    ori_input, ori_lens, aug_input, aug_lens = features

    instance_len = len(ori_input)
    num_batches = calc_num_batches(instance_len, batch_size)

    if shuffle:
        indices = np.random.permutation(np.arange(instance_len))
        ori_input = ori_input[indices]
        ori_lens = ori_lens[indices]
        aug_input = aug_input[indices]
        aug_lens = aug_lens[indices]

    for i in range(num_batches):
        start_id = i * batch_size
        end_id = min((i + 1) * batch_size, instance_len)
        yield (ori_input[start_id:end_id], ori_lens[start_id:end_id],
               aug_input[start_id:end_id], aug_lens[start_id:end_id])
Code example #12
File: data_load.py Project: OuyKai/KEPN
def get_batch(fpath1,
              fpath2,
              maxlen1,
              maxlen2,
              vocab_fpath,
              paraphrased_fpath,
              batch_size,
              shuffle=False,
              paraphrase_type=0):
    sents1, sents2, paraphrased_pairs = load_data(fpath1, fpath2,
                                                  paraphrased_fpath, maxlen1,
                                                  maxlen2)
    batches = input_fn(sents1,
                       sents2,
                       paraphrased_pairs,
                       vocab_fpath,
                       batch_size,
                       shuffle=shuffle,
                       paraphrase_type=paraphrase_type)
    num_batches = calc_num_batches(len(sents1), batch_size)
    return batches, num_batches, len(sents1)