Example #1
def batchify(data, labels, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = (data.size(0) * data.size(1)) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.view(-1).narrow(0, 0, nbatch * bsz)
    labels = labels.view(-1).narrow(0, 0, nbatch * bsz)

    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    labels = labels.view(bsz, -1).t().contiguous()

    return data.to(device), labels.to(device)
Example #2
def batchify(data, batch_size):
    """
    Divide the data into batches of size batch_size

    From sequential data, batchify arranges the dataset into columns

    Example:
        a g m s
        b h n t
        c i o u
        d j p v
        e k q w
        f l r x

        Each column is treated independently. This means that
        the dependence of e.g. "g" on "f" cannot be learned, but it
        allows for more efficient batch processing.

    Args:
        data: Tensor of token ids obtained after tokenization
        batch_size: Int, size of the batch

    Returns:
        the batched ids as a Tensor
    """
    # Split the data into num_batch batches
    num_batch = data.size(0) // batch_size

    # Trim off excess elements that do not fit
    data = data.narrow(0, 0, num_batch * batch_size)

    # Evenly divide data across batches
    data = data.view(batch_size, -1).t().contiguous()

    return data.to(device)
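As a quick check of the column layout described in the docstring above, here is a minimal, self-contained sketch (an editorial addition, not one of the scraped examples; it assumes torch is installed and pins device to CPU for the demo):

import torch

device = torch.device("cpu")

def batchify_demo(data, batch_size):
    # Same steps as above: trim to a multiple of batch_size, then
    # reshape and transpose so each column is one contiguous stream.
    num_batch = data.size(0) // batch_size
    data = data.narrow(0, 0, num_batch * batch_size)
    return data.view(batch_size, -1).t().contiguous().to(device)

ids = torch.arange(24)  # stand-in for the token ids a..x
print(batchify_demo(ids, 4))
# tensor([[ 0,  6, 12, 18],
#         [ 1,  7, 13, 19],
#         [ 2,  8, 14, 20],
#         [ 3,  9, 15, 21],
#         [ 4, 10, 16, 22],
#         [ 5, 11, 17, 23]])

Column 0 holds ids 0..5, column 1 holds 6..11, and so on: exactly the a..x grid from the docstring.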
Example #3
def batchify(data, bsz):
    nbatch = data.size(0) // bsz  # Work out how cleanly we can divide the dataset into bsz parts.
    data = data.narrow(0, 0, nbatch * bsz)  # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.view(bsz, -1).t().contiguous()  # Evenly divide the data across the bsz batches.
    if args.cuda:  # If we can do this on the GPU,
        data = data.cuda()  # move the data there.
    return data
Example #4
def batchify(data, bsz):
    '''Copied from starter code main.py file '''

    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    data = data.view(bsz, -1).t().contiguous()
    return data
Example #5
def batchify(data, bsz):
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data
Example #6
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #7
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz #GAM~ Size of the tensor 'data' // (floor division - returns the integer part of the quotient) divided by 'bsz' (batch size)
    # Trim off any extra elements that wouldn't cleanly fit (remainders). ##GAM~ The ones that are excluded by the // (floor division)
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches. #GAM~ Makes a matrix of size nbatch x bsz (batch size, default 20)
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #8
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.cuda() if args.cuda else data
Example #9
def batchify(data, bsz):
    print("batchify")
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    data = data.view(bsz, -1).t().contiguous()
    if USE_CUDA:
        data = data.cuda()
    return data
Example #10
def custom_batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(-1, bsz, EMBEDDING_SIZE).contiguous()
    return data.to(device)
Example #11
def batchify(data, bsz):
    # work out how cleanly we can divide the dataset into bsz parts
    nbatch = data.size(0) // bsz # floor operator
    # trim off any extra elements that wouldn't cleanly fit (trim off remainders)
    data = data.narrow(0, 0, nbatch * bsz)
    # evenly divide the data across the bsz batches
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #12
def batchify(data, batch_size):
    # Work out how cleanly we can divide the dataset into batch_size parts.
    nbatch = data.size(0) // batch_size
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * batch_size)
    # Evenly divide the data across the batch_size batches.
    data = data.view(batch_size, -1).t().contiguous()
    return data.to(device)
Example #13
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    # Tensor.size(n) returns the size of the n-th dimension; 0 means the first.
    # The data here is a one-dimensional Tensor.

    # bsz is the size of a batch
    # nbatch is the number of batches
    # Note the floor division here: there may be leftover data,
    # which is dealt with below.
    nbatch = data.size(0) // bsz
    # How Tensor.narrow(dim, start, length) works:
    # for a 2*3*4*5 tensor, dim ranges over [-4, 3]; 0 selects the first
    # dimension (the one of size 2), 1 the second (size 3), 2 the third
    # (size 4), and so on.
    # Within the chosen dimension, the kept range is [start, start + length).
    # For example:
    # x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) is a 3*3 matrix
    # torch.narrow(x, 0, 0, 2) ===>
    # tensor([[ 1,  2,  3],
    #         [ 4,  5,  6]])
    # torch.narrow(x, 1, 1, 2) ===>
    # tensor([[ 2,  3],
    #         [ 5,  6],
    #         [ 8,  9]])

    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    # Cut away the surplus, keeping only batch size * number of batches
    # elements; narrowing along the first dimension keeps the original order.
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    # view() is essentially a reshape; -1 means that dimension is inferred.
    # t() transposes a 2-D matrix (the input must be 2-D), e.g. 2*3 -> 3*2.

    # Note that reshape and transpose order the elements differently; the two
    # results below have the same shape but a different element order:
    # x = torch.Tensor(2,3)
    # x.reshape(3,2) != x.t()

    # data.view(bsz, -1) lays each of the bsz sequences out as a row;
    # the following .t() transposes, so each sequence becomes a column.

    # So the question is: why not reshape directly to (nbatch, bsz)?
    # The ordering seems to be rearranged deliberately:
    # 1 2 3 4 5 6 7 8 9 10 11 12
    # == == == == == == == == == == == == == ==
    # call view()
    # bsz = 4
    # nbatch = 3
    # 1   2   3
    # 4   5   6
    # 7   8   9
    # 10  11  12
    # == == == == == == == == == == == == == ==
    # call t()
    # 1   4   7   10
    # 2   5   8   11
    # 3   6   9   12
    # == == == == == == == == == == == == == ==
    # contiguous()?
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
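The questions raised in the comments above can be settled with a short experiment. A hedged sketch (an editorial addition assuming torch; not part of the scraped example) showing that view(bsz, -1).t() and a direct view(-1, bsz) order elements differently, and why .contiguous() is needed:

import torch

x = torch.arange(1, 13)  # 1 .. 12, as in the comment above
a = x.view(4, -1).t()    # the batchify layout: each column is one stream
b = x.view(-1, 4)        # a plain reshape to (nbatch, bsz)
print(a)
# tensor([[ 1,  4,  7, 10],
#         [ 2,  5,  8, 11],
#         [ 3,  6,  9, 12]])
print(b)
# tensor([[ 1,  2,  3,  4],
#         [ 5,  6,  7,  8],
#         [ 9, 10, 11, 12]])
print(a.is_contiguous())  # False: t() returns a strided view, so
                          # .contiguous() copies it into row-major memory,
                          # which later .view() calls require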
Example #14
def batchify(data, bsz):  # data here is a Tensor
    nbatch = data.size(0) // bsz  # dividing the total word count by batch_size gives the number of batches needed
    # narrow(dim, start, length): the first argument selects rows (0) or
    # columns (1), the second gives the starting row/column, and the third
    # the number of rows/columns to keep
    data = data.narrow(0, 0, nbatch * bsz)
    # the size -1 is inferred from other dimensions: view to bsz rows and
    # nbatch columns, then t() swaps them to nbatch rows and bsz columns
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data  # returns a matrix of nbatch rows and bsz columns
Example #15
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    # Turning the data over to CUDA at this point may lead to more OOM errors
    return data.to(device)
Example #16
def batchify(data):
    '''
    Prepare the data so it is ready for drawing batches.
    Input: (number of characters,)
    Output: (batch_size, -1)
    '''
    nbatch = data.shape[0] // args.batch_size
    data = data[:nbatch * args.batch_size]
    return data.view(args.batch_size, -1).to(device)
Example #17
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    # print("data.shape", data.shape, "nbatch", nbatch, "bsz", bsz)
    return data.to(device)
Example #18
def batchify(data, bsz, _device):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    # The dimension 'dim' is narrowed from 'start' to 'start + length'
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(_device)
Example #19
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    # need to divide by args.bptt to make sure seq_len fits the nn_model
    return data.to(device)
Example #20
def batchify(data, embind, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    embind = embind.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    embind = embind.view(bsz, -1, embind.size(1)).transpose(0, 1).contiguous()
    return data.to(device), embind.to(device)
Example #21
def batchify(data, bsz):
    print('batching data ...', end='')
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    print(data.size())
    return data.to(device)
Example #22
def batchify(data, batch_size):
    # Get batch number for the data
    nbatch = data.size(0) // batch_size
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * batch_size)
    # Evenly divide the data across the batch_size batches.
    # dim: (nbatch, batch_size)
    data = data.view(batch_size, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data
Example #23
def batchify_cdata(data, bsz):
    # note: this function only works on contiguous data, due to view
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    ndatadims = data.size(1)
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    # use torch's permute here: np.transpose would return a numpy array,
    # which has no .contiguous() method
    data = data.view(bsz, -1, ndatadims).permute(1, 0, 2).contiguous()
    if args.cuda:
        data = data.cuda()
    return data
Example #24
def batchify(data, bsz, random_start_idx=False):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Shuffle data: pick a random starting offset within the remainder
    # (guard against a zero remainder, where random.randint(0, -1) would
    # raise a ValueError)
    remainder = data.size(0) % bsz
    if random_start_idx and remainder > 0:
        start_idx = random.randint(0, remainder - 1)
    else:
        start_idx = 0
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, start_idx, nbatch * bsz)
    # Evenly divide the data across the bsz batches
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #25
def batchify(data, bsz):
    # data : [len(train.txt),]
    # Work out how cleanly we can divide the dataset into bsz parts.
    # data.size(0) == len(tokens) + 1 ('<eos>')
    nbatch = data.size(0) // bsz  # take the quotient
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    # Not entirely sure about this yet; the effect is to throw away the
    # leftover remainder of the data.
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    # .t() takes the transpose
    # .contiguous() returns a tensor with the same data laid out contiguously
    # in memory; if the original tensor is already contiguous, the original
    # tensor itself is returned
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #26
def batchify(data, bsz):
    if args.model == "FNN":
        # Implement sliding window to generate data in sizes of bsz
        data = [np.array(data[i:i+bsz]) for i in range(data.shape[0] - bsz + 1)]
        data = torch.Tensor(data).to(torch.int64)
        return data.to(device)
    else:
        # Work out how cleanly we can divide the dataset into bsz parts.
        nbatch = data.size(0) // bsz
        # Trim off any extra elements that wouldn't cleanly fit (remainders).
        data = data.narrow(0, 0, nbatch * bsz)
        # Evenly divide the data across the bsz batches.
        data = data.view(bsz, -1).t().contiguous()
        return data.to(device)     
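To make the FNN branch above concrete, here is a tiny demonstration of the sliding window (an editorial addition assuming torch and numpy; not part of the scraped example). Each row is a window of bsz consecutive ids, shifted by one position:

import numpy as np
import torch

data = torch.arange(6)
windows = [np.array(data[i:i + 3]) for i in range(data.shape[0] - 3 + 1)]
print(torch.Tensor(np.array(windows)).to(torch.int64))
# tensor([[0, 1, 2],
#         [1, 2, 3],
#         [2, 3, 4],
#         [3, 4, 5]])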
Example #27
def batchify(data, bptt, bsz):
    # Parisa's modification
    lcm = int(bptt * bsz)
    print('number of tokens in data tensor for each batch is {}'.format(lcm))
    # Parisa's modification
    
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // lcm
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * lcm)
    
    # Parisa's modification
    # Evenly divide the data across the bsz batches.
    data = data.view(-1, bptt).contiguous()
    # Parisa's modification
    
    return data.to(device)
Example #28
def batchify(data, bsz):
    ''' Starting from sequential data, batchify arranges the dataset into columns.
    For instance, with the alphabet as the sequence and batch size 4, we'd get
    a g m s
    b h n t
    c i o u
    d j p v
    e k q w
    f l r x
    These columns are treated as independent by the model, which means that the
    dependence of e.g. 'g' on 'f' cannot be learned, but it allows more efficient
    batch processing.
    '''
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    # Turning the data over to CUDA at this point may lead to more OOM errors
    return data.to(device)
Example #29
def batchify(data, bsz):
    """Starting from sequential data, batchify arranges the dataset into columns.
    For instance, with the alphabet as the sequence and batch size 4, we'd get
    ┌ a g m s ┐
    │ b h n t │
    │ c i o u │
    │ d j p v │
    │ e k q w │
    └ f l r x ┘.
    These columns are treated as independent by the model, which means that the
    dependence of e.g. 'g' on 'f' cannot be learned, but allows more efficient
    batch processing."""
    nbatch = data.size(0) // bsz  # Work out how cleanly we can divide the dataset into bsz parts.
    data = data.narrow(0, 0, nbatch * bsz)  # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.view(bsz, -1).t().contiguous()  # Evenly divide the data across the bsz batches.
    return data.to(device)
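For context, a batchified tensor like the ones above is usually consumed in bptt-sized slices. The helper below is a sketch modeled on the get_batch function in the upstream PyTorch word_language_model example that these snippets appear to derive from; treat the exact signature and the bptt default as assumptions, not part of the examples above:

def get_batch(source, i, bptt=35):
    # source: output of batchify, shape (seq_len, bsz)
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]  # inputs: rows i .. i+seq_len-1
    # targets are the same rows shifted forward by one token, flattened
    target = source[i + 1:i + 1 + seq_len].reshape(-1)
    return data, target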
Example #30
def demo(num_sentences, seq_len):  # renamed from `len` to avoid shadowing the builtin
    model.eval()
    hidden = model.init_hidden(num_sentences)
    data = torch.LongTensor([corpus.dictionary.word2idx['<eos>']] *
                            num_sentences).view(1, num_sentences)
    data = Variable(data, volatile=True)
    print(data)
    sentences = []
    for i in range(num_sentences):
        sentences.append([])
    for i in range(seq_len):
        output_prob, sememe_prob, hidden = model(data, hidden)
        dist = torch.distributions.Categorical(output_prob)
        data = dist.sample()
        #print(data)
        print('generating sentences ... {}/{}'.format(i + 1, seq_len))
        for j in range(num_sentences):  # use j so the outer loop index isn't clobbered
            sentences[j].append(corpus.dictionary.idx2word[data.data[j]])
        data = data.view(1, num_sentences)
        hidden = repackage_hidden(hidden)
    for i in range(num_sentences):
        print(' '.join(sentences[i]))
Example #31
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    is_tuple = isinstance(data, tuple)
    if is_tuple:
        nbatch = data[0].size(0) // bsz
        # Trim off any extra elements that wouldn't cleanly fit (remainders).
        tag_data = data[1].narrow(0, 0, nbatch * bsz)
        data = data[0].narrow(0, 0, nbatch * bsz)
        # Evenly divide the data across the bsz batches.
        tag_data = tag_data.view(bsz, -1).t().contiguous()
    else:
        nbatch = data.size(0) // bsz
        # Trim off any extra elements that wouldn't cleanly fit (remainders).
        data = data.narrow(0, 0, nbatch * bsz)

    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    # Turning the data over to CUDA at this point may lead to more OOM errors
    #if args.cuda:
    #    data = data.cuda()
    if is_tuple:
        return data, tag_data
    return data