import torch
import torch.nn as nn
from torch.distributions import Categorical

# `device`, `hyper_params`, `calc_reward_given_descriptor`, and the two
# moving-average globals used below are defined elsewhere in this project.


def train(meta_decoder, decoder_optimizer, fclayers_for_hyper_params):
    global moving_average
    global moving_average_alpha

    decoder_hidden = meta_decoder.initHidden()
    decoder_optimizer.zero_grad()

    output = torch.zeros([1, 1, meta_decoder.output_size], device=device)
    softmax = nn.Softmax(dim=1)
    softmax_outputs_stored = list()

    # Roll out the controller RNN: the first three decoder steps emit
    # distributions over the first three hyper-parameters (the third one
    # is the interaction type).
    for i in range(3):
        output, decoder_hidden = meta_decoder(output, decoder_hidden)
        softmax_outputs_stored.append(
            softmax(fclayers_for_hyper_params[hyper_params[i][0]](output)))

    # Sample the interaction type; it decides which extra hyper-parameters
    # still need to be decoded.
    output_interaction = softmax_outputs_stored[-1]
    type_of_interaction = Categorical(output_interaction).sample().tolist()[0]

    if type_of_interaction == 0:  # PairwiseEuDist
        for i in range(3, 4):
            output, decoder_hidden = meta_decoder(output, decoder_hidden)
            softmax_outputs_stored.append(
                softmax(fclayers_for_hyper_params[hyper_params[i][0]](output)))
    elif type_of_interaction == 1:  # PairwiseLog
        # This interaction type has no extra hyper-parameters.
        pass
    else:  # PointwiseMLPCE
        for i in range(4, 7):
            output, decoder_hidden = meta_decoder(output, decoder_hidden)
            softmax_outputs_stored.append(
                softmax(fclayers_for_hyper_params[hyper_params[i][0]](output)))

    # Sample one concrete choice from each stored distribution to obtain
    # the architecture descriptor.
    resulted_str = []
    for outputs in softmax_outputs_stored:
        print("softmax_outputs: ", outputs)
        idx = Categorical(outputs).sample()
        resulted_str.append(idx.tolist()[0])
    # The interaction type has already been sampled above; keep that sample.
    resulted_str[2] = type_of_interaction

    resulted_idx = resulted_str
    resulted_str = "_".join(map(str, resulted_str))
    print("resulted_str: " + resulted_str)

    # Evaluate the sampled descriptor, then turn the raw reward into an
    # advantage by subtracting an exponential moving-average baseline.
    reward = calc_reward_given_descriptor(resulted_str)
    if moving_average == -19013:  # sentinel: baseline not initialised yet
        moving_average = reward
        reward = 0.0
    else:
        tmp = reward
        reward = reward - moving_average
        moving_average = moving_average_alpha * tmp + (
            1.0 - moving_average_alpha) * moving_average

    print("current reward: " + str(reward))
    print("current moving average: " + str(moving_average))

    # REINFORCE objective: the sum of log-probabilities of the sampled
    # choices, weighted by the advantage; the loss is its negation.
    expectedReward = 0
    for i in range(len(softmax_outputs_stored)):
        logprob = torch.log(softmax_outputs_stored[i][0][resulted_idx[i]])
        expectedReward += logprob * reward
    loss = -expectedReward
    print('loss:', loss)

    # Finally, backpropagate the loss according to the policy gradient.
    loss.backward()
    decoder_optimizer.step()
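# --- Usage sketch (an assumption, not part of the original code) ---
# A minimal driver for the controller above. `MetaDecoder`, the Adam
# hyper-parameters, the episode count, and the (name, n_choices) layout
# of `hyper_params` are all hypothetical; only train() itself comes from
# this file.

# Baseline globals required by train(); -19013 is the sentinel that
# train() checks to detect an uninitialised baseline. The smoothing
# factor below is an assumed value.
moving_average = -19013
moving_average_alpha = 0.9

if __name__ == "__main__":
    meta_decoder = MetaDecoder()  # hypothetical constructor defined elsewhere
    decoder_optimizer = torch.optim.Adam(meta_decoder.parameters(), lr=1e-3)
    # One classification head per hyper-parameter, mapping the decoder
    # output to that hyper-parameter's candidate choices (assumes each
    # entry of hyper_params is a (name, n_choices) pair).
    fclayers_for_hyper_params = {
        name: nn.Linear(meta_decoder.output_size, n_choices)
        for name, n_choices in hyper_params
    }
    for episode in range(200):  # hypothetical episode count
        train(meta_decoder, decoder_optimizer, fclayers_for_hyper_params)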