def get_batch_train(source, label, i, seq_len, evaluation=False):
    """Get the first 10 tracks (the first half) of each session in the batch."""
    # pad_sequences comes from the pre-TF2 contrib namespace; on TF >= 2 it
    # lives under tensorflow.keras.preprocessing instead.
    from tensorflow.contrib.keras import preprocessing

    # mask out skipped tracks and padding tracks (keep only labels 0 and 1)
    skip_mask = ((label >= 0) * (label < 2)).long()
    source = source * skip_mask

    # reshape
    seq_len = min(seq_len, source.size(0) - 1 - i)
    data = source[i:int(i + seq_len/2)] 
    
    # remove in-session zeros, then re-pad each session at the end
    data = data.t()
    sessions_list = []
    for session in data:  # loop over the batch
        session_remove0 = session[session != 0]
        sessions_list.append(session_remove0)
        # alternative (disabled): keep only sessions with at least 5 tracks
        # if len(session_remove0) >= 5:
        #     sessions_list.append(session_remove0)
    # re-pad to the original padded session length; `session` still holds the
    # last row of the batch, so len(session) is that common length
    data = preprocessing.sequence.pad_sequences(
        sessions_list, len(session), padding='post', truncating='post')
    data = torch.Tensor(data).long().t()
    if evaluation:
        data.requires_grad = False
    target = torch.cat([data[1:],torch.zeros(data.shape[1]).long().unsqueeze(0)])

    return data, target
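
# If the TensorFlow dependency above is unwanted, the same end-padding can be done
# natively in PyTorch. This is a minimal, hypothetical sketch (not part of the
# original code), assuming sessions_list holds 1-D LongTensors as in
# get_batch_train; note that pad_sequence pads to the longest session in the
# batch rather than to a fixed len(session).
import torch
from torch.nn.utils.rnn import pad_sequence

def pad_sessions(sessions_list):
    # pads with 0 at the end of each sequence, up to the longest session in the batch
    padded = pad_sequence(sessions_list, batch_first=True, padding_value=0)  # (batch, max_len)
    return padded.t()  # back to (seq_len, batch), as in the original code
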
def train(epoch, data_source, label):
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(track_dic)

    for batch, i in enumerate(range(0, data_source.size(0) - 1, seq_len)):

        data, targets = get_batch_train(data_source, label, i, seq_len)
        data = data.t()

        # if data.shape[0]:  # guard in case the length filter in get_batch_train removed every session

        optimizer.zero_grad()
        model.hidden = model.init_hidden()  # important: the hidden state must be re-initialised every batch
        output = model(data)
        output = output.transpose(0, 1)
        targets = targets.contiguous().view(-1)
        final_decoded = output.contiguous().view(-1, nout)

        # remove padding rows (index with the boolean mask directly; wrapping it
        # in torch.ByteTensor is deprecated)
        mask_targets = targets != 0
        targets = targets[mask_targets]
        final_decoded = final_decoded[mask_targets]

        #        mask_decoded = mask_targets.unsqueeze(1).repeat(1, final_decoded.shape[1])
        #        final_decoded = final_decoded*mask_decoded.float()

        if final_decoded.shape[0]:  #
            loss = criterion(final_decoded, track_weight[targets])

            loss.backward(retain_graph=True)
            optimizer.step()

            total_loss += loss.data
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'raw_loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(data_source) // seq_len,
                    lr, elapsed * 1000 / log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

    output = None
    targets = None
    final_decoded = None

    return None
def rank(data_source, label):
    model.eval() 
    
    with torch.no_grad():
        ndcg_acc = 0
        ndcg_count = 0
        ntokens = len(track_dic)
        batch_size = data_source.size(1)

        for i in range(0, data_source.size(0) - 1, seq_len):

            data, targets = get_batch_past(data_source, label, i, seq_len, evaluation=True)
            data = data.t()
            targets = targets.t()
           
            tracks_future, targets_future = get_batch_future(data_source, label, i, seq_len, evaluation=True)
            tracks_future = tracks_future.t()
            targets_future = targets_future.t()
     
            for j in range(batch_size):
                track_f = tracks_future[j]
                # remove padding elements
                track_f = track_f[track_f!=0]
                score = []
                for ii in tracks_future[j]:
                    if ii!=0:
                        score.append(track_features['us_popularity_estimate'][int(ii)])
                
                # get data frame without padding element
                df_future = pd.DataFrame({
                    'track': np.array(track_f),
                    'score': np.array(score),
                    'skip_info': np.array(targets_future[j][0:len(track_f)])
                })
                # remove padding elements
                df_future = df_future.loc[df_future['track']!=0]
                
                # sort by popularity
                df_future = df_future.sort_values(by = 'score',ascending=False) #0.8090114383851303
                #sort to the worst case
                #df_future = df_future.sort_values(by = 'skip_info',ascending=False)  #0.6693316113866979
                
                # NDCG
                actual = dcg_score(df_future['skip_info'])
                best = dcg_score(df_future['skip_info'].sort_values(ascending=True))
                
                if best: #best might be 0, while skip_info is 3,3,3,....
                    ndcg = actual/best
                    ndcg_acc = ndcg_acc + ndcg
                else: # avoid nan
                    ndcg_acc = ndcg_acc + 1
                ndcg_count = ndcg_count+1
         
               
        ndcg_avg = ndcg_acc/ndcg_count
    
    return ndcg_avg
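
# dcg_score is defined elsewhere in this project. The sketch below is a hypothetical
# implementation consistent with the comments above: lower skip_info is assumed to be
# better (gain = 3 - skip_info), so an all-3 session yields a best DCG of 0, matching
# the "best might be 0" remark.
import numpy as np

def dcg_score(skip_info):
    gains = 3 - np.asarray(skip_info, dtype=float)      # lower skip_info -> higher gain (assumption)
    discounts = np.log2(np.arange(2, len(gains) + 2))   # ranks 1..n discounted by log2(rank + 1)
    return float(np.sum(gains / discounts))
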
Example 4
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    with torch.no_grad():
        total_loss = 0
        ntokens = len(corpus.dictionary)
        batch_size = data_source.size(1)
        hidden = model.init_hidden(batch_size)
        eff_history_mode = (args.seq_len > args.horizon and not args.repack)

        if eff_history_mode:
            validseqlen = args.seq_len - args.horizon
            seq_len = args.seq_len
        else:
            validseqlen = args.horizon
            seq_len = args.horizon

        processed_data_size = 0
        for i in range(0, data_source.size(0) - 1, validseqlen):
            eff_history = args.horizon if eff_history_mode else 0
            if i + eff_history >= data_source.size(0) - 1: continue
            data, targets = get_batch(data_source, i, seq_len, evaluation=True)

            if args.repack:
                hidden = repackage_hidden(hidden)
            else:
                hidden = model.init_hidden(data.size(1))

            data = data.t()
            net = nn.DataParallel(
                model, device_ids=devices) if batch_size > 10 else model
            (_, output, decoded), hidden, _ = net(data, hidden)
            decoded = decoded.transpose(0, 1)
            targets = targets[eff_history:].contiguous().view(-1)
            final_decoded = decoded[eff_history:].contiguous().view(
                -1, ntokens)

            loss = criterion(final_decoded, targets)
            loss = loss.data

            total_loss += (data.size(1) - eff_history) * loss
            processed_data_size += data.size(1) - eff_history

        output = None
        decoded = None
        targets = None
        final_output = None
        final_decoded = None

        return total_loss.item() / processed_data_size
Example 5
def evaluate(data_source, verbose=False):
    # Turn on evaluation mode which disables dropout.
    if verbose:
        from collections import Counter
        counter = Counter()
        train_file = f"{args.data}/train.txt"
        with open(train_file, 'r') as fh:
            for line in fh:
                counter.update(line.strip().split())
        #  fh_out = open(args.verbose_test_file, "w")
        verbose_criterion = nn.CrossEntropyLoss(reduction='none')  # per-token losses

    model.eval()
    total_loss = 0.
    total_freq_loss = 0.
    total_freq_count = 0
    total_infreq_loss = 0.
    total_infreq_count = 0

    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
            hidden = repackage_hidden(hidden)
            if verbose:
                verbose_loss = verbose_criterion(output_flat, targets)
                verbose_loss = verbose_loss.view(data.size(0), -1)
                print_contents, [freq_loss, freq_count], [
                    infreq_loss, infreq_count
                ] = verbose_test(corpus.dictionary.idx2word, counter, data.t(),
                                 verbose_loss.t())
                total_freq_loss += freq_loss
                total_freq_count += freq_count
                total_infreq_loss += infreq_loss
                total_infreq_count += infreq_count
                #  for print_line in print_contents:
                #  fh_out.write(f"{print_line}\n")

    if verbose:
        #  fh_out.close()
        return math.exp(total_freq_loss / total_freq_count), math.exp(
            total_infreq_loss / total_infreq_count)
    return total_loss / len(data_source)
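
# verbose_test is not shown in these snippets. The sketch below is hypothetical:
# it splits the per-token losses into "frequent" and "infrequent" vocabulary using
# the training counts. The freq_threshold value and the print format are assumptions.
def verbose_test(idx2word, counter, data, token_losses, freq_threshold=100):
    print_contents = []
    freq_loss = infreq_loss = 0.0
    freq_count = infreq_count = 0
    for seq, losses in zip(data.tolist(), token_losses.tolist()):
        for idx, loss in zip(seq, losses):
            word = idx2word[idx]
            if counter[word] >= freq_threshold:   # assumed frequency cut-off
                freq_loss += loss
                freq_count += 1
            else:
                infreq_loss += loss
                infreq_count += 1
            print_contents.append(f"{word}\t{loss:.4f}")
    return print_contents, [freq_loss, freq_count], [infreq_loss, infreq_count]
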
Example 6
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    batch_size = data_source.size(1)
    hidden = model.init_hidden(batch_size)
    eff_history_mode = (args.seq_len > args.horizon and not args.repack)

    if eff_history_mode:
        validseqlen = args.seq_len - args.horizon
        seq_len = args.seq_len
    else:
        validseqlen = args.horizon
        seq_len = args.horizon

    processed_data_size = 0
    for i in range(0, data_source.size(0) - 1, validseqlen):
        eff_history = args.horizon if eff_history_mode else 0
        if i + eff_history >= data_source.size(0) - 1: continue
        data, targets = get_batch(data_source, i, seq_len, evaluation=True)

        if args.repack:
            hidden = repackage_hidden(hidden)
        else:
            hidden = model.init_hidden(data.size(1))

        data = data.t()
        net = nn.DataParallel(model) if batch_size > 10 else model
        (_, _, output), hidden, _ = net(data, hidden, decode=False)
        output = output.transpose(0, 1)
        targets = targets[eff_history:].contiguous().view(-1)
        final_output = output[eff_history:].contiguous().view(-1, output.size(2))

        loss = criterion(model.decoder.weight, model.decoder.bias, final_output, targets)
        #loss = loss.data

        total_loss += (data.size(1) - eff_history) * float(loss)
        processed_data_size += data.size(1) - eff_history
        del loss, data, targets
        gc.collect()
        torch.cuda.empty_cache()

    data = None
    output = None
    targets = None
    final_output = None

    return total_loss / processed_data_size
def evaluate(data_source, label_, session_feature):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        ntokens = len(track_dic)
        batch_size = data_source.size(1)

        processed_data_size = 0
        for i in range(0, data_source.size(0) - 1, seq_len):

            data, targets, label, sf = get_batch_train(data_source, label_,
                                                       session_feature, i,
                                                       seq_len)
            data = data.t()
            sf = sf.t()

            model.hidden = model.init_hidden()
            output = model(data, sf)
            output = output.transpose(0, 1)
            targets = targets.contiguous().view(-1)
            label = label.contiguous().view(-1)
            final_decoded = output.contiguous().view(-1, nout + 1)

            # remove padding rows
            #            mask_targets = targets!=0
            #            targets = targets[targets!=0]
            #            label= label[label!=-1]
            #            loc = torch.ByteTensor(mask_targets) #<IndexBackward>  <ViewBackward>
            #            final_decoded = final_decoded[loc]

            if final_decoded.shape[0]:  #
                final_targets = torch.cat(
                    [track_weight[targets],
                     label.unsqueeze(1).float()], dim=1)
                loss = criterion(final_decoded, final_targets)  ######
                loss = loss.data

                total_loss += data.size(1) * loss
                processed_data_size += data.size(1)

        output = None
        targets = None
        final_decoded = None

        return total_loss.item() / processed_data_size
Example 8
def evaluate(loader):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i, batch in enumerate(loader):
            (data, targets) = batch
            data = data.t()
            targets = targets.t()

            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat,
                                                targets.flatten()).item()
            hidden = repackage_hidden(hidden)
    return total_loss / (len(loader) - 1)
Example 9
def evaluate(data_source, label):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        ntokens = len(track_dic)
        batch_size = data_source.size(1)

        processed_data_size = 0
        for i in range(0, data_source.size(0) - 1, seq_len):

            data, targets = get_batch_train(data_source,
                                            label,
                                            i,
                                            seq_len,
                                            evaluation=True)
            data = data.t()

            model.hidden = model.init_hidden(
                data.shape[0])  # change the batch size
            output = model(data)
            output = output.transpose(0, 1)
            targets = targets.contiguous().view(-1)
            final_decoded = output.contiguous().view(-1, nout)

            # remove padding rows (index with the boolean mask directly; wrapping
            # it in torch.ByteTensor is deprecated)
            mask_targets = targets != 0
            targets = targets[mask_targets]
            final_decoded = final_decoded[mask_targets]

            if final_decoded.shape[0]:  #
                loss = criterion(final_decoded, track_weight[targets])  ######
                loss = loss.data

                total_loss += data.size(1) * loss
                processed_data_size += data.size(1)

        output = None
        targets = None
        final_decoded = None

        return total_loss.item() / processed_data_size
def evaluate(data_source, label_, session_feature):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        ntokens = len(track_dic)
        batch_size = data_source.size(1)

        processed_data_size = 0
        for i in range(0, data_source.size(0) - 1, seq_len):

            data, targets, label, sf = get_batch_train(data_source, label_,
                                                       session_feature, i,
                                                       seq_len)
            data = data.t()
            sf = sf.t()

            model.hidden = model.init_hidden(data.shape[0])
            output = model(data, sf)
            output = output.transpose(0, 1)
            targets = targets.contiguous().view(-1)
            label = label.contiguous().view(-1)
            final_decoded = output.contiguous().view(-1, ntokens)

            # remove skipped rows (labels >= 2 mean the track was skipped);
            # index with the boolean mask directly
            mask_targets = label < 2
            targets = targets[mask_targets]
            final_decoded = final_decoded[mask_targets]

            if final_decoded.shape[0]:  #

                loss = criterion(final_decoded, targets)  ######
                loss = loss.data

                total_loss += data.size(1) * loss
                processed_data_size += data.size(1)

        output = None
        targets = None
        final_decoded = None

        return total_loss.item() / processed_data_size
Example 11
def rank(data_source, label):
    model.eval()

    with torch.no_grad():
        ndcg_acc = 0
        ndcg_count = 0
        ndcg_acc_1 = 0
        ndcg_count_1 = 0
        ndcg_acc_2 = 0
        ndcg_count_2 = 0
        total_loss = 0
        ntokens = len(track_dic)
        batch_size = data_source.size(1)

        for i in range(0, data_source.size(0) - 1, seq_len):

            data, targets = get_batch_past(data_source,
                                           label,
                                           i,
                                           seq_len,
                                           evaluation=True)
            data = data.t()
            targets = targets.t()

            tracks_future, targets_future = get_batch_future(data_source,
                                                             label,
                                                             i,
                                                             seq_len,
                                                             evaluation=True)
            tracks_future = tracks_future.t()
            targets_future = targets_future.t()

            #music_rnn; music_lstm
            model.hidden = model.init_hidden()
            rank_vec = model(data)[:, -1, :]  # batch, ntokens [12, 1, 50704]

            # avoid this per-session loop if possible (a batched scoring sketch
            # follows this function)
            for j in range(batch_size):
                track_f = tracks_future[j]
                # remove padding elements
                #track_f = track_f[track_f!=0]
                cos = nn.CosineSimilarity(dim=1, eps=1e-6)
                score = cos(rank_vec[j].unsqueeze(0), track_weight[track_f])
                # get data frame without padding element
                df_future = pd.DataFrame({
                    'track':
                    np.array(track_f),
                    'score':
                    np.array(score),
                    'skip_info':
                    np.array(targets_future[j][0:len(track_f)])
                })
                # remove padding elements
                df_future = df_future.loc[df_future['track'] != 0]
                # sort tracks_future according to score
                df_future = df_future.sort_values(
                    by='score',
                    ascending=False)  #0.8154440681444343 #0.8227163023038474
                #df_future = df_future.sample(frac=1) # 0.8115378563756852 #0.7787248338261271
                # NDCG
                actual = dcg_score(df_future['skip_info'])
                best = dcg_score(
                    df_future['skip_info'].sort_values(ascending=True))

                if best:  #best might be 0, while skip_info is 3,3,3,....
                    ndcg = actual / best
                    ndcg_acc = ndcg_acc + ndcg
                else:  # avoid nan
                    ndcg_acc = ndcg_acc + 1
                ndcg_count = ndcg_count + 1

                if (targets[j] == 0).sum() < x:  # x: threshold defined outside this function
                    track_f = tracks_future[j]
                    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
                    score_1 = cos(rank_vec[j].unsqueeze(0),
                                  track_weight[track_f])
                    df_future_1 = pd.DataFrame({
                        'track':
                        np.array(track_f),
                        'score':
                        np.array(score_1),
                        'skip_info':
                        np.array(targets_future[j][0:len(track_f)])
                    })
                    df_future_1 = df_future_1.loc[df_future_1['track'] != 0]
                    df_future_1 = df_future_1.sort_values(
                        by='score', ascending=False
                    )  #0.8154440681444343 #0.8227163023038474
                    # NDCG
                    actual_1 = dcg_score(df_future_1['skip_info'])
                    best_1 = dcg_score(
                        df_future_1['skip_info'].sort_values(ascending=True))

                    if best_1:  # best_1 might be 0 when skip_info is all 3s
                        ndcg_1 = actual_1 / best_1
                        ndcg_acc_1 = ndcg_acc_1 + ndcg_1
                    else:  # avoid nan
                        ndcg_acc_1 = ndcg_acc_1 + 1
                    ndcg_count_1 = ndcg_count_1 + 1
                else:
                    track_f = tracks_future[j]
                    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
                    score_2 = cos(rank_vec[j].unsqueeze(0),
                                  track_weight[track_f])
                    df_future_2 = pd.DataFrame({
                        'track':
                        np.array(track_f),
                        'score':
                        np.array(score_2),
                        'skip_info':
                        np.array(targets_future[j][0:len(track_f)])
                    })
                    df_future_2 = df_future_2.loc[df_future_2['track'] != 0]
                    df_future_2 = df_future_2.sort_values(
                        by='score', ascending=False
                    )  #0.8154440681444343 #0.8227163023038474

                    # NDCG
                    actual_2 = dcg_score(df_future_2['skip_info'])
                    best_2 = dcg_score(
                        df_future_2['skip_info'].sort_values(ascending=True))

                    if best_2:  # best_2 might be 0 when skip_info is all 3s
                        ndcg_2 = actual_2 / best_2
                        ndcg_acc_2 = ndcg_acc_2 + ndcg_2
                    else:  # avoid nan
                        ndcg_acc_2 = ndcg_acc_2 + 1
                    ndcg_count_2 = ndcg_count_2 + 1
        ndcg_avg = ndcg_acc / ndcg_count
        ndcg_avg_1 = ndcg_acc_1 / ndcg_count_1
        ndcg_avg_2 = ndcg_acc_2 / ndcg_count_2

    return ndcg_avg, ndcg_avg_1, ndcg_avg_2
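
# The "avoid this per-session loop" note above could be addressed by scoring every
# session at once. Hypothetical sketch, assuming tracks_future is a (batch, L)
# LongTensor that is 0-padded and track_weight is an (ntokens, dim) tensor.
import torch
import torch.nn.functional as F

def batched_cosine_scores(rank_vec, tracks_future, track_weight):
    emb = track_weight[tracks_future]                     # (batch, L, dim)
    query = rank_vec.unsqueeze(1).expand_as(emb)          # (batch, L, dim)
    scores = F.cosine_similarity(query, emb, dim=-1, eps=1e-6)       # (batch, L)
    return scores.masked_fill(tracks_future == 0, float('-inf'))     # mask padding slots
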
Example 12
def evaluate(data_source, label):
    # Turn on evaluation mode which disables dropout.
    model.eval()  # puts all layers (dropout, batchnorm, ...) into eval mode instead of training mode
    with torch.no_grad():  # disables autograd: lower memory use and faster, but no backprop (not needed for evaluation)
        total_loss = 0
        ntokens = len(track_dic)
        batch_size = data_source.size(1)
        hidden = model.init_hidden(batch_size)
        eff_history_mode = (args.seq_len > args.horizon and not args.repack)

        if eff_history_mode:
            validseqlen = args.seq_len - args.horizon
            seq_len = args.seq_len
        else:
            validseqlen = args.horizon
            seq_len = args.horizon

        processed_data_size = 0
        for i in range(0, data_source.size(0) - 1, args.seq_len):
            eff_history = args.horizon if eff_history_mode else 0
            if i + eff_history >= data_source.size(0) - 1:
                continue
            data, targets = get_batch_train(data_source,
                                            label,
                                            i,
                                            seq_len,
                                            evaluation=True)
            data = data.t()

            if args.repack:  # repacking does not work in this model because the batch size differs between batches
                hidden = repackage_hidden(hidden)
            else:
                hidden = model.init_hidden(data.size(0))

            net = nn.DataParallel(model) if batch_size > 10 else model
            (_, output, decoded), hidden, _ = net(data, hidden)  # output; decoded => vector
            decoded = decoded.transpose(0, 1)
            targets = targets[eff_history:].contiguous().view(-1)
            final_decoded = decoded[eff_history:].contiguous().view(
                -1, ntokens)

            # remove padding rows (index with the boolean mask directly)
            mask_targets = targets != 0
            targets = targets[mask_targets]
            final_decoded = final_decoded[mask_targets]

            if final_decoded.shape[0]:  #

                loss = criterion(final_decoded, targets)  ######
                loss = loss.data

                total_loss += (data.size(1) - eff_history) * loss
                processed_data_size += data.size(1) - eff_history

        output = None
        decoded = None
        targets = None
        final_output = None
        final_decoded = None

        return total_loss.item() / processed_data_size
Example 13
def train(epoch, data_source, label):
    model.train()
    total_loss = 0
    total_aux_losses = 0
    start_time = time.time()
    ntokens = len(track_dic)
    hidden = model.init_hidden(args.batch_size)
    eff_history_mode = (args.seq_len > 0 or not args.repack)

    if eff_history_mode:
        validseqlen = args.seq_len - args.horizon
        seq_len = args.seq_len
    else:
        validseqlen = args.horizon
        seq_len = args.horizon

    for batch, i in enumerate(range(0, data_source.size(0) - 1, args.seq_len)):
        # When not using repackaging mode, we DISCARD the first args.horizon outputs in backprop
        # (they are the "effective history").
        eff_history = args.horizon if eff_history_mode else 0
        if i + eff_history >= data_source.size(0) - 1:
            continue
        data, targets = get_batch_train(data_source, label, i, seq_len)
        data = data.t()

        # if data.shape[0]:  # guard in case the length filter in get_batch_train removed every session

        if args.repack:  # repacking does not work in this model because the batch size differs between batches
            hidden = repackage_hidden(hidden)
        else:
            hidden = model.init_hidden(data.size(0))

        optimizer.zero_grad()
        net = nn.DataParallel(model) if data.size(0) > 10 else model
        (raw_output, output, decoded), hidden, all_decoded = net(data, hidden)  # slow step
        # raw_output.shape  = [12, 20, 29]
        # output.shape = [12, 20, 29]
        # decoded.shape = [12, 20, 50704]
        # hidden[0].shape = hidden[1].shape = [12, 1029, 1]
        # all_decoded.shape = [12, 1, 20, 50704]
        decoded = decoded.transpose(0, 1)
        # decoded.shape = [20, 12, 50704]
        targets = targets[eff_history:].contiguous().view(-1)
        # targets.shape = torch.Size([180])
        final_decoded = decoded[eff_history:].contiguous().view(-1, ntokens)
        #final_decoded.shape = torch.Size([180, 50704])

        # remove padding rows (index with the boolean mask directly; loc is reused
        # below for the auxiliary decodings)
        loc = targets != 0
        targets = targets[loc]
        final_decoded = final_decoded[loc]

        if final_decoded.shape[0]:
            # Loss 1: CE loss
            raw_loss = criterion(final_decoded, targets)

            # qiqi's debug check (disabled):
            # if raw_loss > 20:
            #     print(raw_loss, batch, i, final_decoded.shape, targets.shape)

            # Loss 2: Aux loss
            aux_losses = 0
            if args.aux > 0:
                all_decoded = all_decoded[:, :, eff_history:].permute(
                    1, 2, 0, 3).contiguous()  # (N, M, L, C) --> (M, L, N, C)
                aux_size = all_decoded.size(0)
                all_decoded = all_decoded.view(aux_size, -1, ntokens)

                # remove padding rows
                all_decoded = all_decoded.transpose(0, 1)[loc].transpose(0, 1)

                aux_losses = args.aux * sum([
                    criterion(all_decoded[i], targets) for i in range(aux_size)
                ])

            # Combine losses
            loss = raw_loss + aux_losses  #+ alpha_loss + beta_loss
            loss.backward(retain_graph=True)  # slow step

            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()

            total_loss += raw_loss.data
            if args.aux:
                total_aux_losses += aux_losses.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            cur_aux_loss = total_aux_losses.item(
            ) / args.log_interval if args.aux else 0
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'raw_loss {:5.2f} | aux_loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(data_source) // validseqlen, lr,
                    elapsed * 1000 / args.log_interval, cur_loss, cur_aux_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            total_aux_losses = 0
            start_time = time.time()



    raw_output = None
    output = None
    decoded = None
    targets = None
    final_output = None
    final_decoded = None
    all_decoded = None
    all_outputs = None
    final_raw_output = None

    return None
Example 14
def train(epoch):
    model.train()
    total_loss = 0
    total_aux_losses = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    eff_history_mode = (args.seq_len > args.horizon and not args.repack)

    if eff_history_mode:
        validseqlen = args.seq_len - args.horizon
        seq_len = args.seq_len
    else:
        validseqlen = args.horizon
        seq_len = args.horizon

    for batch, i in enumerate(range(0, train_data.size(0) - 1, validseqlen)):
        # When not using repackaging mode, we DISCARD the first args.horizon outputs in backprop
        # (they are the "effective history").
        eff_history = args.horizon if eff_history_mode else 0
        if i + eff_history >= train_data.size(0) - 1: continue
        data, targets = get_batch(train_data, i, seq_len)

        if args.repack:
            hidden = repackage_hidden(hidden)
        else:
            hidden = model.init_hidden(args.batch_size)

        optimizer.zero_grad()
        data = data.t()
        net = nn.DataParallel(model) if data.size(0) > 10 else model
        (raw_output, _, output), hidden, all_outputs = net(data,
                                                           hidden,
                                                           decode=False)
        raw_output = raw_output.transpose(0, 1)
        output = output.transpose(0, 1)
        targets = targets[eff_history:].contiguous().view(-1)
        final_output = output[eff_history:].contiguous().view(
            -1, output.size(2))
        dec_weight, dec_bias = model.decoder.weight, model.decoder.bias

        # Loss 1: CE loss
        raw_loss = criterion(dec_weight, dec_bias, final_output, targets)

        # Loss 2: Aux loss
        aux_losses = 0
        if args.aux > 0:
            all_outputs = all_outputs[:, :,
                                      eff_history:].permute(1, 2, 0,
                                                            3).contiguous()
            aux_size = all_outputs.size(0)  # The number of auxiliary losses
            all_outputs = all_outputs.view(aux_size, -1, all_outputs.size(3))
            aux_losses = args.aux * sum([
                criterion(dec_weight, dec_bias, all_outputs[i], targets)
                for i in range(aux_size)
            ])

        # Combine losses
        loss = raw_loss + aux_losses
        loss.backward()

        torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        if args.aux:
            total_aux_losses += aux_losses.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            cur_aux_loss = total_aux_losses.item(
            ) / args.log_interval if args.aux else 0
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                'raw_loss {:5.2f} | aux_loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // validseqlen, lr,
                    elapsed * 1000 / args.log_interval, cur_loss, cur_aux_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            total_aux_losses = 0
            start_time = time.time()

        sys.stdout.flush()

    data = None
    raw_output = None
    output = None
    targets = None
    final_output = None
    all_outputs = None
Example 15
def train(cumulative_steps=None, cumulative_time=None):
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.distributed:
        hidden = model.module.init_hidden(args.batch_size)
    else:
        hidden = model.init_hidden(args.batch_size)
    done = False
    for i, batch in enumerate(train_loader):
        total_duration_tracker_start = time.time()

        # Batch size should be the second dimension, not first.
        (data, targets) = batch
        data = data.t()
        targets = targets.t()

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)

        # Shape of output and targets need to align.
        loss = criterion(output.view(-1, ntokens), targets.flatten())
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.item()

        if i % args.log_interval == 0 and i > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, i, len(train_loader), lr,
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        if cumulative_steps is not None:
            cumulative_steps += 1
            if (args.throughput_estimation_interval is not None
                    and cumulative_steps % args.throughput_estimation_interval
                    == 0):
                print('[THROUGHPUT_ESTIMATION]\t%s\t%d' %
                      (time.time(), cumulative_steps))

            if args.steps is not None and cumulative_steps >= args.steps:
                done = True
                break
        if args.max_duration is not None:
            cumulative_time += time.time() - total_duration_tracker_start
            total_duration_tracker_start = time.time()
            if cumulative_time >= args.max_duration:
                done = True
                break

    return (cumulative_steps, cumulative_time, done)
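
# repackage_hidden, used throughout these snippets, is the standard helper from the
# PyTorch word-language-model example; a minimal version for reference:
def repackage_hidden(h):
    """Detach hidden states from their history so gradients do not flow back
    into previous batches (works for tensors and for (h, c) tuples)."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)
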
Example 16
def rank(data_source, label):
    model.eval()

    with torch.no_grad():
        ndcg_acc = 0
        ndcg_count = 0
        ndcg_acc_1 = 0
        ndcg_count_1 = 0
        ndcg_acc_2 = 0
        ndcg_count_2 = 0
        total_loss = 0
        ntokens = len(track_dic)
        batch_size = data_source.size(1)

        for i in range(0, data_source.size(0) - 1, seq_len):

            data, data_skiped, targets = get_batch_past(data_source,
                                                        label,
                                                        i,
                                                        seq_len,
                                                        evaluation=True)
            data = data.t()
            data_skiped = data_skiped.t()
            targets = targets.t()

            tracks_future, targets_future = get_batch_future(data_source,
                                                             label,
                                                             i,
                                                             seq_len,
                                                             evaluation=True)
            tracks_future = tracks_future.t()
            targets_future = targets_future.t()

            rank_vec = model(data)[:, -1, :]
            rank_vec_skipped = model(data_skiped)[:, -1, :]

            # avoid this per-session loop if possible
            for j in range(batch_size):
                if (targets[j] == 0).sum() <= 5:
                    track_f = tracks_future[j]
                    # remove padding elements
                    #track_f = track_f[track_f!=0]
                    score = rank_vec[j][track_f]
                    # get data frame without padding element
                    df_future = pd.DataFrame({
                        'track':
                        np.array(track_f),
                        'score':
                        np.array(score),
                        'skip_info':
                        np.array(targets_future[j][0:len(track_f)])
                    })
                    # remove padding elements
                    df_future = df_future.loc[df_future['track'] != 0]
                    # sort tracks_future according to score
                    df_future = df_future.sort_values(
                        by='score', ascending=False
                    )  #0.8154440681444343 #0.8227163023038474
                    #df_future = df_future.sample(frac=1) # 0.8115378563756852 #0.7787248338261271
                    # NDCG
                    actual = dcg_score(df_future['skip_info'])
                    best = dcg_score(
                        df_future['skip_info'].sort_values(ascending=True))

                    if best:  #best might be 0, while skip_info is 3,3,3,....
                        ndcg = actual / best
                        ndcg_acc = ndcg_acc + ndcg
                    else:  # avoid nan
                        ndcg_acc = ndcg_acc + 1
                    ndcg_count = ndcg_count + 1

                else:
                    track_f = tracks_future[j]
                    # remove padding elements
                    #track_f = track_f[track_f!=0]
                    score = rank_vec_skipped[j][track_f]
                    # get data frame without padding element
                    df_future = pd.DataFrame({
                        'track':
                        np.array(track_f),
                        'score':
                        np.array(score),
                        'skip_info':
                        np.array(targets_future[j][0:len(track_f)])
                    })
                    # remove padding elements
                    df_future = df_future.loc[df_future['track'] != 0]
                    # sort tracks_future according to score
                    df_future = df_future.sort_values(
                        by='score', ascending=True
                    )  #0.8154440681444343 #0.8227163023038474
                    #df_future = df_future.sample(frac=1) # 0.8115378563756852 #0.7787248338261271
                    # NDCG
                    actual = dcg_score(df_future['skip_info'])
                    best = dcg_score(
                        df_future['skip_info'].sort_values(ascending=True))

                    if best:  #best might be 0, while skip_info is 3,3,3,....
                        ndcg = actual / best
                        ndcg_acc = ndcg_acc + ndcg
                    else:  # avoid nan
                        ndcg_acc = ndcg_acc + 1
                    ndcg_count = ndcg_count + 1
        ndcg_avg = ndcg_acc / ndcg_count
    return ndcg_avg
def train(epoch):
    model.train()
    total_loss = 0
    total_aux_losses = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    eff_history_mode = (args.seq_len > 0 or not args.repack)

    if eff_history_mode:
        validseqlen = args.seq_len - args.horizon
        seq_len = args.seq_len
    else:
        validseqlen = args.horizon
        seq_len = args.horizon

    for batch, i in enumerate(range(0, train_data.size(0) - 1, validseqlen)):
        # When not using repackaging mode, we DISCARD the first args.horizon outputs in backprop
        # (they are the "effective history").
        eff_history = args.horizon if eff_history_mode else 0
        if i + eff_history >= train_data.size(0) - 1: continue
        data, targets = get_batch(train_data, i, seq_len)

        if args.repack:
            hidden = repackage_hidden(hidden)
        else:
            hidden = model.init_hidden(args.batch_size)

        optimizer.zero_grad()
        data = data.t()
        net = nn.DataParallel(model) if data.size(0) > 10 else model
        (raw_output, output, decoded), hidden, all_decoded = net(data, hidden)
        decoded = decoded.transpose(0, 1)

        targets = targets[eff_history:].contiguous().view(-1)
        final_decoded = decoded[eff_history:].contiguous().view(-1, ntokens)

        # Loss 1: CE loss
        raw_loss = criterion(final_decoded, targets)

        # Loss 2: Aux loss
        aux_losses = 0
        if args.aux > 0:
            all_decoded = all_decoded[:, :, eff_history:].permute(
                1, 2, 0, 3).contiguous()  # (N, M, L, C) --> (M, L, N, C)
            aux_size = all_decoded.size(0)
            all_decoded = all_decoded.view(aux_size, -1, ntokens)
            aux_losses = args.aux * sum(
                [criterion(all_decoded[i], targets) for i in range(aux_size)])

        # Loss 3: AR & TAR
        alpha_loss = 0
        beta_loss = 0
        if args.alpha > 0:
            output = output.transpose(0, 1)
            final_output = output[eff_history:]
            alpha_loss = args.alpha * final_output.pow(2).mean()
        if args.beta > 0:
            raw_output = raw_output.transpose(0, 1)
            final_raw_output = raw_output[eff_history:]
            beta_loss = args.beta * (final_raw_output[1:] -
                                     final_raw_output[:-1]).pow(2).mean()

        # Combine losses
        loss = raw_loss + aux_losses + alpha_loss + beta_loss
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        if args.aux:
            total_aux_losses += aux_losses.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            cur_aux_loss = total_aux_losses.item() / args.log_interval if args.aux else 0
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'raw_loss {:5.2f} | aux_loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // validseqlen, lr,
                    elapsed * 1000 / args.log_interval, cur_loss, cur_aux_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            total_aux_losses = 0
            start_time = time.time()

            sys.stdout.flush()

    raw_output = None
    output = None
    decoded = None
    targets = None
    final_output = None
    final_decoded = None
    all_decoded = None
    all_outputs = None
    final_raw_output = None
Example 18
def rank(data_source, label_, sessions_feature):
    model.eval()

    with torch.no_grad():
        ndcg_acc = 0
        ndcg_count = 0
        ndcg_acc_1 = 0
        ndcg_count_1 = 0
        ndcg_acc_2 = 0
        ndcg_count_2 = 0
        total_loss = 0
        ntokens = len(track_dic)
        batch_size = data_source.size(1)

        for i in range(0, data_source.size(0) - 1, seq_len):

            data, label, sf = get_batch_past(data_source,
                                             label_,
                                             sessions_feature,
                                             i,
                                             seq_len,
                                             evaluation=True)
            data = data.t()
            label = label.t()
            sf = sf.t()

            tracks_future, targets_future = get_batch_future(data_source,
                                                             label_,
                                                             i,
                                                             seq_len,
                                                             evaluation=True)
            tracks_future = tracks_future.t()
            targets_future = targets_future.t()

            #music_rnn; music_lstm
            hidden = model.init_hidden()
            rank_vec = model(data, sf)[:, -1, :]
            #rank_vec = model(data,sf,hidden)[0][2][:,-1,:]

            # avoid this per-session loop if possible
            for j in range(batch_size):
                track_f = tracks_future[j]
                # remove padding elements
                #track_f = track_f[track_f!=0]
                score = rank_vec[j][track_f]
                # get data frame without padding element
                df_future = pd.DataFrame({
                    'track':
                    np.array(track_f),
                    'score':
                    np.array(score),
                    'skip_info':
                    np.array(targets_future[j][0:len(track_f)])
                })
                # remove padding elements
                df_future = df_future.loc[df_future['track'] != 0]
                # sort tracks_future according to score
                df_future = df_future.sort_values(
                    by='score',
                    ascending=False)  #0.8154440681444343 #0.8227163023038474
                #df_future = df_future.sample(frac=1) # 0.8115378563756852 #0.7787248338261271
                # NDCG
                actual = dcg_score(df_future['skip_info'])
                best = dcg_score(
                    df_future['skip_info'].sort_values(ascending=True))

                if best:  #best might be 0, while skip_info is 3,3,3,....
                    ndcg = actual / best
                    ndcg_acc = ndcg_acc + ndcg
                else:  # avoid nan
                    ndcg_acc = ndcg_acc + 1
                ndcg_count = ndcg_count + 1

                if (label[j] >= 2).sum() < x:  # fewer than x skipped tracks (x is set outside this function)
                    track_f = tracks_future[j]
                    score_1 = rank_vec[j][track_f]
                    df_future_1 = pd.DataFrame({
                        'track':
                        np.array(track_f),
                        'score':
                        np.array(score_1),
                        'skip_info':
                        np.array(targets_future[j][0:len(track_f)])
                    })
                    df_future_1 = df_future_1.loc[df_future_1['track'] != 0]
                    df_future_1 = df_future_1.sort_values(
                        by='score', ascending=False
                    )  #0.8154440681444343 #0.8227163023038474
                    # NDCG
                    actual_1 = dcg_score(df_future_1['skip_info'])
                    best_1 = dcg_score(
                        df_future_1['skip_info'].sort_values(ascending=True))

                    if best_1:  # best_1 might be 0 when skip_info is all 3s
                        ndcg_1 = actual_1 / best_1
                        ndcg_acc_1 = ndcg_acc_1 + ndcg_1
                    else:  # avoid nan
                        ndcg_acc_1 = ndcg_acc_1 + 1
                    ndcg_count_1 = ndcg_count_1 + 1
                else:
                    track_f = tracks_future[j]
                    score_2 = rank_vec[j][track_f]
                    df_future_2 = pd.DataFrame({
                        'track':
                        np.array(track_f),
                        'score':
                        np.array(score_2),
                        'skip_info':
                        np.array(targets_future[j][0:len(track_f)])
                    })
                    df_future_2 = df_future_2.loc[df_future_2['track'] != 0]
                    df_future_2 = df_future_2.sort_values(
                        by='score', ascending=False
                    )  #0.8154440681444343 #0.8227163023038474

                    # NDCG
                    actual_2 = dcg_score(df_future_2['skip_info'])
                    best_2 = dcg_score(
                        df_future_2['skip_info'].sort_values(ascending=True))

                    if best_2:  # best_2 might be 0 when skip_info is all 3s
                        ndcg_2 = actual_2 / best_2
                        ndcg_acc_2 = ndcg_acc_2 + ndcg_2
                    else:  # avoid nan
                        ndcg_acc_2 = ndcg_acc_2 + 1
                    ndcg_count_2 = ndcg_count_2 + 1
        ndcg_avg = ndcg_acc / ndcg_count
        ndcg_avg_1 = ndcg_acc_1 / ndcg_count_1
        ndcg_avg_2 = ndcg_acc_2 / ndcg_count_2

    return ndcg_avg, ndcg_avg_1, ndcg_avg_2