Example #1
def baseline(prep_cache_dir, sol_dir):
    print "Starting Baseline"
    e = Evaluation(prep_cache_dir)
    print "Loading cache"
    t_users = load_cache(prep_cache_dir, 'target_users_set')
    local_t_items = load_cache(prep_cache_dir, 'target_items_local_set')
    local_baseline_counts = load_cache(prep_cache_dir, 'tr_baseline_counts')
    online_t_items = set(load_cache(prep_cache_dir, 'target_items_list'))
    online_baseline_counts = load_cache(prep_cache_dir,
                                        'tr_va_baseline_counts')
    prem_users = load_cache(prep_cache_dir, 'prem_user_set')
    print "Done loading"
    print "Score users by tr obs"
    local_temp = score_users_by_obs(t_users, local_baseline_counts, prem_users)
    print "Score users by tr+va obs"
    online_temp = score_users_by_obs(t_users, online_baseline_counts,
                                     prem_users)

    strategy = [1, 2, 3]
    for s in strategy:
        print "Baseline strategy", s
        local_filename = str(s) + '_baseline_local_eval_scores.txt'
        sub_filename = str(s) + '_baseline_submission.txt'
        local_rec_dump = str(s) + '_baseline_local_raw_rec'
        online_rec_dump = str(s) + '_baseline_online_raw_rec'
        local_recs = baseline_recommend(local_temp, local_t_items, s)
        online_recs = baseline_recommend(online_temp, online_t_items, s)
        log_recs(local_recs, local_rec_dump, sol_dir)
        log_recs(online_recs, online_rec_dump, sol_dir)
        e.format_submission(local_recs, online_recs, sol_dir, local_filename,
                            sub_filename)
    print "Done with Baseline"
Example #2
def compute_scores(raw_data_dir=FLAGS.raw_data, data_dir=FLAGS.data_dir,
  dataset=FLAGS.dataset, save_recommendation=FLAGS.saverec,
  train_dir=FLAGS.train_dir, test=FLAGS.test):
  
  from evaluate import Evaluation as Evaluate
  evaluation = Evaluate(raw_data_dir, test=test)
 
  R = recommend(evaluation.get_uids(), data_dir=data_dir)
  
  evaluation.eval_on(R)
  scores_self, scores_ex = evaluation.get_scores()
  mylog("====evaluation scores (NDCG, RECALL, PRECISION, MAP) @ 2,5,10,20,30====")
  mylog("METRIC_FORMAT (self): {}".format(scores_self))
  mylog("METRIC_FORMAT (ex  ): {}".format(scores_ex))
  if save_recommendation:
    name_inds = os.path.join(train_dir, "indices.npy")
    np.save(name_inds, R)
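The metric line reports NDCG, recall, precision, and MAP at cutoffs 2, 5, 10, 20, and 30. For reference, a self-contained NDCG@k over a ranked binary-relevance vector (an illustrative helper, not part of this codebase):

import numpy as np

def ndcg_at_k(rel, k):
    # rel[i] is 1 if the item ranked at position i is relevant, else 0
    rel = np.asarray(rel, dtype=float)[:k]
    discounts = np.log2(np.arange(2, len(rel) + 2))
    dcg = np.sum(rel / discounts)
    ideal = np.sort(rel)[::-1]           # best possible ordering
    idcg = np.sum(ideal / discounts)
    return dcg / idcg if idcg > 0 else 0.0

print(ndcg_at_k([0, 1, 1, 0], 3))  # ~0.69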
Example #5
import heapq
import math

def expectimax(game, placements, feature_weights, beam=1, return_all=False):
  evaluator = Evaluation(game, feature_weights)
  if not placements:
    return None, evaluator.value()

  def _expectimax(game, placements):
    if not placements:
      return evaluator.value()
    value = 0
    for p in placements[0]:
      best = float('-inf')
      moves = game.own.field.moves(p)
      if moves and game.own.skips:
        moves.append(None)
      for m in moves:
        evaluator.update(m, *game.own.move(m))
        v = _expectimax(game, placements[1:])
        evaluator.rollback(*game.own.undo())
        if v > best:
          best = v
      value += best
    return value / len(placements[0])

  best = None, float('-inf')
  results = [] if return_all else None

  moves = game.own.field.moves(placements[0][0])
  if moves and game.own.skips:
    moves.append(None)

  if beam < 1 and len(placements) > 1:
    def _snap_eval(m):
      evaluator.update(m, *game.own.move(m))
      v = evaluator.value()
      evaluator.rollback(*game.own.undo())
      return v
    num_beam = int(math.ceil(beam * len(moves)))
    moves = heapq.nlargest(num_beam, moves, key=_snap_eval)

  for m in moves:
    evaluator.update(m, *game.own.move(m))
    v = _expectimax(game, placements[1:])
    evaluator.rollback(*game.own.undo())
    if v > best[1]:
      best = m, v

    if results is not None:
      results.append((m, v))

  return (best, results) if return_all else best
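When beam < 1, the search keeps only the top fraction of candidate moves, ranked by a one-step ("snap") evaluation, before recursing. A self-contained sketch of that pruning step with made-up moves and scores:

import heapq
import math

def prune_moves(moves, snap_eval, beam):
    # keep ceil(beam * len(moves)) moves with the highest one-step score
    num_beam = int(math.ceil(beam * len(moves)))
    return heapq.nlargest(num_beam, moves, key=snap_eval)

print(prune_moves([3, 1, 4, 1, 5], snap_eval=lambda m: m, beam=0.4))  # [5, 4]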
Example #6
rec, neg_u = load_data(daily_dir)
print('before filtering: total counts {}'.format(total_count_ui_pairs(rec)))

rec_filtered = filter_out_negs(rec, neg_u)
print('after filtering:  total counts {}'.format(total_count_ui_pairs(rec_filtered)))

rec_processed = process_rec_single_user_online_round(rec_filtered)

print('after postprocess:  total counts {}'.format(total_count_ui_pairs(rec_processed)))



'''
local evaluation part
'''

prep_dir = '../examples/preprocessing_cache/'
raw_data = '../raw_data/xing/'

if len(sys.argv) >= 3:
  gt_dir = sys.argv[2]

from evaluate import Evaluation
e = Evaluation(prep_dir, raw_data, daily_dir, gt_dir)

scores = e.local_eval_on(rec_processed)
print('scores: {}'.format(scores))


Example #7
class Tagger:
    def __init__(self, input, evaluate=False, instant=False):
        self.instant = instant
        self.model = load_model(MODEL)
        self.evaluate = evaluate
        if self.evaluate:
            self.evaluator = Evaluation()
        self.X_train, \
        self.Y_train, \
        self.word2int, \
        self.int2word, \
        self.tag2int, \
        self.int2tag, \
        self.tag2instances = load_data()
        self.input_texts = input
        self.words_pro_sent = []
        self.predicted_tags = []
        self.correct_tags = []
        self.tokenized_sentence = []

    def make_lists(self):
        for line in self.input_texts:
            words = []
            tags = []
            if len(line) > 0:
                for word in line.split():
                    if self.evaluate:
                        try:
                            w, tag = word.split('/')
                            w = w.lower()
                            words.append(w)
                            tags.append(tag)
                        except ValueError:
                            print("Could not split by / - {}".format(word))
                    else:
                        words.append(word)
                        tags.append("UNK")

            self.words_pro_sent.append(words)
            self.correct_tags.append(tags)

    def get_predicted_tags(self, prediction, tokenized):
        predicted = []
        for i, pred in enumerate(prediction[0]):
            if i >= len(prediction[0]) - len(tokenized):
                try:
                    predicted.append(self.int2tag[np.argmax(pred)])
                except KeyError:
                    pass

        return predicted

    def label_data(self):
        words = []
        predicted = []
        self.make_lists()
        for i in range(len(self.words_pro_sent)):
            for word in self.words_pro_sent[i]:
                try:
                    stemObj = UkrainianStemmer(word)
                    word = stemObj.stem_word()
                    self.tokenized_sentence.append(self.word2int[word])

                except KeyError:
                    self.tokenized_sentence.append(self.word2int["<UNKNOWN>"])

            np_tokenized = np.asarray([self.tokenized_sentence])

            padded_tokenized_sentence = pad_sequences(np_tokenized, maxlen=100)
            prediction = self.model.predict(padded_tokenized_sentence)
            predicted_tags = self.get_predicted_tags(prediction,
                                                     self.words_pro_sent[i])

            if self.evaluate:
                self.evaluator.calculate_correct(self.correct_tags[i],
                                                 predicted_tags)

            write_tagged(self.words_pro_sent[i], predicted_tags)
            self.predicted_tags.append(predicted_tags)

        if self.evaluate:
            self.evaluator.print_eval()
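A possible driver for the Tagger class above, assuming the same module-level helpers (load_model, load_data, write_tagged) are importable; the input file name is a placeholder. With evaluate=True each token must look like word/TAG:

# Hypothetical usage: one sentence per line.
with open('input.txt', encoding='utf-8') as f:
    lines = [line.strip() for line in f]
tagger = Tagger(lines, evaluate=False)
tagger.label_data()
print(tagger.predicted_tags[:1])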
Example #8
def get_data(raw_data,
             data_dir=FLAGS.data_dir,
             combine_att=FLAGS.combine_att,
             logits_size_tr=FLAGS.item_vocab_size,
             thresh=FLAGS.vocab_min_thresh,
             use_user_feature=FLAGS.use_user_feature,
             test=FLAGS.test,
             mylog=mylog,
             use_item_feature=FLAGS.use_item_feature,
             recommend=False):

    (data_tr, data_va, u_attr, i_attr, item_ind2logit_ind, logit_ind2item_ind,
     user_index,
     item_index) = read_attributed_data(raw_data_dir=raw_data,
                                        data_dir=data_dir,
                                        combine_att=combine_att,
                                        logits_size_tr=logits_size_tr,
                                        thresh=thresh,
                                        use_user_feature=use_user_feature,
                                        use_item_feature=use_item_feature,
                                        test=test,
                                        mylog=mylog)

    # remove unk
    data_tr = [p for p in data_tr if (p[1] in item_ind2logit_ind)]

    # remove items before week 40
    if FLAGS.after40:
        data_tr = [p for p in data_tr if (to_week(p[2]) >= 40)]

    # item frequency (for sampling)
    item_population, p_item = item_frequency(data_tr, FLAGS.power)

    # UNK and START
    # print(len(item_ind2logit_ind))
    # print(len(logit_ind2item_ind))
    # print(len(item_index))
    START_ID = len(item_index)
    # START_ID = i_attr.get_item_last_index()
    item_ind2logit_ind[START_ID] = 0
    seq_all = form_sequence(data_tr, maxlen=FLAGS.L)
    seq_tr0, seq_va0 = split_train_dev(seq_all, ratio=0.05)

    # calculate buckets
    global _buckets
    _buckets = calculate_buckets(seq_tr0 + seq_va0, FLAGS.L, FLAGS.n_bucket)
    _buckets = sorted(_buckets)

    # split_buckets
    seq_tr = split_buckets(seq_tr0, _buckets)
    seq_va = split_buckets(seq_va0, _buckets)

    # get test data
    if recommend:
        from evaluate import Evaluation as Evaluate
        evaluation = Evaluate(raw_data, test=test)
        uids = evaluation.get_uinds()  # abuse of 'uids'  : actually uinds
        seq_test = form_sequence_prediction(seq_all, uids, FLAGS.L, START_ID)
        _buckets = calculate_buckets(seq_test, FLAGS.L, FLAGS.n_bucket)
        _buckets = sorted(_buckets)
        seq_test = split_buckets(seq_test, _buckets)
    else:
        seq_test = []
        evaluation = None
        uids = []

    # create embedAttr

    devices = get_device_address(FLAGS.N)
    with tf.device(devices[0]):
        u_attr.set_model_size(FLAGS.size)
        i_attr.set_model_size(FLAGS.size)

        # if not FLAGS.use_item_feature:
        #     mylog("NOT using item attributes")
        #     i_attr.num_features_cat = 1
        #     i_attr.num_features_mulhot = 0

        # if not FLAGS.use_user_feature:
        #     mylog("NOT using user attributes")
        #     u_attr.num_features_cat = 1
        #     u_attr.num_features_mulhot = 0

        embAttr = embed_attribute.EmbeddingAttribute(u_attr,
                                                     i_attr,
                                                     FLAGS.batch_size,
                                                     FLAGS.n_sampled,
                                                     _buckets[-1],
                                                     FLAGS.use_sep_item,
                                                     item_ind2logit_ind,
                                                     logit_ind2item_ind,
                                                     devices=devices)

        if FLAGS.loss in ["warp", 'mw']:
            prepare_warp(embAttr, seq_tr0, seq_va0)

    return (seq_tr, seq_va, seq_test, embAttr, START_ID, item_population,
            p_item, evaluation, uids, user_index, item_index,
            logit_ind2item_ind)
Example #9
    #Training Linear Regression
    regression = __Linear_Regression.fit(Xtrain, Ytrain)

    #predict Fuzzy
    Ypredict_Fuzzy = []
    squared_error_sum = 0
    for index in range(np.shape(Xtest)[0]):
        evaporation, humidity, pressure, cloud, temp = (Xtest[index][0],
                                                        Xtest[index][1],
                                                        Xtest[index][2],
                                                        Xtest[index][3],
                                                        Xtest[index][4])
        result_predict = __FIS.predict(evaporation, humidity, pressure, cloud,
                                       temp)
        Ypredict_Fuzzy.append(result_predict)
        squared_error_sum += (result_predict - Ytest[index])**2

    #predict Linear Regression
    Ypredict_LinearRegression = regression.predict(Xtest)

    #eval
    __Evaluation = Evaluation()
    MSE_Fuzzylogic = squared_error_sum / len(Ytest)

    MSE_LinearRegression = __Evaluation.eval(Ypredict_LinearRegression, Ytest)
    print(MSE_Fuzzylogic, MSE_LinearRegression)

    #Plot
    # __Plot_Figure = Plot_Figure()
    # label = 'Fuzzy logic '
    # __Plot_Figure.plot(label, Ypredict_Fuzzy, Ytest)
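The accumulate-then-divide loop above is just the mean squared error. An equivalent NumPy computation on toy data:

import numpy as np

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.5, 1.5, 3.5])
mse = np.mean((y_pred - y_true) ** 2)   # same as sum((p - y)**2) / len(y)
print(mse)  # 0.25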
Example #10
def recommend(raw_data=FLAGS.raw_data, test=FLAGS.test, loss=FLAGS.loss, 
  batch_size=FLAGS.batch_size, topN=FLAGS.top_N_items,
  device_log=FLAGS.device_log):

  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, 
    log_device_placement=device_log)) as sess:
    mylog("reading data")

    (_, items_dev, _, _, u_attributes, i_attributes, item_ind2logit_ind, 
      logit_ind2item_ind, _, user_index, item_index) = get_data(raw_data, 
      data_dir=FLAGS.data_dir)
    
    from evaluate import Evaluation as Evaluate
    
    evaluation = Evaluate(raw_data, test=test)
    
    model = create_model(sess, u_attributes, i_attributes, item_ind2logit_ind,
      logit_ind2item_ind, loss=loss, ind_item=None)

    Uinds = evaluation.get_uinds()
    N = len(Uinds)
    mylog("N = %d" % N)
    Uinds = [p for p in Uinds if p in items_dev]
    mylog("new N = {}, (reduced from original {})".format(len(Uinds), N))
    if len(Uinds) < N:
      evaluation.set_uinds(Uinds)
    N = len(Uinds)
    rec = np.zeros((N, topN), dtype=int)
    count = 0
    time_start = time.time()
    for idx_s in range(0, N, batch_size):
      count += 1
      if count % 100 == 0:
        mylog("idx: %d, c: %d" % (idx_s, count))
        
      idx_e = idx_s + batch_size
      if idx_e <= N:
        users = Uinds[idx_s: idx_e]
        items_input = [items_dev[u] for u in users]
        items_input = list(map(list, zip(*items_input)))
        recs = model.step(sess, users, items_input, forward_only=True, 
          recommend = True, recommend_new = FLAGS.recommend_new)
        rec[idx_s:idx_e, :] = recs
      else:
        users = list(range(idx_s, N)) + [0] * (idx_e - N)
        users = [Uinds[t] for t in users]
        items_input = [items_dev[u] for u in users]
        items_input = list(map(list, zip(*items_input)))
        recs = model.step(sess, users, items_input, forward_only=True, 
          recommend = True, recommend_new = FLAGS.recommend_new)
        idx_e = N
        rec[idx_s:idx_e, :] = recs[:(idx_e-idx_s),:]
    # return rec: i:  uinds[i] --> logid

    time_end = time.time()
    mylog("Time used %.1f" % (time_end - time_start))

    ind2id = {}
    for iid in item_index:
      uind = item_index[iid]
      assert(uind not in ind2id)
      ind2id[uind] = iid
    
    uids = evaluation.get_uids()
    R = {}
    for i in range(N):
      uid = uids[i]
      R[uid] = [ind2id[logit_ind2item_ind[v]] for v in list(rec[i, :])]

    evaluation.eval_on(R)
    scores_self, scores_ex = evaluation.get_scores()
    mylog("====evaluation scores (NDCG, RECALL, PRECISION, MAP) @ 2,5,10,20,30====")
    mylog("METRIC_FORMAT (self): {}".format(scores_self))
    mylog("METRIC_FORMAT (ex  ): {}".format(scores_ex))

  return
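The loop above pads the final partial batch with index 0 so every batch has equal size, then discards the padded rows after inference. The pattern in isolation:

def batched_indices(n, batch_size):
    # yields (indices, pad) where pad is the number of filler entries
    for start in range(0, n, batch_size):
        end = start + batch_size
        idx = list(range(start, min(end, n)))
        pad = end - n if end > n else 0
        yield idx + [0] * pad, pad

for idx, pad in batched_indices(5, 2):
    print(idx, pad)  # [0, 1] 0 / [2, 3] 0 / [4, 0] 1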
Example #11
    def evaluate_hand(self, list_cards):

        evaluation = Evaluation(list_cards).evaluate_hand()
        return evaluation
Example #12
class NetTrain:
    def __init__(self, backbone, head, train_data_loader, val_dataset,
                 criterions=None,
                 loss_weights=None,
                 optimizer=None,
                 backbone_name='backbone',
                 head_name='head',
                 lowest_train_loss=5,
                 use_cuda=True, gpu_list=None):
        self.train_data_loader = train_data_loader
        self.backbone = backbone
        self.head = head
        self.backbone_name = backbone_name
        self.head_name = head_name
        self.loss_forward = loss_forward
        self.evaluation = Evaluation(val_dataset, 'lfw', self.batch_inference)
        self.criterions = criterions
        self.loss_weights = loss_weights
        self.optimizer = optimizer
        self.lowest_train_loss = lowest_train_loss
        self.use_cuda = use_cuda
        self.gpu_list = gpu_list
        self.writer = SummaryWriter()
        sys.stdout = Logger()
        self.epoch = 0
        self.max_epoch = 400
        self.combine_conv_bn_epoch = 150
        self.init_meter()

        if self.criterions is None:
            self.criterions = {'xent': torch.nn.CrossEntropyLoss()}
        if self.loss_weights is None:
            self.loss_weights = torch.as_tensor([1.0]*len(self.criterions))
        if self.gpu_list is None:
            self.gpu_list = range(torch.cuda.device_count())
        if self.use_cuda:
            self.backbone.cuda()
            self.head.cuda()
            self.loss_weights = self.loss_weights.cuda()

    def init_meter(self):
        self.accuracy_top_1 = AverageMeter()
        self.accuracy_top_5 = AverageMeter()
        self.total_losses_meter = AverageMeter()
        self.loss_meters = list()
        for index, criterion_name in enumerate(self.criterions.keys()):
            self.loss_meters.append(AverageMeter())

    def reset_meter(self):
        self.accuracy_top_1.reset()
        self.accuracy_top_5.reset()
        self.total_losses_meter.reset()
        for index, criterion_name in enumerate(self.criterions.keys()):
            self.loss_meters[index].reset()

    def load_checkpoint(self, check_point, finetune=False, pretrained=False):
        check_point = torch.load(check_point)
        if pretrained:
            self.backbone.load_state_dict(check_point)
            return
        if finetune:
            # load the feature-extraction (backbone) network parameters
            mapped_state_dict = self.backbone.state_dict()
            for key, value in check_point['backbone'].items():
                mapped_state_dict[key] = value
            self.backbone.load_state_dict(mapped_state_dict)
            # load the optimizer state for the feature-extraction parameters
            optimizer_state_dict = self.optimizer.state_dict()
            param_len = len(optimizer_state_dict['param_groups'][0]['params'])
            for index in range(param_len):
                optimizer_state_dict['state'].update({
                    optimizer_state_dict['param_groups'][0]['params'][index]:
                        check_point['optimizer']['state'].get(
                            check_point['optimizer']['param_groups'][0]['params'][index])})
            self.optimizer.load_state_dict(optimizer_state_dict)
        else:
            self.lowest_train_loss = check_point['loss']
            self.epoch = check_point['epoch']
            if self.epoch > 150:
                fuse_module(self.backbone)
                fuse_module(self.head)
            print("lowest_train_loss: ", self.lowest_train_loss)
            mapped_state_dict = self.backbone.state_dict()
            for key, value in check_point['backbone'].items():
                mapped_state_dict[key] = value
            self.backbone.load_state_dict(mapped_state_dict)

            mapped_state_dict = self.head.state_dict()
            for key, value in check_point['head'].items():
                mapped_state_dict[key] = value
            self.head.load_state_dict(mapped_state_dict)
            self.optimizer.load_state_dict(check_point['optimizer'])

    def finetune_model(self):
        if isinstance(self.backbone, torch.nn.DataParallel):
            backbone_named_children = self.backbone.module.named_children()
        else:
            backbone_named_children = self.backbone.named_children()
        if isinstance(self.head, torch.nn.DataParallel):
            head_named_children = self.head.module.named_children()
        else:
            head_named_children = self.head.named_children()
        for name, module in backbone_named_children:
            module.eval()
            for p in module.parameters():
                p.requires_grad = False
        for name, module in head_named_children:
            module.train()
            for p in module.parameters():
                p.requires_grad = True

    def set_bn_eval(self, m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            m.eval()

    def adjust_lr_exp(self, optimizer, ep, total_ep, start_decay_at_ep):
        """Decay exponentially in the later phase of training. All parameters in the
        optimizer share the same learning rate.

        Args:
          optimizer: a pytorch `Optimizer` object
          base_lr: starting learning rate
          ep: current epoch, ep >= 1
          total_ep: total number of epochs to train
          start_decay_at_ep: start decaying at the BEGINNING of this epoch

        Example:
          base_lr = 2e-4
          total_ep = 300
          start_decay_at_ep = 201
          It means the learning rate starts at 2e-4 and begins decaying after 200
          epochs. And training stops after 300 epochs.

        NOTE:
          It is meant to be called at the BEGINNING of an epoch.
        """
        assert ep >= 1, "Current epoch number should be >= 1"
        if ep < start_decay_at_ep:  # warm-up
            for g in optimizer.param_groups:
                g['lr'] = (g['initial_lr'] * 0.1 * (10 ** (float(ep) / start_decay_at_ep)))
                print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))
        else:
            for g in optimizer.param_groups:
                g['lr'] = (g['initial_lr'] * (0.001 ** (float(ep + 1 - start_decay_at_ep)
                                                        / (total_ep + 1 - start_decay_at_ep))))
                print('=====> lr adjusted to {:.10f}'.format(g['lr']).rstrip('0'))

    def eval(self):
        self.backbone.eval()
        self.head.eval()
        accuracy, best_thresholds, roc_curve_tensor = self.evaluation.evaluate()
        buffer_val(self.writer, 'lfw', accuracy, best_thresholds, roc_curve_tensor, self.epoch)
        # self.evaluation.eval_rerank()

    def train(self, epochs=10, save_flag=True, finetune=False):
        if len(self.gpu_list) > 1:
            self.backbone = torch.nn.DataParallel(self.backbone, device_ids=self.gpu_list)
            self.head = torch.nn.DataParallel(self.head, device_ids=self.gpu_list)
            cudnn.benchmark = True
        # scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=20, gamma=0.1, last_epoch=self.epoch)
        # scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[30, 60, 100],
        #                                                  gamma=0.1, last_epoch=self.epoch)
        while self.epoch < epochs:
            print("Epoch: ", self.epoch)
            self.adjust_lr_exp(self.optimizer, self.epoch + 1, epochs, int(finetune) * 10 + 10)
            if self.epoch % 10 == 0:
                print(self.optimizer)
            self.reset_meter()
            if finetune and self.epoch < 10:
                self.finetune_model()
            else:
                self.backbone.train()
                self.head.train()
            if self.epoch == self.combine_conv_bn_epoch:
                # freeze BN parameter updates
                # self.model.apply(self.set_bn_eval)
                # fuse conv + bn
                fuse_module(self.backbone)
                fuse_module(self.head)
            self.train_epoch()
            # scheduler.step()
            if (self.epoch + 1) % 10 == 0:
                self.eval()
            if save_flag:
                self.save_model()
                self.save_model(False, False)
            self.epoch += 1
        torch.cuda.empty_cache()
        self.writer.close()
        print("Finished training.")

    def search_learn_rate(self):
        if self.use_cuda:
            self.backbone = torch.nn.DataParallel(self.backbone, device_ids=self.gpu_list)
            self.head = torch.nn.DataParallel(self.head, device_ids=self.gpu_list)
            cudnn.benchmark = True
        print(self.optimizer)
        lr_mult = (1 / 1e-5) ** (1 / len(self.train_data_loader))
        self.backbone.train()
        self.head.train()
        self.reset_meter()
        train_data_batch = len(self.train_data_loader)
        batch_iterator = iter(self.train_data_loader)
        for step in range(train_data_batch):
            start = time.time()

            images, targets = next(batch_iterator)
            self.batch_inference(images, targets)

            end = time.time()
            batch_time = end - start
            eta = int(batch_time * ((train_data_batch - step) + (self.max_epoch - self.epoch) * train_data_batch))
            for g in self.optimizer.param_groups:
                g['lr'] = (g['lr'] * lr_mult)
            if (step + 1) % 10 == 0:
                print_infos = 'Epoch:{}/{} || Epochiter: {}/{} || Batchtime: {:.4f} s || ETA: {} || ' \
                    .format(self.epoch, self.max_epoch, step, train_data_batch,
                            batch_time, str(datetime.timedelta(seconds=eta)))
                print_infos += 'acc_top1: {:>.4f}, acc_top5: {:>.4f}, total_loss: {:>.4f}( {:>.4f})'.format(
                    self.accuracy_top_1.avg, self.accuracy_top_5.avg,
                    self.total_losses_meter.val, self.total_losses_meter.avg)
                for index, criterion_name in enumerate(self.criterions.keys()):
                    print_infos = print_infos + f", {criterion_name}: {self.loss_meter[index].val:>.4f}" \
                                                f"({self.loss_meter[index].avg:>.4f})"
                print(print_infos)
                self.writer.add_scalar('loss/loss', self.total_losses_meter.val, step)
                self.writer.add_scalar('loss/total_loss', self.total_losses_meter.val, step)
                for index, criterion_name in enumerate(self.criterions.keys()):
                    self.writer.add_scalar(f'loss/{criterion_name}', self.loss_meters[index].val,
                                           step)
                self.writer.add_scalar('acc/acc_top1', self.accuracy_top_1.val, step)
                self.writer.add_scalar('acc/acc_top5', self.accuracy_top_5.val, step)
                self.writer.add_scalar('learning_rate', self.optimizer.param_groups[0]['lr'], step)
            if (step + 1) % 100 == 0:
                print(self.optimizer)
        torch.cuda.empty_cache()
        self.writer.close()
        print("Finished training.")

    def train_epoch(self):
        train_data_batch = len(self.train_data_loader)
        batch_iterator = iter(self.train_data_loader)
        for step in range(train_data_batch):
            start = time.time()

            images, targets = next(batch_iterator)
            self.batch_inference(images, targets)

            end = time.time()
            batch_time = end - start
            eta = int(batch_time * ((train_data_batch - step) + (self.max_epoch - self.epoch) * train_data_batch))
            if step % 20 == 0:
                print_infos = 'Epoch:{}/{} || Epochiter: {}/{} || Batchtime: {:.4f} s || ETA: {} || ' \
                    .format(self.epoch, self.max_epoch, step, train_data_batch,
                            batch_time, str(datetime.timedelta(seconds=eta)))
                print_infos += ' acc_top1: {:>.4f}, acc_top5: {:>.4f}, total_loss: {:>.4f}( {:>.4f})'.format(
                    self.accuracy_top_1.avg, self.accuracy_top_5.avg,
                    self.total_losses_meter.val, self.total_losses_meter.avg)
                for index, criterion_name in enumerate(self.criterions.keys()):
                    print_infos = print_infos + f", {criterion_name}: {self.loss_meters[index].val:>.4f}" \
                                                f"({self.loss_meters[index].avg:>.4f})"
                print(print_infos)
            if step % 100 == 0:
                # Window
                # self.writer.add_image('Image', images, step + self.epoch * train_data_batch)
                # Linux
                # self.writer.add_image('Image', image, step + self.epoch * train_data_batch, dataformats='NCHW')
                for name, param in self.backbone.named_parameters():
                    self.writer.add_histogram(name, param.clone().cpu().data.numpy(),
                                              step + self.epoch * train_data_batch)
                for name, param in self.head.named_parameters():
                    self.writer.add_histogram(
                        name,
                        param.clone().cpu().data.numpy(),
                        step + self.epoch * train_data_batch)
                self.writer.add_scalar('loss/total_loss', self.total_losses_meter.val, step + self.epoch * train_data_batch)
                for index, criterion_name in enumerate(self.criterions.keys()):
                    self.writer.add_scalar(f'loss/{criterion_name}', self.loss_meters[index].val,
                                           step + self.epoch * train_data_batch)
                self.writer.add_scalar('acc/acc_top1', self.accuracy_top_1.val, step + self.epoch * train_data_batch)
                self.writer.add_scalar('acc/acc_top5', self.accuracy_top_5.val, step + self.epoch * train_data_batch)
        print("Total train loss:", self.total_losses_meter.avg)

    def save_model(self, save_head=True, save_optimizer=True):
        if self.total_losses_meter.avg < self.lowest_train_loss or self.total_losses_meter.avg < 2.0:
            state = {
                'backbone': self.backbone.module.state_dict() if self.use_cuda else self.backbone.state_dict(),
                'loss': self.total_losses_meter.avg,
                'epoch': self.epoch + 1
            }
            if save_optimizer:
                state.update({'optimizer': self.optimizer.state_dict()})
            model_name = self.backbone_name
            if save_head:
                state.update({'head': self.head.module.state_dict() if self.use_cuda else self.head.state_dict()})
                model_name = '_'.join([self.backbone_name, self.head_name])
            if not os.path.exists('./checkpoints'):
                os.makedirs('./checkpoints')
            save_path = './checkpoints/{}_{}_{:.04f}.pth'.format(model_name, self.epoch,
                                                                 self.total_losses_meter.avg)
            torch.save(state, save_path)
        if self.total_losses_meter.avg < self.lowest_train_loss:
            self.lowest_train_loss = self.total_losses_meter.avg

    def batch_inference(self, images, targets=None, backward=True):
        if self.use_cuda:
            images = images.cuda()
            if targets is not None:
                targets = targets.cuda()
        if (not self.backbone.training and not self.head.training) or targets is None:
            features = self.backbone(images)
            return features
        features = self.backbone(images)
        outputs = self.head(features, targets.long())
        total_loss = 0

        losses = self.loss_forward(self.criterions, features, outputs, targets)
        accuracy_top_1, accuracy_top_5 = accuracy(outputs, targets, (1, 5))
        total_loss = torch.stack(losses).mul(self.loss_weights).sum()

        if backward:
            self.optimizer.zero_grad()
            total_loss.backward()
            apply_weight_decay(self.backbone)
            apply_weight_decay(self.head)
            self.optimizer.step()

        losses_value = []
        for index, criterion_name in enumerate(self.criterions.keys()):
            losses_value.append(losses[index].item())
        total_loss_value = total_loss.item()
        accuracy_top_1_value = accuracy_top_1.item()
        accuracy_top_5_value = accuracy_top_5.item()

        for index, criterion_name in enumerate(self.criterions.keys()):
            self.loss_meters[index].update(losses_value[index], targets.size(0))
        self.total_losses_meter.update(total_loss_value, targets.size(0))
        self.accuracy_top_1.update(accuracy_top_1_value, targets.size(0))
        self.accuracy_top_5.update(accuracy_top_5_value, targets.size(0))
        return outputs
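The schedule implemented by adjust_lr_exp above, reproduced standalone so its shape is easy to inspect; the base rate and epoch counts below are the docstring's illustrative values:

def lr_at(ep, initial_lr=2e-4, total_ep=300, start_decay_at_ep=201):
    if ep < start_decay_at_ep:  # warm-up: ramps up toward initial_lr
        return initial_lr * 0.1 * (10 ** (float(ep) / start_decay_at_ep))
    return initial_lr * (0.001 ** (float(ep + 1 - start_decay_at_ep)
                                   / (total_ep + 1 - start_decay_at_ep)))

for ep in (1, 100, 201, 300):
    print(ep, lr_at(ep))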
Example #13
def run_epoch(data, is_training, model, optimizer):
    '''
    Run the model for one pass of the data; return the average loss when
    training, or (MAP, MRR, P@1, P@5) when evaluating.
    '''
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=20,
                                              shuffle=True,
                                              num_workers=4,
                                              drop_last=False)

    losses = []

    if is_training:
        model.train()
    else:
        model.eval()

    for batch in data_loader:
        pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1)
        pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1)
        rest_title = Variable(batch['rest_title'])
        rest_body = Variable(batch['rest_body'])

        pid_title_pad = torch.unsqueeze(Variable(batch['pid_title_pad']), 1)
        pid_body_pad = torch.unsqueeze(Variable(batch['pid_body_pad']), 1)
        rest_title_pad = Variable(batch['rest_title_pad'])
        rest_body_pad = Variable(batch['rest_body_pad'])

        pid_title, pid_body = pid_title.cuda(), pid_body.cuda()
        rest_title, rest_body = rest_title.cuda(), rest_body.cuda()
        pid_title_pad, pid_body_pad = pid_title_pad.cuda(), pid_body_pad.cuda()
        rest_title_pad, rest_body_pad = rest_title_pad.cuda(
        ), rest_body_pad.cuda()

        if is_training:
            optimizer.zero_grad()

        pt = model(pid_title)
        pb = model(pid_body)
        rt = model(rest_title)
        rb = model(rest_body)

        # we need to take the mean pooling taking into account the padding
        # tensors are of dim batch_size x samples x output_size x (len - kernel + 1)
        # pad tensors are of dim batch_size x samples x (len - kernel + 1)

        pid_title_pad_ex = torch.unsqueeze(pid_title_pad, 2).expand_as(pt)
        pid_body_pad_ex = torch.unsqueeze(pid_body_pad, 2).expand_as(pb)
        rest_title_pad_ex = torch.unsqueeze(rest_title_pad, 2).expand_as(rt)
        rest_body_pad_ex = torch.unsqueeze(rest_body_pad, 2).expand_as(rb)

        pt = torch.squeeze(torch.sum(pt * pid_title_pad_ex, dim=3), dim=3)
        pb = torch.squeeze(torch.sum(pb * pid_body_pad_ex, dim=3), dim=3)
        rt = torch.squeeze(torch.sum(rt * rest_title_pad_ex, dim=3), dim=3)
        rb = torch.squeeze(torch.sum(rb * rest_body_pad_ex, dim=3), dim=3)

        # tensors are now of dim batch_size x samples x output_size
        # need to scale down because not all uniformly padded

        ptp_norm = torch.sum(pid_title_pad, dim=2).clamp(min=1).expand_as(pt)
        pbp_norm = torch.sum(pid_body_pad, dim=2).clamp(min=1).expand_as(pb)
        rtp_norm = torch.sum(rest_title_pad, dim=2).clamp(min=1).expand_as(rt)
        rbp_norm = torch.sum(rest_body_pad, dim=2).clamp(min=1).expand_as(rb)

        pt = pt / ptp_norm
        pb = pb / pbp_norm
        rt = rt / rtp_norm
        rb = rb / rbp_norm

        pid_tensor = (pt + pb) / 2
        rest_tensor = (rt + rb) / 2

        if is_training:
            loss = loss_function(pid_tensor, rest_tensor)
            loss.backward()
            losses.append(loss.cpu().data[0])
            optimizer.step()
        else:
            expanded = pid_tensor.expand_as(rest_tensor)
            similarity = cs(expanded, rest_tensor, dim=2).squeeze(2)
            similarity = similarity.data.cpu().numpy()
            labels = batch['labels'].numpy()
            l = convert(similarity, labels)
            losses.extend(l)

    # Calculate epoch level scores
    if is_training:
        avg_loss = np.mean(losses)
        return avg_loss
    else:
        e = Evaluation(losses)
        MAP = e.MAP() * 100
        MRR = e.MRR() * 100
        P1 = e.Precision(1) * 100
        P5 = e.Precision(5) * 100
        return (MAP, MRR, P1, P5)
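The masked mean pooling done piecewise above (multiply by the pad mask, sum over the sequence axis, divide by the clamped token count) in compact modern-PyTorch form; shapes are illustrative:

import torch

x = torch.randn(4, 7, 500)       # batch x seq_len x hidden
mask = torch.ones(4, 7)
mask[:, 5:] = 0                  # last two positions are padding
pooled = (x * mask.unsqueeze(2)).sum(dim=1) / mask.sum(dim=1, keepdim=True).clamp(min=1)
print(pooled.shape)              # torch.Size([4, 500])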
Example #14
def compute_scores(raw_data_dir=FLAGS.raw_data, data_dir=FLAGS.data_dir,
                   dataset=FLAGS.dataset, save_recommendation=FLAGS.saverec,
                   train_dir=FLAGS.train_dir, test=FLAGS.test, true_targets=FLAGS.true_targets):

    from evaluate import Evaluation
    from post_processing import filter_out_negs
    e = Evaluation(FLAGS.prep_dir, raw_data_dir, FLAGS.raw_data_daily)

    daily_raw_data_dir = FLAGS.raw_data_daily

    if true_targets:
        if FLAGS.reverse:
            user_file_name = os.path.join(daily_raw_data_dir, 'daily_target_items_list')
        else:
            user_file_name = os.path.join(daily_raw_data_dir, 'daily_target_users_set')
    else:
        if FLAGS.reverse:
            user_file_name = os.path.join(raw_data_dir, 'daily_target_local_list')
        else:
            user_file_name = os.path.join(raw_data_dir, 'target_users_set')
            
        
    if FLAGS.new_users:
        if FLAGS.reverse:
            item_file_name = os.path.join(daily_raw_data_dir, 'daily_target_users_set')
        else:
            item_file_name = os.path.join(daily_raw_data_dir, 'daily_target_items_list')
    else:
        if FLAGS.reverse:
            item_file_name = os.path.join(raw_data_dir, 'target_users_set')
        else:
            item_file_name = os.path.join(raw_data_dir, 'daily_target_local_list')

#     if FLAGS.reverse:
#         target_users = pickle.load(open(user_file_name, 'rb'))
#         t_ids = pickle.load(open(item_file_name, 'rb'))
#     else:
    target_users = pickle.load(open(item_file_name, 'rb'))
    t_ids = pickle.load(open(user_file_name, 'rb'))
        
    #t_ids = pickle.load(open(item_file_name, 'rb'))
    t_ids = list(t_ids)
    target_users = set(target_users)
    print(FLAGS.new_users, FLAGS.true_targets)
    print(len(t_ids),len(target_users))
    if FLAGS.reverse:
        suf = ''
    else:
        suf = '_rev'

    if true_targets and FLAGS.new_users:
        reclogfile = 'online_raw_rec_hmf' + suf
           
    elif not FLAGS.true_targets and not FLAGS.new_users:
        reclogfile = 'local_raw_rec_hmf' + suf

#     if true_targets:
#         reclogfile = "online_raw_rec_hmf"
#         t_ids = pickle.load(open(os.path.join(daily_raw_data_dir, 'daily_target_items_list'), 'rb'))
#         target_users = pickle.load(open(os.path.join(daily_raw_data_dir, 'daily_target_users_set'), 'rb'))
#     else:
#         reclogfile = "local_raw_rec_hmf"
#         t_ids = pickle.load(open(os.path.join(raw_data_dir, 'target_items_local_list'), 'rb'))
#         target_users = pickle.load(open(os.path.join(raw_data_dir, 'target_users_set'), 'rb'))

    R = recommend(t_ids, data_dir=data_dir)
    # rec_save_path = os.path.join(daily_raw_data_dir, reclogfile)
    # R = pickle.load(open(rec_save_path, 'rb'))

    if save_recommendation:
        rec_save_path = os.path.join(daily_raw_data_dir, reclogfile)
        pickle.dump(R, open(rec_save_path, 'wb'))

    # R = filter_recs(R, set(target_users))

    if not FLAGS.reverse:
        print('no post processing is needed. return')
        return
    neg_users_set = pickle.load(open(os.path.join(FLAGS.prep_dir, 'neg_users_set')+suf, 'rb'))

    # e.online_solutions_write(R, daily_raw_data_dir, 'basic_rec_done')
    R_filtered = filter_out_negs(R, neg_users_set)
    # e.online_solutions_write(R_filtered, daily_raw_data_dir, 'fitered_out_negs_done')

    if true_targets:
        # R = process_rec_single_user_online_round(R)
        # e.online_solutions_write(R, daily_raw_data_dir, 'online_submission.txt')
        R_filtered = process_rec_single_user_online_round(R_filtered)
        e.online_solutions_write(R_filtered, daily_raw_data_dir, 'neg_filtered_online_submission' + suf + '.txt')
    else:
        scores = e.local_eval_on(R)
        e.local_write_scores(scores, 'local_eval_scores'+suf+'.txt', train_dir)
        scores_filteredR = e.local_eval_on(R_filtered)
        e.local_write_scores(scores_filteredR, 'neg_filtered_local_eval_scores'+suf+'.txt', train_dir)
Example #15
def main():
    # create environment
    dist1 = Distribution(id=0, vals=[2], probs=[1])
    dist2 = Distribution(id=1, vals=[5], probs=[1])
    dist3 = Distribution(id=2, vals=[2, 8], probs=[0.5, 0.5])

    env = Environment(total_bandwidth=10,
                      distribution_list=[dist1, dist2, dist3],
                      mu_list=[1, 2, 3], lambda_list=[3, 2, 1],
                      num_of_each_type_distribution_list=[300, 300, 300])
    evaluation = Evaluation()
    obs_dim = 6
    act_dim = 2
    # logger.info('obs_dim {}, act_dim {}'.format(obs_dim, act_dim))

    # build the agent on top of the PARL framework
    model = PGTorchModel(state_dim=obs_dim, act_dim=act_dim)
    alg = PGTorchAlgorithm(model, lr=LEARNING_RATE)
    agent = PGTorchAgent(alg, obs_dim=obs_dim, act_dim=act_dim)

    # load a previously saved model if present
    if os.path.exists('./pg_torch_model'):
        agent.restore('./pg_torch_model')
    writer = SummaryWriter()

    for i in range(1000):
        obs_list, action_list, reward_list = run_episode(env, agent)
        writer.add_scalars('Reward/train',
                           {'train_reward': sum(reward_list) / len(reward_list),
                            'reject when full': evaluation.reject_when_full_avg_reward,
                            'always accept': evaluation.always_accept_avg_reward,
                            'always reject': evaluation.always_reject_avg_reward}, i)
        # writer.add_scalar('Reward/train', evaluation.always_reject_avg_reward, i)

        if i % 10 == 0:
            # logger.info("Episode {}, Reward Sum {}.".format(
            #     i, sum(reward_list)))
            print("Episode {}, Reward Sum {}.".format(
                i,
                sum(reward_list) / len(reward_list)))
        batch_obs = torch.from_numpy(np.array(obs_list))

        batch_action = torch.from_numpy(np.array(action_list))
        batch_reward = torch.from_numpy(
            calc_reward_to_go(reward_list, gamma=0.9))

        loss = agent.learn(batch_obs, batch_action, batch_reward)
        writer.add_scalar('Loss/train', loss, i)
        if (i + 1) % 100 == 0:
            avg_reward, avg_acc_rate = evaluation.evaluate(agent)
            writer.add_scalars('reward Test',
                               {'test reward': avg_reward,
                                'reject when full': evaluation.reject_when_full_avg_reward,
                                'always accept': evaluation.always_accept_avg_reward,
                                'always reject': evaluation.always_reject_avg_reward}, i)
            writer.add_scalars('accept rate Test',
                               {'test rate': avg_acc_rate,
                                'reject when full': evaluation.reject_when_full_avg_acc_rate,
                                'always accept': evaluation.always_accept_avg_acc_rate,
                                'always reject': evaluation.always_reject_avg_acc_rate}, i)
            print('avg_reward', avg_reward, 'avg_acc_rate', avg_acc_rate,
                  'base ', evaluation.reject_when_full_avg_reward)

    writer.close()
    # save the parameters to ./pg_torch_model
    agent.save('./pg_torch_model')
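calc_reward_to_go is not shown in this snippet; a standard discounted reward-to-go looks like the sketch below (an assumption, not necessarily the exact implementation used here):

import numpy as np

def calc_reward_to_go(rewards, gamma=0.9):
    # out[i] = rewards[i] + gamma * out[i + 1]
    out = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for i in reversed(range(len(rewards))):
        running = rewards[i] + gamma * running
        out[i] = running
    return out

print(calc_reward_to_go([1.0, 0.0, 1.0]))  # [1.81 0.9  1.  ]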
Example #16
print "Finished loading"

print "Starting post processing"
t_users_int_set = load_cache(prep_dir, 'target_users_set')
neg_user_int_set = load_cache(prep_dir, 'neg_users_set')

filtered_rec_local = filter_recs(rec_local, t_users_int_set)
filtered_rec_online = filter_recs(rec_online, t_users_int_set)
print "Intermediate"
neg_filtered_rec_local = filter_out_negs(filtered_rec_local, neg_user_int_set)
neg_filtered_rec_online = filter_out_negs(filtered_rec_online,
                                          neg_user_int_set)

print "Cutoff"
filtered_rec_local = cutoff100(filtered_rec_local)
filtered_rec_online = cutoff100(filtered_rec_online)
neg_filtered_rec_local = cutoff100(neg_filtered_rec_local)
neg_filtered_rec_online = cutoff100(neg_filtered_rec_online)

print "Finished post processing"

print "Starting evaluation and submission"
e = Evaluation(prep_cache_dir=prep_dir)
e.format_submission(filtered_rec_local, filtered_rec_online, sol_dir,
                    '_eval_scores_hmf1_filtered_1000to100.txt',
                    '_subm_hmf1_filtered_1000to100.txt')
e.format_submission(neg_filtered_rec_local, neg_filtered_rec_online, sol_dir,
                    '_eval_scores_hmf1_negfiltered_1000to100.txt',
                    '_subm_hmf1_negfiltered_1000to100.txt')
print "Done"
Example #17
def run_epoch(data, is_training, model, optimizer, transfer=False):
	
	# Make batches
	data_loader = torch.utils.data.DataLoader(
		data,
		batch_size=10,
		shuffle=True,
		num_workers=4,
		drop_last=False)

	losses = []
	actual = []
	expected = []

	if is_training:
		model.train()
	else:
		model.eval()
	
	for batch in data_loader:
		# Unpack training instances
		pid_title = torch.unsqueeze(Variable(batch['pid_title']), 1).cuda() # Size: batch_size x 1 x title_length=40
		pid_title_mask = torch.unsqueeze(Variable(batch['pid_title_mask']), 1).cuda() # Size: batch_size x 1 x title_length=40
		pid_body = torch.unsqueeze(Variable(batch['pid_body']), 1).cuda() # Size: batch_size x 1 x body_length=100
		pid_body_mask = torch.unsqueeze(Variable(batch['pid_body_mask']), 1).cuda() # Size: batch_size x 1 x body_length=100
		candidate_title = Variable(batch['candidate_titles']).cuda() # Size: batch_size x # candidates (21 in training) x title_length=40
		candidate_title_mask = Variable(batch['candidate_titles_mask']).cuda() # Size: batch_size x # candidates (21 in training) x title_length=40
		candidate_body = Variable(batch['candidate_body']).cuda() # Size: batch_size x # candidates (21 in training) x body_length=100
		candidate_body_mask = Variable(batch['candidate_body_mask']).cuda() # Size: batch_size x # candidates (21 in training) x body_length=40
		
		if is_training:
			optimizer.zero_grad()
		
		# Run text through model
		pid_title = model(pid_title) # batch_size x 1 x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		pid_body = model(pid_body) # batch_size x 1 x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		candidate_title = model(candidate_title) # batch_size x # candidates (21 in training) x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		candidate_body = model(candidate_body) # batch_size x # candidates (21 in training) x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		
		pid_title_mask = torch.unsqueeze(pid_title_mask, 2).expand_as(pid_title) # batch_size x 1 x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		pid_body_mask = torch.unsqueeze(pid_body_mask, 2).expand_as(pid_body) # batch_size x 1 x output_size=500 x body_length=100(-kernel_size+1 if CNN)
		candidate_title_mask = torch.unsqueeze(candidate_title_mask, 2).expand_as(candidate_title)# batch_size x # candidates (21 in training) x output_size=500 x title_length=40(-kernel_size+1 if CNN)
		candidate_body_mask = torch.unsqueeze(candidate_body_mask, 2).expand_as(candidate_body) # batch_size x # candidates (21 in training) x output_size=500 x body_length=100(-kernel_size+1 if CNN)

		good_title = torch.sum(pid_title * pid_title_mask, 3) # batch_size x 1 x output_size=500
		good_body = torch.sum(pid_body * pid_body_mask, 3) # batch_size x 1 x output_size=500
		cand_titles = torch.sum(candidate_title * candidate_title_mask, 3) # batch_size x # candidates (21 in training) x output_size=500
		cand_bodies = torch.sum(candidate_body * candidate_body_mask, 3) # batch_size x # candidates (21 in training) x output_size=500
		
		good_tensor = (good_title + good_body)/2 # batch_size x 1 x output_size=500
		cand_tensor = (cand_titles + cand_bodies)/2 # batch_size x # candidates (21 in training) x output_size=500
		
		if is_training:
			l = loss(good_tensor, cand_tensor, 1.0)
			l.backward()
			losses.append(l.cpu().data[0])
			optimizer.step()
		else:
			similarity = cosine_sim(good_tensor.expand_as(cand_tensor), cand_tensor, dim=2)
			if transfer:
				similarity = torch.FloatTensor(similarity.data.cpu().numpy())
				labels = batch['labels']
			else:
				similarity = similarity.data.cpu().numpy()
				labels = batch['labels'].numpy()
			def predict(sim, labels):
				predictions = []
				for i in range(sim.shape[0]):
					sorted_cand = (-sim[i]).argsort()
					predictions.append(labels[i][sorted_cand])
				return predictions
			if transfer:
				for sim in similarity:
					actual.append(sim)
				expected.extend(labels.view(-1))
			else:
				l = predict(similarity, labels)
				losses.extend(l)

	if is_training:
		avg_loss = np.mean(losses)
		return avg_loss
	else:
		if transfer:
			auc = AUCMeter()
			auc.reset()
			auc.add(torch.cat(actual), torch.LongTensor(expected))
			return auc.value(max_fpr=0.05)
		else:
			e = Evaluation(losses)
			MAP = e.MAP()*100
			MRR = e.MRR()*100
			P1 = e.Precision(1)*100
			P5 = e.Precision(5)*100
			return (MAP, MRR, P1, P5)
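Evaluation here consumes one ranked binary-relevance list per query (built by predict above). A minimal sketch of the metrics it reports, under that assumption:

import numpy as np

def precision_at_k(ranked, k):
    return float(np.mean([np.mean(r[:k]) for r in ranked]))

def mrr(ranked):
    out = []
    for r in ranked:
        hits = np.nonzero(r)[0]
        out.append(1.0 / (hits[0] + 1) if len(hits) else 0.0)
    return float(np.mean(out))

ranked = [np.array([0, 1, 1]), np.array([1, 0, 0])]
print(precision_at_k(ranked, 1), mrr(ranked))  # 0.5 0.75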
Example #18
# for each experiment, tuning num_epoch times
NUM_EPOCH = 1
BATCH_SIZE = 899
BETA = 1
EPSILON = 0.1

# create environment
dist1 = Distribution(id=0, vals=[2], probs=[1])
dist2 = Distribution(id=1, vals=[5], probs=[1])
dist3 = Distribution(id=2, vals=[2, 8], probs=[0.5, 0.5])

env = Environment(total_bandwidth=10,
                  distribution_list=[dist1, dist2, dist3],
                  mu_list=[1, 2, 3], lambda_list=[3, 2, 1],
                  num_of_each_type_distribution_list=[300, 300, 300])
evaluation = Evaluation()

class PPODataset(Dataset):
    def __init__(self, obs_list, action_list, advantage_list):
        self.obs_list = torch.cat(obs_list, 0)
        self.action_list = torch.tensor(action_list, dtype=torch.int64)
        self.advantage_list = torch.tensor(advantage_list, dtype=torch.float32)

    def __getitem__(self, index):
        return self.obs_list[index,:], self.action_list[index], self.advantage_list[index]
    
    def __len__(self):
        return self.obs_list.shape[0]

#1. Initialize network
class PPO(nn.Module):
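The listing breaks off at the PPO class definition. For reference, a hypothetical usage of the PPODataset above with a DataLoader and toy tensors:

import torch
from torch.utils.data import DataLoader

obs = [torch.randn(1, 6) for _ in range(4)]
dataset = PPODataset(obs, action_list=[0, 1, 0, 1],
                     advantage_list=[0.5, -0.2, 1.0, 0.1])
loader = DataLoader(dataset, batch_size=2)
for obs_b, act_b, adv_b in loader:
    print(obs_b.shape, act_b.shape, adv_b.shape)  # (2, 6), (2,), (2,)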
Example #19
    def train(corpusName,
              binary=True,
              verbalEx=False,
              printReport=False,
              enableSerialization=False):

        languageName = corpusName.split('/')[-1]
        if languageName.strip() == '':
            languageName = corpusName.split('/')[-2]
        dumpingFolder = '/Users/hazemalsaied/Parseme/MWEIdSys/Serialization/'
        if not os.path.exists(dumpingFolder):
            os.makedirs(dumpingFolder)

        if enableSerialization:
            # Reading the corpus
            corpus = Corpus(corpusName, printReport=printReport)
            # corpus = SPMLRCorpus(corpusName, verbalEx=verbalEx, printReport=printReport)
            with open(os.path.join(dumpingFolder, languageName + '.pickle'),
                      'wb') as f:
                pickle.dump(corpus, f)
        else:
            with open(os.path.join(dumpingFolder, languageName + '.pickle'),
                      'rb') as f:
                corpus = pickle.load(f)

        # creating an initial report
        if printReport:
            languageName = corpusName.split('/')[-1]
            if len(languageName) > 0:
                Evaluation.resultsPath = os.path.join(Evaluation.resultsPath,
                                                      languageName)
            else:
                Evaluation.resultsPath = os.path.join(
                    Evaluation.resultsPath,
                    corpusName.split('/')[-2])
            if not os.path.exists(Evaluation.resultsPath):
                os.makedirs(Evaluation.resultsPath)

            printingPath = os.path.join(Evaluation.resultsPath, 'Readme.md')
            staticParsingFile = open(printingPath, 'w')
            result = '##Number of Sentences: ' + str(
                corpus.sentNum) + '\n##Number of MWEs: ' + str(corpus.mweNum)
            staticParsingFile.write(result)

        # Splitting the data into training and testing sets
        sentenceNum = int(len(corpus.sentences) * 0.8)
        trainingSents = corpus.sentences[0:sentenceNum]
        testingSents = corpus.sentences[sentenceNum + 1:]

        # Training classifier
        cls = Train.classify(trainingSents,
                             printReport=printReport,
                             binary=binary)

        # Parsing the test phrases
        for sent in testingSents:
            Parser.parse(cls[0], cls[1], sent, binary=binary)

        # creating a parsing report
        if printReport:
            Train.createParsingReport(testingSents)

        goldCorpus = ''
        for sent in testingSents:
            goldCorpus += sent.getCorpusText() + '\n'
        goldtestingCorpusPath = os.path.join(
            os.path.join('/Users/hazemalsaied/Parseme/MWEIdSys/Results/',
                         languageName), languageName + '.gold')
        goldtestingCorpusFile = open(goldtestingCorpusPath, 'w')
        goldtestingCorpusFile.write(goldCorpus)

        predCorpus = ''
        for sent in testingSents:
            predCorpus += sent.getCorpusText(gold=False) + '\n'
        predTestingCorpusPath = os.path.join(
            os.path.join('/Users/hazemalsaied/Parseme/MWEIdSys/Results/',
                         languageName), languageName + '.pred')
        predTestingCorpusFile = open(predTestingCorpusPath, 'w')
        predTestingCorpusFile.write(predCorpus)

        # Evaluation
        Evaluation.evaluate(testingSents, printReport=True)

        # Enjoying testing
        testingCorpusPath = '/Users/hazemalsaied/Parseme/MWEIdSys/Corpora/testing/'
        while True:
            sentId = input('again? ')
            sents = [s for s in corpus.sentences if s.sentid == sentId]
            if not sents:
                testingCorpus = Corpus(testingCorpusPath,
                                       printReport=printReport)
                for sent in testingCorpus.sentences:
                    Parser.parse(cls[0], cls[1], sent, binary=binary)
                    print(sent)
            else:
                sent = sents[0]
                print(sent)
                Parser.parse(cls[0], cls[1], sent, binary=binary)
                print(sent)