import unittest

import numpy as np


class TestBalance(unittest.TestCase):
   longMessage = True
   known_values = {'y':np.array([  1,   1,   0,   0,   0,   1,   0,   0,   0]), 
                   'w':np.array([  1,   1,   0,   1,   0,   1,   0,   0,   1]), 
                   'o':np.array([0.4, 0.4, 0.0, 0.6, 0.0, 0.4, 0.0, 0.0, 0.6])}

   def test_detect_is_none(self):
      self.assertIsNone(balance_masked_weights(self.known_values['y'], None))


   def test_balance_masked_weights(self):
      resp = balance_masked_weights(self.known_values['y'], self.known_values['w'])
      msg = '\nGot: %s\nShould be: %s' % (str(resp), str(self.known_values['o']))

      self.assertTrue((resp==self.known_values['o']).all(), msg)


   def test_source_unchanged(self):
      prev = hash(tuple(self.known_values['w']))
      new = balance_masked_weights(self.known_values['y'], self.known_values['w'])
      after = hash(tuple(self.known_values['w']))
      self.assertEqual(prev, after)
   

   def test_one_class_empty(self):
      w = np.array([  1,   1,   0,   1])
      y = np.array([  1,   1,   0,   1])
      resp1 = balance_masked_weights(y, w)
      resp2 = balance_masked_weights(1-y, w)
      self.assertTrue((w==resp1).all())
      self.assertTrue((w==resp2).all())
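The tests above pin down the expected contract of balance_masked_weights: None passes through, the input array is never modified, a single-class mask is returned unchanged, and each active sample is reweighted by the opposite class's share of the active set. A minimal sketch consistent with those known values (not necessarily the original implementation):

def balance_masked_weights(y, w):
   """Reweight active (w > 0) samples so both classes carry equal total weight.

   Sketch matching TestBalance above; the real code may differ.
   """
   if w is None:
      return None
   w = w.astype(float)   # astype returns a copy, so the caller's array is untouched
   active = w > 0
   n_pos = int((y[active] == 1).sum())
   n_neg = int((y[active] == 0).sum())
   if n_pos == 0 or n_neg == 0:   # one class absent: nothing to balance
      return w
   n = float(n_pos + n_neg)
   w[active & (y == 1)] = n_neg / n   # e.g. 2/5 = 0.4 in the known values
   w[active & (y == 0)] = n_pos / n   # e.g. 3/5 = 0.6
   return w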
Example #3
def make_av_std(series, key, multitask=False, Noutputs=-1):
   """Average metric `key` across the runs in `series` (a list of history dicts).

   Returns the per-epoch mean and its standard error. For multitask
   histories the metric is first averaged over the Noutputs tasks.
   """
   if multitask:
      assert Noutputs > 0
      # shape (runs, tasks, epochs) -> average over the task axis
      arr = np.array([[x[key % i] for i in range(Noutputs)] for x in series]).mean(axis=1)
   else:
      arr = np.array([x[key] for x in series])
   n = np.sqrt(arr.shape[0] - 1)
   if n > 0:
      std = arr.std(axis=0) / n   # standard error over runs
   else:
      std = np.zeros(arr.shape[1])   # single run: no spread to report
   return arr.mean(axis=0), std
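For illustration, with two hypothetical Keras-style history dicts the helper returns the per-epoch mean over runs and its standard error:

histories = [{'acc': [0.5, 0.6, 0.7]},   # run 1
             {'acc': [0.6, 0.7, 0.8]}]   # run 2
av, std = make_av_std(histories, 'acc')
# av  -> [0.55, 0.65, 0.75]   per-epoch mean over runs
# std -> [0.05, 0.05, 0.05]   arr.std(axis=0) / sqrt(n_runs - 1)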
class TestBalanceScore(unittest.TestCase):
   known_values = {'y_true':np.array([  1,   1,   0,   0,   0,   1,   0,   0,   0]),
                        'w':np.array([  1,   1,   0,   1,   0,   1,   0,   0,   1]),
                   'y_pred':np.array([  0,   1,   1,   0,   0,   1,   0,   0,   1]),
                     'bacc':7.0/12.0,
                      'acc':0.6} 

   def test_accuracy(self):
      calc_acc = masked_balanced_score(self.known_values['y_pred'],
                                       self.known_values['y_true'],
                                       self.known_values['w'], balance=False)
      self.assertAlmostEqual(calc_acc, self.known_values['acc'], delta=1e-9)

   
   def test_balanced_accuracy(self):
      calc_acc = masked_balanced_score(self.known_values['y_pred'],
                                       self.known_values['y_true'],
                                       self.known_values['w'], balance=True)
      self.assertAlmostEqual(calc_acc, self.known_values['bacc'], delta=1e-9)
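A minimal sketch of masked_balanced_score that reproduces these known values: plain accuracy over the w > 0 samples, or the mean of the two per-class recalls when balance=True. The auc branch used elsewhere is omitted here, and the real implementation may differ:

def masked_balanced_score(y_pred, y_true, w, balance=True, auc=False):
   # auc handling omitted in this sketch; assumes both classes occur
   # among the active samples
   active = w > 0
   y_p, y_t = y_pred[active], y_true[active]
   if not balance:
      return (y_p == y_t).mean()   # 3/5 = 0.6 in the known values
   recalls = [(y_p[y_t == c] == c).mean() for c in (0, 1)]
   return float(np.mean(recalls))   # (1/2 + 2/3)/2 = 7/12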
class TestScaling(unittest.TestCase):
    longMessage = True
    w = np.ones(9)
    s = 0.3
    y = np.array([1, 1, 0, 0, 0, 1, 0, 0, 0])
    scaled_output = np.array([0.3, 0.3, 0.7, 0.7, 0.7, 0.3, 0.7, 0.7, 0.7])

    def test_non_w_modify(self):
        pre = hash(tuple(self.w))
        _ = scale_weights(self.y, self.w, self.s)
        after = hash(tuple(self.w))
        self.assertEqual(pre, after)

    def test_scaling_binary(self):
        expected_output = self.scaled_output
        actual_output = scale_weights(self.y, self.w, self.s)
        msg = '\nExpected : %s\nActual : %s' % (str(expected_output),
                                                str(actual_output))
        self.assertTrue((expected_output == actual_output).all(), msg)

    def test_scaling_categorical(self):
        y = np.array([[0, 1] if x else [1, 0] for x in self.y])
        expected_output = self.scaled_output
        actual_output = scale_weights(y, self.w, self.s)
        msg = '\nExpected : %s\nActual : %s' % (str(expected_output),
                                                str(actual_output))
        self.assertTrue((expected_output == actual_output).all(), msg)

    def test_scaling_with_none_binary(self):
        expected_output = self.w
        actual_output = scale_weights(self.y, None, self.s)
        msg = '\nExpected : %s\nActual : %s' % (str(expected_output),
                                                str(actual_output))
        self.assertTrue((expected_output == actual_output).all(), msg)

    def test_scaling_with_none_categorical(self):
        y = np.array([[0, 1] if x else [1, 0] for x in self.y])
        expected_output = self.w
        actual_output = scale_weights(y, None, self.s)
        msg = '\nExpected : %s\nActual : %s' % (str(expected_output),
                                                str(actual_output))
        self.assertTrue((expected_output == actual_output).all(), msg)
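These tests suggest a simple contract for scale_weights: positives are scaled by s, negatives by 1 - s, one-hot labels are accepted alongside binary ones, and w=None yields uniform weights. A sketch under those assumptions (not necessarily the original code):

def scale_weights(y, w, s):
    labels = y.argmax(axis=1) if y.ndim == 2 else y   # accept one-hot or 0/1 labels
    if w is None:
        return np.ones(len(labels))    # no weights given: uniform
    w = w.astype(float)                # astype copies, leaving the caller's array intact
    w[labels == 1] *= s
    w[labels == 0] *= 1.0 - s
    return w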
Example #7
def tasks_balanced_scores(y_pred, y_true, weights, balance=True, auc=False):
   """Per-task masked (balanced) scores for a multitask model.

   `weights` maps 'out%i' task names to per-sample weight arrays.
   """
   result = []
   for i, y_p in enumerate(y_pred):
      y_t = y_true[i]
      w = weights['out%i' % i]
      assert len(w) == len(y_t)
      assert len(w) == len(y_p)
      # AUC needs raw probabilities; accuracy needs hard 0/1 predictions
      to_use = y_p if auc else y_p.round(0)
      result.append(masked_balanced_score(to_use, y_t, w, balance=balance, auc=auc))
   return np.array(result)
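Called, for instance, with per-task prediction arrays and a weight dict keyed by output name (hypothetical data):

y_pred = [np.array([0.9, 0.2, 0.7]), np.array([0.4, 0.8, 0.1])]
y_true = [np.array([1, 0, 1]),       np.array([0, 1, 0])]
weights = {'out0': np.array([1, 1, 0]),   # third sample masked for task 0
           'out1': np.array([1, 1, 1])}
scores = tasks_balanced_scores(y_pred, y_true, weights)   # one score per task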
Example #8
                           verbose=1)
        tr_score, _ = make_av_std([result.history], metric, is_multitask,
                                  N_outputs)
        tst_score, _ = make_av_std([result.history], 'val_%s' % metric,
                                   is_multitask, N_outputs)
        logger.info('After relaxation: Train:  %8.3f   Val: %8.3f' %
                    (tr_score.max(), tst_score.max()))
    history.append(result.history)

#====== Average ===============
if is_multitask:
    metric = 'out%i_' + metric
cv_av, cv_std = make_av_std(history, metric, is_multitask, N_outputs)
cv_val_av, cv_val_std = make_av_std(history, 'val_%s' % metric, is_multitask,
                                    N_outputs)
val_loss = np.array([x['val_loss'] for x in history]).mean(axis=0)
if is_multitask:
    best_cv_idx = np.argmin(val_loss)  # lowest validation loss; alternative: cv_val_av.argmax()
else:
    best_cv_idx = cv_val_av.argmax()

logging.info('Best val loss at epoch %i: %8.6f' % (best_cv_idx, val_loss[best_cv_idx]))
test_epochs = args.epochs  #best_cv_idx+1

#====== test ===========
logger.info('Testing')
test_stuff = make_test_data(
    data_stuff, test_idx, N_inputs,
    config)  #slice_data(data_stuff, test_idx, N_inputs)
train_stuff = slice_data(data_stuff, train_idx, N_inputs)
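slice_data presumably selects the given rows from every array in data_stuff. A hypothetical sketch, assuming data_stuff is an [inputs, labels, weights] triple with inputs a list of N_inputs arrays (consistent with the data_stuff[1]/data_stuff[2] indexing in the PU loop below); the real slice_data and make_test_data may differ:

def slice_data(data_stuff, idx, N_inputs):
    inputs, labels, weights = data_stuff
    return [inp[idx] for inp in inputs[:N_inputs]], labels[idx], weights[idx]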
Example #9
maxit = 100

while len(new_negatives) > 0 and iteration < maxit:
    logging.info('PU iter: %i' % iteration)
    cv_results = make_cv_on_current_set(data_stuff, indices_to_use, config)
    best_it = cv_results['it']
    score_at_best = '%5i ' % best_it
    for k in ['train', 'val']:
        v = cv_results[k]
        score_at_best += '%s %8.3f (%.3f) ' % (k, v[0][best_it], v[1][best_it])
    logging.info("Best CV iter: {:s}".format(score_at_best))

    #update unlabelled
    new_negatives, stats, maybe_drug = extract_reliable_negatives(
        cv_results['model'], data_stuff, unlabelled_idx, th=args.th)
    unlabelled_idx = np.array(
        [idx for idx in unlabelled_idx if idx not in new_negatives])

    #update RN
    if iteration == 1:
        negative_idx = new_negatives
        indices_to_use = np.append(positive_idx, negative_idx)
    else:
        negative_idx = np.append(negative_idx, new_negatives)
        indices_to_use = np.append(indices_to_use, new_negatives)

    #update weights for next iteration
    data_stuff[2] = get_new_weights(data_stuff[1], negative_idx)

    N_neg, N_unlabelled = len(negative_idx), len(unlabelled_idx)
    logging.info('Reliable negatives: %i (new %i), unlabelled data: %i' %
                 (N_neg, len(new_negatives), N_unlabelled))
    iteration += 1  # advance the counter used by the maxit guard and the first-iteration branch
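A sketch of the extraction step, assuming the model yields a positive-class probability per sample: unlabelled points scored below th become reliable negatives, while points near 1 are flagged as positive candidates. The helper name and return shape come from the call above; the body, the stats keys, and the multi-input predict call are assumptions:

def extract_reliable_negatives(model, data_stuff, unlabelled_idx, th=0.1):
    inputs = data_stuff[0]
    proba = model.predict([x[unlabelled_idx] for x in inputs]).ravel()
    new_negatives = unlabelled_idx[proba < th]       # confidently scored negative
    maybe_drug = unlabelled_idx[proba > 1.0 - th]    # confidently scored positive
    stats = {'mean': float(proba.mean()), 'new': len(new_negatives)}
    return new_negatives, stats, maybe_drug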