def gini_impurity(rows):
    """Return the Gini impurity of the class labels in *rows*.

    Gini = 1 - sum_i p_i**2, where p_i is the fraction of rows carrying
    label i.  0.0 means the rows are pure (a single class); the value
    grows toward 1.0 as the label distribution spreads over more classes.

    Note: for empty *rows* the sum is empty and this returns 1, matching
    the original implementation (which never divided in that case).
    """
    # label -> occurrence count; helper defined elsewhere in this module.
    counts = class_counts(rows)
    # Hoisted out of the loop: the original recomputed len(rows) per label.
    total = float(len(rows))
    return 1 - sum((counts[label] / total) ** 2 for label in counts)
def init_low_model(self, train_set, K):
    """Build the rank-*K* `model.Low` factorization model for *train_set*
    and allocate per-factor gradient buffers.

    Side effects: sets `self.dims`, `self.model`, `self.scale`; appends one
    float64 zero tensor per factor (A, B, C) to both `self.accum_gradients`
    and `self.gradients`, all placed on `self.device`.
    """
    counts = utils.class_counts(train_set)
    self.dims = [train_set.b_dims, train_set.c_dims]
    self.model = model.Low(train_set.a_dims, train_set.b_dims,
                           train_set.c_dims, K, counts)
    if torch.cuda.device_count() > 1:
        logger.info(f'Using {torch.cuda.device_count()} GPUs')
        # Passthrough wrapper so factor attributes (A, B, C) stay reachable.
        self.model = DataParallelPassthrough(self.model)
    self.model.to(self.device)
    # One accumulated-gradient and one current-gradient buffer per factor.
    # A single loop replaces the original six copy-pasted append statements;
    # the per-list ordering (A, B, C) is unchanged.
    for factor in (self.model.A, self.model.B, self.model.C):
        self.accum_gradients.append(
            torch.zeros_like(factor, dtype=torch.float64).to(self.device))
        self.gradients.append(
            torch.zeros_like(factor, dtype=torch.float64).to(self.device))
    # NOTE(review): magic denominators 5. and 6. look like reference dims
    # for normalization — confirm their origin with the training code.
    self.scale = (train_set.b_dims[1] / 5.) * (train_set.c_dims[0] / 6.)
def __init__(self, dataset):
    """A leaf node that determines the predicted label for *dataset*.

    Picks the most frequent class label — ties broken by the LAST such
    label in iteration order, matching the original `>=` comparison —
    and records the fraction of rows carrying that label as the
    prediction confidence.
    """
    counts = class_counts(dataset)  # label -> occurrence count
    total = sum(counts[label] for label in counts)
    best_label = None
    best_count = 0
    for label in counts:
        # `>=` (not `>`) preserves the original tie-breaking: last max wins.
        if counts[label] >= best_count:
            best_count = counts[label]
            best_label = label
    self.prediction_label = best_label
    # NOTE(review): an empty dataset raises ZeroDivisionError here,
    # exactly as the original did — confirm callers never pass one.
    self.prediction_confidence = best_count / total
def init_loaders(self, train_set, val_set):
    """Create the train/val DataLoaders, derive the per-epoch logging
    periods, and build the class-weighted BCE loss.

    Side effects: sets `self.train_loader`, `self.val_loader`,
    `self.train_T`, `self.eval_T`, and `self.loss_fn`.
    """
    batch_size = self.params['batch_size']
    self.train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=config.num_workers)
    self.val_loader = torch.utils.data.DataLoader(
        val_set,
        batch_size=batch_size,
        shuffle=False,
        num_workers=config.num_workers)
    # Periods expressed as a fraction of the batches per epoch
    # (12 and 4 sub-periods respectively).
    self.train_T = ceil(len(self.train_loader.sampler) / (batch_size * 12))
    self.eval_T = ceil(len(self.val_loader.sampler) / (batch_size * 4))
    # Weight positive examples by the negative/positive class ratio
    # (assumes counts maps 0 -> negatives, 1 -> positives — TODO confirm
    # against utils.class_counts).
    counts = utils.class_counts(train_set)
    self.loss_fn = torch.nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor(counts[0] / counts[1]))
def __init__(self, rows):
    """Leaf node: store the class distribution of *rows*.

    `self.predictions` holds whatever the module-level `class_counts`
    helper returns for *rows* — presumably a mapping of class label to
    occurrence count; confirm against the helper's definition.
    """
    self.predictions = class_counts(rows)
def __init__(self, df):
    """Leaf node: map each class label found in *df* to its count.

    Assumes `class_counts(df)` returns a sequence of two parallel
    arrays, (labels, counts) — TODO confirm against the helper.
    """
    # column_stack pairs each label with its count row-wise.
    # NOTE(review): np.column_stack promotes both columns to a common
    # dtype, so string labels would turn the counts into strings as
    # well; dict(zip(*class_counts(df))) would preserve dtypes — verify
    # whether callers depend on the coerced values before changing.
    counts = np.column_stack(class_counts(df))
    self.prediction = {row[0]: row[1] for row in counts}