def allow_migrate(self, db, app_label, model=None, **hints):
    """Decide whether a migration may run on the database alias ``db``.

    When both ``app_label`` and a ``model_name`` hint are given, the model is
    looked up with ``get_model_by_name`` and replaces the ``model`` argument.

    Returns:
        ``True`` when no model is available to route on; the result of
        ``check_can_migrate(db)`` when ``self.db_for_read(model)`` equals
        ``db``; otherwise ``True`` only if the model has no routed database
        and ``backend_is_non_db(db)`` is false.
    """
    requested_name = hints.get("model_name")
    if app_label and requested_name:
        model = get_model_by_name(app_label, requested_name)

    # Guard clause: without a model there is nothing to route on.
    if not model:
        return True

    routed_db = self.db_for_read(model)
    if routed_db == db:
        return check_can_migrate(db)

    # Models without a routed database may migrate on any backend for which
    # backend_is_non_db() is false; everything else is refused.
    return not routed_db and not backend_is_non_db(db)
def allow_migrate(self, db, app_label, model=None, **hints):
    """Return whether migrations for the given model are allowed on ``db``.

    The model is taken from the ``model`` argument, or resolved through
    ``get_model_by_name`` when ``app_label`` and the ``model_name`` hint are
    both truthy.  Without a model the migration is always allowed.
    """
    name_hint = hints.get('model_name')
    if app_label and name_hint:
        model = get_model_by_name(app_label, name_hint)

    if model:
        assigned = self.db_for_read(model)
        if assigned == db:
            # The model is routed to this very database: let the backend
            # check decide.
            allowed = check_can_migrate(db)
        else:
            # Only unrouted models are allowed, and only when
            # backend_is_non_db() is false for this alias.
            allowed = not (assigned or backend_is_non_db(db))
    else:
        allowed = True
    return allowed
def plot_top_loss(k=10):
    """Plot the misclassified validation images with the highest scores.

    Every row of ``new_valid.csv`` is run through the checkpoint matched by
    ``../runs/exp15/best*``.  For each wrong prediction the model output at
    the predicted class is recorded; the ``k`` largest of those values select
    the images that are plotted (five per row) and whose names are printed.

    Args:
        k: Maximum number of misclassified images to show.  When fewer
            samples are misclassified only those are shown; when none are,
            nothing is plotted.
    """
    df = pd.read_csv(os.path.join(cfg.ds_folder, 'new_valid.csv'),
                     encoding='utf8')
    folder = os.path.join(cfg.ds_folder, 'train')
    _, tfms = get_transforms(cfg.img_size)
    model = get_model_by_name(cfg.model_name)
    model.load_state_dict(
        torch.load(glob("../runs/exp15/best*")[0])['state_dict'])
    model.to("cuda:0")
    model.eval()

    error_list = []
    error_index = []
    # Gradients are never needed here, so disable them once for the whole
    # pass instead of re-entering the context for every sample.
    with torch.no_grad():
        for i in tqdm(range(len(df))):
            filename = os.path.join(folder, str(df['id'][i]) + ".jpg")
            label = df['label'][i]
            img = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB)
            img = tfms(image=img)['image'].astype('float32').transpose(2, 0, 1)
            img = torch.from_numpy(
                img.reshape(-1, 3, cfg.img_size, cfg.img_size)).to("cuda:0")
            pred = model(img)
            _, pred_label = torch.max(pred, dim=1)
            if (pred_label.cpu() == label).item():
                continue
            # Index with a plain int: this avoids relying on NumPy accepting
            # a (possibly CUDA) tensor as an index and keeps the score scalar.
            error_score = pred.cpu().numpy().squeeze()[pred_label.item()]
            error_list.append(error_score)
            error_index.append(i)

    # argpartition raises when asked for more elements than exist, so never
    # request more than the number of misclassified samples.
    k = min(k, len(error_list))
    if k <= 0:
        print("no misclassified samples to plot")
        return

    ind = np.argpartition(np.array(error_list), -k)[-k:]
    error_img_index = np.array(error_index)[ind]

    # plot: five images per row, as many rows as needed (2 x 5 for k == 10)
    n_cols = 5
    n_rows = -(-k // n_cols)  # ceiling division
    plt.figure(figsize=(12, 3 * n_rows))
    for i in range(k):
        plt.subplot(n_rows, n_cols, i + 1)
        image_name = str(df['id'][error_img_index[i]]) + ".jpg"
        plt.imshow(
            Image.open(os.path.join(folder, image_name)).convert('RGB'))
        print("filename is", image_name)
        plt.title("label {}".format(df['label'][error_img_index[i]]))
    plt.show()
def train_dag(dag, train_data, sample_weight=None):
    """Fit every node of ``dag`` once its inputs are available.

    Each ``dag[m]`` is read as a 3-item sequence: ``dag[m][0]`` names the
    inputs (passed to ``data_ready``/``get_data``), ``dag[m][1]`` describes
    the model (passed to ``utils.get_model_by_name``) and ``dag[m][2]`` names
    the output slot(s) in the data cache.

    Args:
        dag: Mapping of node name to the triple described above.  It must
            contain an ``'input'`` node whose third item is the cache key
            under which ``train_data`` is stored.
        train_data: ``(features, targets)`` pair.  If both items are numpy
            arrays they are converted to a ``DataFrame`` and a ``Series``.
        sample_weight: Optional weights forwarded to ``fit_model``.  Replaced
            by the weights stored alongside a node's input data, if any.

    Returns:
        dict mapping node name to its fitted model (``'input'`` maps to
        ``True``).
    """
    models = dict()
    data_cache = dict()

    # happens inside booster
    isarray_0 = isinstance(train_data[0], np.ndarray)
    isarray_1 = isinstance(train_data[1], np.ndarray)
    if isarray_0 and isarray_1:
        train_data = (pd.DataFrame(train_data[0]), pd.Series(train_data[1]))

    data_cache[dag['input'][2]] = train_data
    models['input'] = True

    def unfinished_models():
        return [m for m in dag if m not in models]

    def data_available():
        return [m for m in dag if data_ready(dag[m][0], data_cache)]

    def next_methods():
        return [m for m in unfinished_models() if m in data_available()]

    while next_methods():
        for m in next_methods():
            # obtain the data
            features, targets, *rest = get_data(dag[m][0], data_cache)
            if rest:
                # NOTE(review): this rebinding persists for later nodes whose
                # inputs carry no weights of their own -- confirm intended.
                sample_weight = rest[0]
            ModelClass, model_params = utils.get_model_by_name(dag[m][1])

            # Parameters may arrive as the strings 'True'/'False'/'None';
            # turn them into the corresponding Python objects.
            for p in model_params:
                if model_params[p] == 'True':
                    model_params[p] = True
                elif model_params[p] == 'False':
                    model_params[p] = False
                elif model_params[p] == 'None':
                    model_params[p] = None

            out_name = dag[m][2]
            if dag[m][1][0] == 'stacker':
                sub_dags, initial_dag, input_data = \
                    dag_parser.extract_subgraphs(dag, m)
                model_params = dict(sub_dags=sub_dags, initial_dag=initial_dag)
                model = ModelClass(**model_params)
                features, targets = data_cache[input_data]
            elif isinstance(out_name, list):
                model = ModelClass(len(out_name), **model_params)
            else:
                model = ModelClass(**model_params)

            # build the model
            # some models cannot handle cases with only one class, we also
            # need to check we are not working with a list of inputs for an
            # aggregator
            if (custom_models.is_predictor(model)
                    and isinstance(targets, pd.Series)
                    and len(targets.unique()) == 1):
                model = custom_models.ConstantModel(targets.iloc[0])
            models[m] = fit_model(model, features, targets,
                                  sample_weight=sample_weight)
            # needed to update model if the result was cached
            model = models[m]

            # use the model to process the data
            if isinstance(model, custom_models.Stacker):
                # `.ix` was removed in pandas 1.0; the lookup is label based
                # (an index object), so `.loc` is the direct replacement.
                data_cache[out_name] = (
                    model.train, targets.loc[model.train.index])
                continue
            if isinstance(model, custom_models.Aggregator):
                data_cache[out_name] = model.aggregate(features, targets)
                continue
            if custom_models.is_transformer(model):
                trans = model.transform(features)
            else:  # this is a classifier not a preprocessor
                trans = features  # the data do not change
                if isinstance(features, pd.DataFrame):
                    targets = pd.Series(list(model.predict(features)),
                                        index=features.index)
                else:  # this should happen only inside booster
                    targets = pd.Series(list(model.predict(features)))

            # save the outputs
            # the previous model divided the data into several data-sets
            if isinstance(trans, list):
                if (isinstance(model, custom_models.KMeansSplitter)
                        and sample_weight is not None):
                    # need to divide the targets and the weights
                    trans = [(x, targets.loc[x.index],
                              sample_weight[model.weight_idx[i]])
                             for i, x in enumerate(trans)]
                else:
                    # need to divide the targets
                    trans = [(x, targets.loc[x.index]) for x in trans]
                # save all the data to the cache
                for i, part in enumerate(trans):
                    data_cache[out_name[i]] = part
            else:
                if isinstance(features, pd.DataFrame):
                    # we have only one output, can be numpy array
                    trans = pd.DataFrame(trans, index=features.index)
                else:
                    trans = pd.DataFrame(trans)
                trans.dropna(axis='columns', how='all', inplace=True)
                data_cache[out_name] = (trans, targets)  # save it

    return models
def train_dag(dag, train_data):
    """Fit every node of ``dag`` once its inputs are available.

    Each ``dag[m]`` is read as a 3-item sequence: ``dag[m][0]`` names the
    inputs (passed to ``data_ready``/``get_data``), ``dag[m][1]`` describes
    the model (passed to ``utils.get_model_by_name``) and ``dag[m][2]`` names
    the output slot(s) in the data cache.

    Args:
        dag: Mapping of node name to the triple described above.  It must
            contain an ``'input'`` node whose third item is the cache key
            under which ``train_data`` is stored.
        train_data: The data stored for the ``'input'`` node; downstream
            nodes unpack cached data as ``(features, targets)``.

    Returns:
        dict mapping node name to its fitted model (``'input'`` maps to
        ``True``).
    """
    models = dict()
    data_cache = dict()
    data_cache[dag['input'][2]] = train_data
    models['input'] = True

    def unfinished_models():
        return [m for m in dag if m not in models]

    def data_available():
        return [m for m in dag if data_ready(dag[m][0], data_cache)]

    def next_methods():
        return [m for m in unfinished_models() if m in data_available()]

    while next_methods():
        for m in next_methods():
            # obtain the data
            features, targets = get_data(dag[m][0], data_cache)
            ModelClass, model_params = utils.get_model_by_name(dag[m][1])
            out_name = dag[m][2]
            if isinstance(out_name, list):
                model = ModelClass(len(out_name), **model_params)
            else:
                # SelectKBest / PCA take an absolute size, while the DAG
                # supplies a fraction ('feat_frac', default 1.0) of the
                # feature columns.  Work on a copy so the parameter dict
                # returned by get_model_by_name is never modified.
                probe = ModelClass()
                for base, size_param in (
                        (feature_selection.SelectKBest, 'k'),
                        (decomposition.PCA, 'n_components')):
                    if isinstance(probe, base):
                        model_params = model_params.copy()
                        feat_frac = model_params.pop('feat_frac', 1.0)
                        model_params[size_param] = max(
                            1, int(feat_frac * (features.shape[1] - 1)))
                model = ModelClass(**model_params)

            # build the model
            # some models cannot handle cases with only one class, we also
            # need to check we are not working with a list of inputs for an
            # aggregator
            if (isinstance(model, custom_models.Predictor)
                    and isinstance(targets, pd.Series)
                    and len(targets.unique()) == 1):
                model = custom_models.ConstantModel(targets.iloc[0])
            models[m] = model.fit(features, targets)

            # use the model to process the data
            if isinstance(model, custom_models.Aggregator):
                data_cache[out_name] = model.aggregate(features, targets)
                continue
            if isinstance(model, custom_models.Transformer):
                trans = model.transform(features)
            else:  # this is a classifier not a preprocessor
                trans = features  # the data do not change
                targets = pd.Series(list(model.predict(features)),
                                    index=features.index)

            # save the outputs
            if isinstance(trans, list):
                # the previous model divided the data into several data-sets;
                # need to divide the targets
                trans = [(x, targets.loc[x.index]) for x in trans]
                # save all the data to the cache
                for i, part in enumerate(trans):
                    data_cache[out_name[i]] = part
            else:
                # we have only one output, can be numpy array
                trans = pd.DataFrame(trans, index=features.index)
                data_cache[out_name] = (trans, targets)  # save it

    return models
# Inference script fragment: parse options, build the test loader and load
# one model per checkpoint found under --weights.
parser = ArgumentParser()
# --weights is handed to get_kfold_model() below to enumerate checkpoints.
parser.add_argument("--weights", type=str, default="../runs/exp12/")
# --tta: the string 'yes' wraps every model in a test-time-augmentation
# wrapper; any other value leaves the models unwrapped.
parser.add_argument("--tta", type=str, default='no')
opt = parser.parse_args()
cfg = Config()
desc_test = os.path.join(cfg.ds_folder, 'test.csv')
# Only the second value returned by get_transforms() is used here.
_, transform_test = get_transforms(cfg.img_size)
valid_data = TestDataset(desc_test, data_folder=os.path.join(cfg.ds_folder, "test"), transform=transform_test)
# shuffle=False keeps the loader order fixed across the per-model passes.
test_loader = DataLoader(dataset=valid_data, batch_size=cfg.bs, shuffle=False)
models = []
for path in get_kfold_model(opt.weights):
    model = get_model_by_name(cfg.model_name)
    model.load_state_dict(torch.load(path)['state_dict'])
    if opt.tta == 'yes':
        # Predictions over the TTA transforms are merged with 'mean'.
        model = tta.ClassificationTTAWrapper(model, get_tta_transforms(), merge_mode='mean')
    models.append(model)
# One (initially empty) result list per loaded model.
rst = [[] for i in range(len(models))]
for index in range(len(models)):
    net = models[index]
    net.to("cuda")
    net.eval()
    files = []
    # NOTE(review): the body of this no_grad block is not visible in this
    # excerpt; the snippet ends here.
    with torch.no_grad():
def train_dag(dag, train_data, sample_weight=None):
    """Fit every node of ``dag`` once its inputs are available.

    Each ``dag[m]`` is read as a 3-item sequence: ``dag[m][0]`` names the
    inputs (passed to ``data_ready``/``get_data``), ``dag[m][1]`` describes
    the model (passed to ``utils.get_model_by_name``) and ``dag[m][2]`` names
    the output slot(s) in the data cache.

    Args:
        dag: Mapping of node name to the triple described above.  It must
            contain an ``'input'`` node whose third item is the cache key
            under which ``train_data`` is stored.
        train_data: ``(features, targets)`` pair.  If both items are numpy
            arrays they are converted to a ``DataFrame`` and a ``Series``.
        sample_weight: Optional weights forwarded to ``fit_model``.  Replaced
            by the weights stored alongside a node's input data, if any.

    Returns:
        dict mapping node name to its fitted model (``'input'`` maps to
        ``True``).
    """
    models = dict()
    data_cache = dict()

    # happens inside booster
    if isinstance(train_data[0], np.ndarray) and isinstance(train_data[1], np.ndarray):
        train_data = (pd.DataFrame(train_data[0]), pd.Series(train_data[1]))

    data_cache[dag['input'][2]] = train_data
    models['input'] = True

    def unfinished_models():
        return [m for m in dag if m not in models]

    def data_available():
        return [m for m in dag if data_ready(dag[m][0], data_cache)]

    def next_methods():
        return [m for m in unfinished_models() if m in data_available()]

    while next_methods():
        for m in next_methods():
            # obtain the data
            features, targets, *rest = get_data(dag[m][0], data_cache)
            if rest:
                # NOTE(review): this rebinding persists for later nodes whose
                # inputs carry no weights of their own -- confirm intended.
                sample_weight = rest[0]
            ModelClass, model_params = utils.get_model_by_name(dag[m][1])
            out_name = dag[m][2]
            if dag[m][1][0] == 'stacker':
                sub_dags, initial_dag, input_data = extract_subgraphs(dag, m)
                model_params = dict(sub_dags=sub_dags, initial_dag=initial_dag)
                model = ModelClass(**model_params)
                features, targets = data_cache[input_data]
            elif isinstance(out_name, list):
                model = ModelClass(len(out_name), **model_params)
            else:
                # SelectKBest / PCA take an absolute size, while the DAG
                # supplies a fraction ('feat_frac', default 1.0) of the
                # feature columns.  Work on a copy so the parameter dict
                # returned by get_model_by_name is never modified.
                probe = ModelClass()
                for base, size_param in (
                        (feature_selection.SelectKBest, 'k'),
                        (decomposition.PCA, 'n_components')):
                    if isinstance(probe, base):
                        model_params = model_params.copy()
                        feat_frac = model_params.pop('feat_frac', 1.0)
                        model_params[size_param] = max(
                            1, int(feat_frac * (features.shape[1] - 1)))
                model = ModelClass(**model_params)

            # build the model
            # some models cannot handle cases with only one class, we also
            # need to check we are not working with a list of inputs for an
            # aggregator
            if (custom_models.is_predictor(model)
                    and isinstance(targets, pd.Series)
                    and len(targets.unique()) == 1):
                model = custom_models.ConstantModel(targets.iloc[0])
            models[m] = fit_model(model, features, targets,
                                  sample_weight=sample_weight)
            model = models[m]  # needed to update model if the result was cached

            # use the model to process the data
            if isinstance(model, custom_models.Stacker):
                # `.ix` was removed in pandas 1.0; the lookup is label based
                # (an index object), so `.loc` is the direct replacement.
                data_cache[out_name] = (
                    model.train, targets.loc[model.train.index])
                continue
            if isinstance(model, custom_models.Aggregator):
                data_cache[out_name] = model.aggregate(features, targets)
                continue
            if custom_models.is_transformer(model):
                trans = model.transform(features)
            else:  # this is a classifier not a preprocessor
                trans = features  # the data do not change
                if isinstance(features, pd.DataFrame):
                    targets = pd.Series(list(model.predict(features)),
                                        index=features.index)
                else:  # this should happen only inside booster
                    targets = pd.Series(list(model.predict(features)))

            # save the outputs
            if isinstance(trans, list):
                # the previous model divided the data into several data-sets
                if (isinstance(model, custom_models.KMeansSplitter)
                        and sample_weight is not None):
                    # need to divide the targets and the weights
                    trans = [(x, targets.loc[x.index],
                              sample_weight[model.weight_idx[i]])
                             for i, x in enumerate(trans)]
                else:
                    # need to divide the targets
                    trans = [(x, targets.loc[x.index]) for x in trans]
                # save all the data to the cache
                for i, part in enumerate(trans):
                    data_cache[out_name[i]] = part
            else:
                if isinstance(features, pd.DataFrame):
                    # we have only one output, can be numpy array
                    trans = pd.DataFrame(trans, index=features.index)
                else:
                    trans = pd.DataFrame(trans)
                trans.dropna(axis='columns', how='all', inplace=True)
                data_cache[out_name] = (trans, targets)  # save it

    return models