Example #1
def allow_migrate(self, db, app_label, model=None, **hints):
    # Database-router hook: decide whether migrations for the given app
    # (and optionally model) may run on the database alias ``db``.
    model_name = hints.get("model_name")
    if app_label and model_name:
        model = get_model_by_name(app_label, model_name)
    if model:
        non_db = self.db_for_read(model)
        if non_db == db:
            return check_can_migrate(db)
        if not non_db and not backend_is_non_db(db):
            return True
        return False
    return True
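This allow_migrate is the hook Django consults through its database-router mechanism; a minimal sketch of how such a router is registered, with a hypothetical module path:

# settings.py -- hypothetical dotted path to a class defining the method above
DATABASE_ROUTERS = ["myproject.routers.NonRelRouter"]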
Example #3
import os
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from PIL import Image
from tqdm import tqdm

# cfg, get_transforms and get_model_by_name are assumed to come from the
# surrounding project


def plot_top_loss(k=10):
    """Show the k misclassified validation images the model scored highest."""
    df = pd.read_csv(os.path.join(cfg.ds_folder, 'new_valid.csv'),
                     encoding='utf8')
    folder = os.path.join(cfg.ds_folder, 'train')
    _, tfms = get_transforms(cfg.img_size)

    model = get_model_by_name(cfg.model_name)
    model.load_state_dict(
        torch.load(glob("../runs/exp15/best*")[0])['state_dict'])
    model.to("cuda:0")
    model.eval()

    error_list = []
    error_index = []
    for i in tqdm(range(len(df))):
        with torch.no_grad():
            filename = os.path.join(folder, str(df['id'][i]) + ".jpg")
            label = df['label'][i]
            img = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB)
            img = tfms(image=img)['image'].astype('float32').transpose(2, 0, 1)
            img = torch.from_numpy(
                img.reshape(-1, 3, cfg.img_size, cfg.img_size)).to("cuda:0")
            pred = model(img)
            _, pred_label = torch.max(pred, dim=1)
            if (pred_label.cpu() == label).item():
                continue
            # .item() is needed here: indexing a numpy array with a CUDA
            # tensor raises a TypeError
            error_score = pred.cpu().numpy().squeeze()[pred_label.item()]
            error_list.append(error_score)
            error_index.append(i)
    # indices of the k largest error scores
    ind = np.argpartition(np.array(error_list), -k)[-k:]
    error_img_index = np.array(error_index)[ind]

    # plot; note the hard-coded 2x5 grid assumes the default k == 10
    plt.figure(figsize=(12, 6))
    for i in range(k):
        plt.subplot(2, 5, i + 1)
        plt.imshow(
            Image.open(
                os.path.join(folder,
                             str(df['id'][error_img_index[i]]) +
                             ".jpg")).convert('RGB'))
        print("filename is", str(df['id'][error_img_index[i]]) + ".jpg")
        plt.title("label {}".format(df['label'][error_img_index[i]]))
    plt.show()
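The top-k selection above relies on np.argpartition, which moves the k largest entries into the last k slots without a full sort; a quick standalone check:

import numpy as np

scores = np.array([0.2, 0.9, 0.1, 0.7, 0.5])
top2 = np.argpartition(scores, -2)[-2:]   # indices of the two largest scores
print(scores[top2])                       # 0.7 and 0.9, internal order unspecified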
Example #4
def train_dag(dag, train_data, sample_weight=None):
    models = dict()
    data_cache = dict()

    # plain numpy arrays appear only when called from inside the booster;
    # convert them so the rest of the function can rely on pandas containers
    isarray_0 = isinstance(train_data[0], np.ndarray)
    isarray_1 = isinstance(train_data[1], np.ndarray)
    if isarray_0 and isarray_1:
        train_data = (pd.DataFrame(train_data[0]), pd.Series(train_data[1]))

    data_cache[dag['input'][2]] = train_data
    models['input'] = True

    def unfinished_models():
        return [m for m in dag if m not in models]

    def data_available():
        return [m for m in dag if data_ready(dag[m][0], data_cache)]

    def next_methods():
        return [m for m in unfinished_models() if m in data_available()]

    while next_methods():

        for m in next_methods():
            # obtain the data
            features, targets, *rest = get_data(dag[m][0], data_cache)
            if rest:
                sample_weight = rest[0]
            ModelClass, model_params = utils.get_model_by_name(dag[m][1])

            # map string-encoded literals back to Python values
            for p in model_params:
                if model_params[p] == 'True':
                    model_params[p] = True
                elif model_params[p] == 'False':
                    model_params[p] = False
                elif model_params[p] == 'None':
                    model_params[p] = None

            out_name = dag[m][2]
            if dag[m][1][0] == 'stacker':
                sub_dags, initial_dag, input_data = \
                    dag_parser.extract_subgraphs(dag, m)
                model_params = dict(sub_dags=sub_dags, initial_dag=initial_dag)
                model = ModelClass(**model_params)
                features, targets = data_cache[input_data]
            elif isinstance(out_name, list):
                model = ModelClass(len(out_name), **model_params)
            else:
                model = ModelClass(**model_params)

            # build the model
            # some models cannot handle a single-class target; the pd.Series
            # check also excludes the list-of-inputs case of an aggregator
            if custom_models.is_predictor(model) and isinstance(
                    targets, pd.Series) and len(targets.unique()) == 1:
                model = custom_models.ConstantModel(targets.iloc[0])
            models[m] = fit_model(model,
                                  features,
                                  targets,
                                  sample_weight=sample_weight)
            # needed to update model if the result was cached
            model = models[m]

            # use the model to process the data
            if isinstance(model, custom_models.Stacker):
                # pandas removed .ix; use label-based .loc instead
                data_cache[out_name] = (model.train,
                                        targets.loc[model.train.index])
                continue
            if isinstance(model, custom_models.Aggregator):
                data_cache[out_name] = model.aggregate(features, targets)
                continue
            if custom_models.is_transformer(model):
                trans = model.transform(features)
            else:  # this is a classifier not a preprocessor
                trans = features  # the data do not change
                if isinstance(features, pd.DataFrame):
                    targets = pd.Series(list(model.predict(features)),
                                        index=features.index)
                else:  # this should happen only inside booster
                    targets = pd.Series(list(model.predict(features)))

            # save the outputs
            # the previous model divided the data into several data-sets
            if isinstance(trans, list):
                if isinstance(model, custom_models.KMeansSplitter
                              ) and sample_weight is not None:
                    trans = [(x, targets.loc[x.index],
                              sample_weight[model.weight_idx[i]])
                             for i, x in enumerate(trans)
                             ]  # need to divide the targets and the weights
                else:
                    trans = [(x, targets.loc[x.index])
                             for x in trans]  # need to divide the targets
                for i in range(len(trans)):
                    # save all the data to the cache
                    data_cache[out_name[i]] = trans[i]
            else:
                if isinstance(features, pd.DataFrame):
                    # we have only one output, can be numpy array
                    trans = pd.DataFrame(trans, index=features.index)
                else:
                    trans = pd.DataFrame(trans)
                trans.dropna(axis='columns', how='all', inplace=True)
                data_cache[out_name] = (trans, targets)  # save it

    return models
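For orientation, here is a sketch of the node layout these train_dag variants index into, inferred from the dag[m][0] / dag[m][1] / dag[m][2] accesses above; the node names and model specs are invented for illustration:

# hypothetical DAG: node -> (input names, model spec, output name(s));
# dag['input'][2] names the raw training data in the cache, and the spec
# tuple is whatever utils.get_model_by_name resolves
dag = {
    'input': ([], 'input', 'data'),
    'scale': (['data'], ('StandardScaler', {}), 'scaled'),
    'clf': (['scaled'], ('SVC', {'C': '1.0'}), 'pred'),
}
models = train_dag(dag, (features_df, targets_series))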
Example #5
def train_dag(dag, train_data):
    models = dict()
    data_cache = dict()

    data_cache[dag['input'][2]] = train_data
    models['input'] = True

    def unfinished_models():
        return [m for m in dag if m not in models]

    def data_available():
        return [m for m in dag if data_ready(dag[m][0], data_cache)]

    def next_methods():
        return [m for m in unfinished_models() if m in data_available()]

    while next_methods():

        for m in next_methods():
            # print("Processing:", m)

            # obtain the data
            features, targets = get_data(dag[m][0], data_cache)
            ModelClass, model_params = utils.get_model_by_name(dag[m][1])
            out_name = dag[m][2]
            if isinstance(out_name, list):
                model = ModelClass(len(out_name), **model_params)
            else:
                if isinstance(ModelClass(), feature_selection.SelectKBest):
                    # copy before mutating so the shared params dict is untouched
                    model_params = model_params.copy()
                    feat_frac = model_params.pop('feat_frac', 1.0)
                    model_params['k'] = max(1, int(feat_frac * (features.shape[1] - 1)))
                if isinstance(ModelClass(), decomposition.PCA):
                    model_params = model_params.copy()
                    feat_frac = model_params.pop('feat_frac', 1.0)
                    model_params['n_components'] = max(1, int(feat_frac * (features.shape[1] - 1)))
                model = ModelClass(**model_params)

            # build the model
            # some models cannot handle cases with only one class, we also need to check we are not working with a list
            # of inputs for an aggregator
            if isinstance(model, custom_models.Predictor) and isinstance(targets, pd.Series) and len(targets.unique()) == 1:
                model = custom_models.ConstantModel(targets.iloc[0])
            models[m] = model.fit(features, targets)

            # use the model to process the data
            if isinstance(model, custom_models.Aggregator):
                data_cache[out_name] = model.aggregate(features, targets)
                continue
            if isinstance(model, custom_models.Transformer):
                trans = model.transform(features)
            else:              # this is a classifier not a preprocessor
                trans = features                # the data do not change
                targets = pd.Series(list(model.predict(features)), index=features.index)

            # save the outputs
            if isinstance(trans, list):         # the previous model divided the data into several data-sets
                trans = [(x, targets.loc[x.index]) for x in trans]     # need to divide the targets
                for i in range(len(trans)):
                    data_cache[out_name[i]] = trans[i]          # save all the data to the cache
            else:
                trans = pd.DataFrame(trans, index=features.index)       # we have only one output, can be numpy array
                data_cache[out_name] = (trans, targets)                 # save it

    return models
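The feat_frac convention above turns a fraction of the available feature columns into the integer hyper-parameter each transformer expects (k for SelectKBest, n_components for PCA); a standalone illustration of the arithmetic:

n_features = 20                                  # features.shape[1]
feat_frac = 0.5
k = max(1, int(feat_frac * (n_features - 1)))    # int(9.5) -> 9 columns kept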
Example #6
import os
from argparse import ArgumentParser

import torch
import ttach as tta  # assumed: the ttach test-time-augmentation library
from torch.utils.data import DataLoader

# Config, get_transforms, TestDataset, get_kfold_model, get_model_by_name and
# get_tta_transforms are assumed to be helpers from the surrounding project

parser = ArgumentParser()
parser.add_argument("--weights", type=str, default="../runs/exp12/")
parser.add_argument("--tta", type=str, default='no')
opt = parser.parse_args()
cfg = Config()

desc_test = os.path.join(cfg.ds_folder, 'test.csv')
_, transform_test = get_transforms(cfg.img_size)
valid_data = TestDataset(desc_test,
                         data_folder=os.path.join(cfg.ds_folder, "test"),
                         transform=transform_test)
test_loader = DataLoader(dataset=valid_data, batch_size=cfg.bs, shuffle=False)

models = []
for path in get_kfold_model(opt.weights):
    model = get_model_by_name(cfg.model_name)
    model.load_state_dict(torch.load(path)['state_dict'])
    if opt.tta == 'yes':
        model = tta.ClassificationTTAWrapper(model,
                                             get_tta_transforms(),
                                             merge_mode='mean')
    models.append(model)

rst = [[] for _ in range(len(models))]
for index in range(len(models)):
    net = models[index]
    net.to("cuda")
    net.eval()

    files = []
    with torch.no_grad():
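        # (the original example is truncated here; what follows is a generic
        # sketch, not the source code: collect per-fold probabilities, then
        # average them into a single ensemble prediction)
        for batch, names in test_loader:   # assumes TestDataset yields (image, filename)
            probs = net(batch.to("cuda")).softmax(dim=1).cpu()
            rst[index].append(probs)
            files.extend(names)
    rst[index] = torch.cat(rst[index])

ensemble = torch.stack(rst).mean(dim=0)   # mean over the k folds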
Example #7
def train_dag(dag, train_data, sample_weight=None):
    models = dict()
    data_cache = dict()

    # plain numpy arrays appear only when called from inside the booster;
    # convert them so the rest of the function can rely on pandas containers
    if isinstance(train_data[0], np.ndarray) and isinstance(train_data[1], np.ndarray):
        train_data = (pd.DataFrame(train_data[0]), pd.Series(train_data[1]))

    data_cache[dag['input'][2]] = train_data
    models['input'] = True

    def unfinished_models():
        return [m for m in dag if m not in models]

    def data_available():
        return [m for m in dag if data_ready(dag[m][0], data_cache)]

    def next_methods():
        return [m for m in unfinished_models() if m in data_available()]

    while next_methods():

        for m in next_methods():
            # print("Processing:", m)

            # obtain the data
            features, targets, *rest = get_data(dag[m][0], data_cache)
            if rest:
                sample_weight = rest[0]
            ModelClass, model_params = utils.get_model_by_name(dag[m][1])
            out_name = dag[m][2]
            if dag[m][1][0] == 'stacker':
                sub_dags, initial_dag, input_data = extract_subgraphs(dag, m)
                model_params = dict(sub_dags=sub_dags, initial_dag=initial_dag)
                model = ModelClass(**model_params)
                features, targets = data_cache[input_data]
            elif isinstance(out_name, list):
                model = ModelClass(len(out_name), **model_params)
            else:
                if isinstance(ModelClass(), feature_selection.SelectKBest):
                    # copy before mutating so the shared params dict is untouched
                    model_params = model_params.copy()
                    feat_frac = model_params.pop('feat_frac', 1.0)
                    model_params['k'] = max(1, int(feat_frac * (features.shape[1] - 1)))
                if isinstance(ModelClass(), decomposition.PCA):
                    model_params = model_params.copy()
                    feat_frac = model_params.pop('feat_frac', 1.0)
                    model_params['n_components'] = max(1, int(feat_frac * (features.shape[1] - 1)))
                model = ModelClass(**model_params)

            # build the model
            # some models cannot handle a single-class target; the pd.Series
            # check also excludes the list-of-inputs case of an aggregator
            if custom_models.is_predictor(model) and isinstance(targets, pd.Series) and len(targets.unique()) == 1:
                model = custom_models.ConstantModel(targets.iloc[0])
            models[m] = fit_model(model, features, targets, sample_weight=sample_weight)
            model = models[m]  # needed to update model if the result was cached

            # use the model to process the data
            if isinstance(model, custom_models.Stacker):
                data_cache[out_name] = model.train, targets.loc[model.train.index]  # .ix was removed from pandas
                continue
            if isinstance(model, custom_models.Aggregator):
                data_cache[out_name] = model.aggregate(features, targets)
                continue
            if custom_models.is_transformer(model):
                trans = model.transform(features)
            else:              # this is a classifier not a preprocessor
                trans = features                # the data do not change
                if isinstance(features, pd.DataFrame):
                    targets = pd.Series(list(model.predict(features)), index=features.index)
                else: # this should happen only inside booster
                    targets = pd.Series(list(model.predict(features)))

            # save the outputs
            if isinstance(trans, list):         # the previous model divided the data into several data-sets
                if isinstance(model, custom_models.KMeansSplitter) and sample_weight is not None:
                    trans = [(x, targets.loc[x.index], sample_weight[model.weight_idx[i]]) for i, x in enumerate(trans)]  # need to divide the targets and the weights
                else:
                    trans = [(x, targets.loc[x.index]) for x in trans]     # need to divide the targets
                for i in range(len(trans)):
                    data_cache[out_name[i]] = trans[i]          # save all the data to the cache
            else:
                if isinstance(features, pd.DataFrame):
                    trans = pd.DataFrame(trans, index=features.index)       # we have only one output, can be numpy array
                else:
                    trans = pd.DataFrame(trans)
                trans.dropna(axis='columns', how='all', inplace=True)
                data_cache[out_name] = (trans, targets)                 # save it

    return models
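None of these examples show fit_model itself; below is a minimal, explicitly hypothetical sketch of the sample_weight handling its call sites above imply (the caching behavior hinted at by the "result was cached" comment is omitted):

def fit_model(model, features, targets, sample_weight=None):
    # hypothetical helper, not the original: pass sample_weight through when
    # the estimator's fit() accepts it, otherwise fall back to a plain fit
    if sample_weight is not None:
        try:
            return model.fit(features, targets, sample_weight=sample_weight)
        except TypeError:
            pass
    return model.fit(features, targets)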