Code example #1
def train(cfg_path):
    """Training demo."""
    # Load the configuration
    cfg = get_config(cfg_path)

    # Create the logger
    logger = get_logger(cfg.log_level)
    logger.info("start training:")

    # Build the model
    model = OneStageDetector(cfg)
    model.to(cfg.device)

    # Build the dataset and dataloader
    dataset = get_dataset(cfg.data.train)
    dataloader = DataLoader(
        dataset,
        batch_size=cfg.batch_size,
        sampler=cfg.sampler,
        num_workers=cfg.num_workers,
        collate_fn=partial(collate, samples_per_gpu=cfg.data.imgs_per_gpu),
        pin_memory=False)

    # Create the runner and start training
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_hooks()
    runner.run(dataloader)
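
A minimal invocation sketch for the training demo above; the config path is a placeholder, not a file from the original project.

# Hypothetical entry point; "configs/one_stage_detector.py" is a placeholder path.
if __name__ == "__main__":
    train("configs/one_stage_detector.py")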
Code example #2
File: models_impact.py  Project: thran/experiments2.0
def compare_model_difficulties(data1, data2, model1, model2, plot=True):
    if str(data1) + str(model1) not in cache:
        runner1 = Runner(data1, model1)
        runner1.run(force=True)
        cache[str(data1) + str(model1)] = runner1, model1
    else:
        runner1, model1 = cache[str(data1) + str(model1)]

    if str(data2) + str(model2) not in cache:
        runner2 = Runner(data2, model2)
        runner2.run(force=True)
        cache[str(data2) + str(model2)] = runner2, model2
    else:
        runner2, model2 = cache[str(data2) + str(model2)]

    items = list(set(data1.get_items()) & set(data2.get_items()))
    difficulties = pd.DataFrame(columns=["model1", "model2"], index=items)
    difficulties["model1"] = model1.get_difficulties(items)
    difficulties["model2"] = model2.get_difficulties(items)
    difficulties_corr = difficulties.corr(method="spearman").loc["model1", "model2"]
    if plot:
        plt.plot(difficulties["model1"], difficulties["model2"], "k.")
        plt.title("Difficulties: {}".format(difficulties_corr))
        plt.xlabel(str(model1))
        plt.ylabel(str(model2))

    return difficulties_corr
Code example #3
File: models_impact.py  Project: thran/experiments2.0
def compare_models(data1, data2, model1, model2, plot=True):
    if str(data1) + str(model1) not in cache:
        runner1 = Runner(data1, model1)
        runner1.run(force=True)
        cache[str(data1) + str(model1)] = runner1, model1
    else:
        runner1, model1 = cache[str(data1) + str(model1)]

    if str(data2) + str(model2) not in cache:
        runner2 = Runner(data2, model2)
        runner2.run(force=True)
        cache[str(data2) + str(model2)] = runner2, model2
    else:
        runner2, model2 = cache[str(data2) + str(model2)]

    difficulties_corr, skills_corr, predictions_corr = 0, 0, 0

    # difficulties
    items = list(set(data1.get_items()) & set(data2.get_items()))
    difficulties = pd.DataFrame(columns=["model1", "model2"], index=items)
    difficulties["model1"] = model1.get_difficulties(items)
    difficulties["model2"] = model2.get_difficulties(items)
    difficulties_corr = difficulties.corr(method="spearman").loc["model1", "model2"]
    if plot:
        plt.subplot(221)
        plt.plot(difficulties["model1"], difficulties["model2"], "k.")
        plt.title("Difficulties: {}".format(difficulties_corr))
        plt.xlabel(str(model1))
        plt.ylabel(str(model2))

    try:
        # skills
        students = list(set(data1.get_students()) & set(data2.get_students()))
        skills = pd.DataFrame(index=students, columns=["model1", "model2"])
        skills["model1"] = model1.get_skills(students)
        skills["model2"] = model2.get_skills(students)
        skills_corr = skills.corr(method="spearman").loc["model1", "model2"]
        if plot:
            plt.subplot(222)
            plt.plot(skills["model1"], skills["model2"], "k.")
            plt.title("Skills: {}".format(skills_corr))
            plt.xlabel(str(model1))
            plt.ylabel(str(model2))
    except AttributeError:
        pass

    # predictions
    predictions = pd.DataFrame(index=students, columns=["model1", "model2"])
    predictions["model1"] = pd.Series(runner1._log)
    predictions["model2"] = pd.Series(runner2._log)
    predictions_corr = predictions.corr(method="spearman").loc["model1", "model2"]
    if plot:
        plt.subplot(223)
        plt.plot(predictions["model1"], predictions["model2"], "k.")
        plt.title("Predictions: {}".format(predictions_corr))
        plt.xlabel(str(model1))
        plt.ylabel(str(model2))

    return difficulties_corr, skills_corr, predictions_corr
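
A hedged usage sketch for compare_models, assembled from constructors that appear elsewhere on this page (d.Data and EloPriorCurrentModel in code example #4); the dataset paths and model parameters are assumptions, not the original call sites.

# Sketch only: the module alias, paths and parameters are assumptions based on code example #4.
data_a = d.Data("../data/matmat/2016-11-28/answers.pd")
data_b = d.Data("../data/matmat/2016-11-28/answers.pd")
model_a = EloPriorCurrentModel(KC=2, KI=0.5)
model_b = EloPriorCurrentModel(KC=1, KI=1)
diff_corr, skills_corr, pred_corr = compare_models(data_a, data_b, model_a, model_b, plot=True)
plt.show()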
Code example #4
File: time_utils.py  Project: thran/experiments2.0
def get_difficulties(answers, data=None, model=None, force=False, name="difficulty"):
    if data and model:
        runner = Runner(data, model)
        file_name = "../cache/difficulties_{}.pd".format(runner._hash)
    else:
        data = d.Data("../data/matmat/2016-11-28/answers.pd")
        model = EloPriorCurrentModel(KC=2, KI=0.5)
        runner = Runner(data, model)
        file_name = "../cache/difficulties_matmat.pd"
    if os.path.exists(file_name) and not force:
        difficulties = pd.read_pickle(file_name)
    else:
        items = answers["item"].unique()
        runner.run(force=True)
        difficulties = pd.Series(data=model.get_difficulties(items), index=items, name=name)
        difficulties.to_pickle(file_name)

    return difficulties
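
A short usage sketch for get_difficulties; it assumes the answers file referenced in the defaults above is a pickled pandas DataFrame with an "item" column, which is an assumption on my part.

# Sketch: reuse the default Data / EloPriorCurrentModel pair and the cached difficulties file.
answers = pd.read_pickle("../data/matmat/2016-11-28/answers.pd")   # assumed to be a pickled DataFrame
item_difficulties = get_difficulties(answers)
print(item_difficulties.sort_values().head())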
Code example #5
    def EvtRun(self, event):
        runner = Runner(self.storage.data)

        dlg = wx.ProgressDialog("Running",
                                "Current Song: ",
                                maximum=len(runner.files) + 1,
                                parent=self,
                                style=0
                                | wx.PD_APP_MODAL
                                | wx.PD_CAN_ABORT
                                | wx.PD_ELAPSED_TIME
                                | wx.PD_ESTIMATED_TIME
                                | wx.PD_REMAINING_TIME)

        runner.run(dlg)
        dlg.Destroy()

        self.resultsWindow = ResultsWindow(self, runner.results,
                                           self.storage.data["targetDir"])
Code example #6
File: models_impact.py  Project: thran/experiments2.0
def difficulty_stability(datas, models, labels, points, comparable=True, runs=1, eval_data=None):
    df = pd.DataFrame(columns=["data size", "correlation", "models"])
    for i in range(points):
        ratio = (i + 1) / points
        print("Evaluation for {}% of data".format(ratio * 100))

        values = defaultdict(lambda: [])
        for data, model, label in zip(datas, models, labels):
            for run in range(runs):
                d = data(None)
                d.set_seed(run)
                d.set_train_size(ratio)
                d.filter_data(100, 0)
                m = model(None)

                Runner(d, m).run(force=True, only_train=True)

                items = d.get_items()
                if eval_data is None:
                    values[label].append(pd.Series(m.get_difficulties(items), items))
                else:
                    r = Runner(eval_data, m)
                    r.run(force=True, skip_pre_process=True)
                    values[label].append(pd.Series(r._log))

        for i, (data1, model1, label1) in enumerate(zip(datas, models, labels)):
            for data2, model2, label2 in list(zip(datas, models, labels))[i:]:
                print("Computing correlations for " + label1 + " -- " + label2)
                if comparable and label1 != label2:
                    for v1, v2 in zip(values[label1], values[label2]):
                        df.loc[len(df)] = (ratio, v1.corr(v2), label1 + " -- " + label2)
                else:
                    for v1 in values[label1]:
                        for v2 in values[label2]:
                            if v1.sum() == v2.sum() and ratio != 1:
                                continue
                            df.loc[len(df)] = (ratio, v1.corr(v2), label1 + " -- " + label2)

    print(df)
    sns.factorplot(x="data size", y="correlation", hue="models", data=df)
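
difficulty_stability expects factory callables: every element of datas and models is invoked with None to build a fresh instance per run. Below is a sketch of a call, with lambdas standing in for the project's real factories; the paths and parameters are assumptions.

# Sketch: the factories mirror the data(None) / model(None) calls inside the function.
datas = [lambda _: d.Data("../data/matmat/2016-11-28/answers.pd"),
         lambda _: d.Data("../data/matmat/2016-11-28/answers.pd")]
models = [lambda _: EloPriorCurrentModel(KC=2, KI=0.5),
          lambda _: EloPriorCurrentModel(KC=1, KI=1)]
difficulty_stability(datas, models, labels=["elo A", "elo B"], points=4, runs=2)
plt.show()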
Code example #7
File: models_impact.py  Project: thran/experiments2.0
def difficulty_stability2(datas, models, labels, points, runs=1, eval_data=None):
    filename = "../../data/matmat/2016-01-04/tmp2.data.pd"
    df = pd.DataFrame(columns=["answers", "correlation", "models"])
    student_count = len(datas[0](None).get_dataframe_all())
    for i in range(points):
        ratio = (i + 1) / points
        print("Evaluation for {}% of data".format(ratio * 100))

        for data, model, label in zip(datas, models, labels):
            for run in range(runs):
                d = data(None)
                d.set_seed(run)
                d.set_train_size(ratio)
                d.filter_data(100, 0)
                d.get_dataframe_train().to_pickle(filename)
                m1 = model(None)
                m2 = model(None)
                d1 = Data(filename, train_size=0.5, train_seed=run + 42)
                d2 = Data(filename, train_size=0.5, train_seed=-run - 42)

                Runner(d1, m1).run(force=True, only_train=True)
                Runner(d2, m2).run(force=True, only_train=True)

                items = d.get_items()
                if eval_data is None:
                    v1 = pd.Series(m1.get_difficulties(items), items)
                    v2 = pd.Series(m2.get_difficulties(items), items)
                else:
                    r1 = Runner(eval_data(None), m1)
                    r2 = Runner(eval_data(None), m2)
                    r1.run(force=True, skip_pre_process=True)
                    r2.run(force=True, skip_pre_process=True)
                    v1 = pd.Series(r1._log)
                    v2 = pd.Series(r2._log)
                df.loc[len(df)] = (ratio * student_count, v1.corr(v2), label)

    print(df)
    sns.factorplot(x="answers", y="correlation", hue="models", data=df, markers=["o", "^", "v", "s", "D"])
    return df
Code example #8
File: main.py  Project: Mellinare/l33t
def main():
    from network.listener import ConnectionsListener
    from network.session_controller import SessionsController
    from ui.text import UiController

    UI = UiController

    args = params()

    if 'ui' in args:
        if args['ui'] == 'text':
            from ui.text import UiController
            UI = UiController

        elif args['ui'] == 'gui':
            from ui.gui import UiController
            UI = UiController

        else:
            print('Wrong visual mode: {} (must be "gui" or "text")'.
                  format(args['ui']))

    ui_controller = Runner(UI())
    ui_controller.run()
    print('UI thread started!')

    sessions = Runner(SessionsController(ui_controller.task))
    sessions.run()
    print('Session controller thread started!')

    # Connections listener thread start
    listener = Runner(ConnectionsListener(sessions.task))
    listener.run()
    print('Connection listener thread started!')

    print('End of init thread')
Code example #9
File: evaluator.py  Project: thran/experiments2.0
class Evaluator:
    def __init__(self, data, model):
        self._model = model
        self._data = data

        self._runner = Runner(data, model)
        self._hash = None

    def clean(self):
        self._runner.clean()

    def evaluate(self, force_evaluate=False, answer_filters=None, **kwargs):
        answer_filters = answer_filters if answer_filters is not None else {}
        report = self._load_report()
        self._data.join_predictions(pd.read_pickle(self._runner.get_log_filename()))
        if force_evaluate or "evaluated" not in report:
            print("Evaluating", self._hash, self._data, self._model)
            report.update(self._basic_metrics(self._data.iter_test(), **kwargs))

            report['time'] = self._basic_metrics(
                self._data.iter_test(),
                prediction_column="time_prediction_log",
                observation_column="response_time_log",
                brier_min=self._data.get_dataframe_test()['time_prediction_log'].min(),
                brier_max=self._data.get_dataframe_test()['time_prediction_log'].max(),
                **kwargs)

            report['time-raw'] = self._basic_metrics(
                self._data.iter_test(),
                prediction_column="time_prediction",
                observation_column="response_time",
                brier_min=self._data.get_dataframe_test()['time_prediction'].min(),
                brier_max=self._data.get_dataframe_test()['time_prediction'].max(),
                **kwargs)

        if answer_filters is not None:
            for filter_name, filter_function in answer_filters.items():
                if force_evaluate or filter_name not in report:
                    print("Evaluating", filter_name, self._hash, self._data, self._model)
                    data = filter_function(self._data.get_dataframe_test())
                    report[filter_name] = self._basic_metrics(self._data.iter(data=data), **kwargs)

                    report[filter_name]['time'] = self._basic_metrics(
                        self._data.iter(data=data),
                        prediction_column="time_prediction_log",
                        observation_column="response_time_log",
                        brier_min=self._data.get_dataframe_test()['time_prediction_log'].min(),
                        brier_max=self._data.get_dataframe_test()['time_prediction_log'].max(),
                        **kwargs)

                    report[filter_name]['time-raw'] = self._basic_metrics(
                        self._data.iter(data=data),
                        prediction_column="time_prediction",
                        observation_column="response_time",
                        brier_min=self._data.get_dataframe_test()['time_prediction'].min(),
                        brier_max=self._data.get_dataframe_test()['time_prediction'].max(),
                        **kwargs)

        self._save_report(report)
        return report

    def _basic_metrics(self, data, brier_bins=20, prediction_column="prediction", observation_column="correct", brier_min=0, brier_max=1):
        report = {}

        n = 0           # log count
        sse = 0         # sum of square error
        llsum = 0       # log-likelihood sum
        brier_counts = np.zeros(brier_bins)          # count of answers in bins
        brier_correct = np.zeros(brier_bins)        # sum of correct answers in bins
        brier_prediction = np.zeros(brier_bins)     # sum of predictions in bins

        for log in data:
            n += 1
            sse += (log[prediction_column] - log[observation_column]) ** 2
            llsum += math.log(max(0.0001, log[prediction_column] if log[observation_column] else (1 - log[prediction_column])))

            # brier
            bin = min(int((log[prediction_column] - brier_min) / (brier_max - brier_min) * brier_bins), brier_bins - 1)
            brier_counts[bin] += 1
            brier_correct[bin] += log[observation_column]
            brier_prediction[bin] += log[prediction_column]

        answer_mean = sum(brier_correct) / n

        report["extra"] = {"answer_mean": answer_mean}
        report["rmse"] = math.sqrt(sse / n)
        report["log-likely-hood"] = llsum
        if observation_column == "correct":
            try:
                report["AUC"] = metrics.roc_auc_score(self._data.get_dataframe_test()[observation_column],
                                                      self._data.get_dataframe_test()[prediction_column])
            except ValueError:
                print("AUC - converting responses to 0, 1")
                report["AUC"] = metrics.roc_auc_score(self._data.get_dataframe_test()[observation_column] > 0,
                                                      self._data.get_dataframe_test()[prediction_column])

        # brier
        brier_prediction_means = brier_prediction / brier_counts
        brier_prediction_means[np.isnan(brier_prediction_means)] = \
            ((np.arange(brier_bins) + 0.5) / brier_bins)[np.isnan(brier_prediction_means)]
        brier_correct_means = brier_correct / brier_counts
        brier_correct_means[np.isnan(brier_correct_means)] = 0
        brier = {
            "reliability":  sum(brier_counts * (brier_correct_means - brier_prediction_means) ** 2) / n,
            "resolution":  sum(brier_counts * (brier_correct_means - answer_mean) ** 2) / n,
            "uncertainty": answer_mean * (1 - answer_mean),

        }
        report["brier"] = brier

        report["extra"]["brier"] = {
            "max": brier_max,
            "min": brier_min,
            "bin_count": brier_bins,
            "bin_counts": list(brier_counts),
            "bin_prediction_means": list(brier_prediction_means),
            "bin_correct_means": list(brier_correct_means),
        }
        report["evaluated"] = True

        return report

    def get_report(self, force_evaluate=False, force_run=False, **kwargs):
        self._hash = self._runner.run(force=force_run)
        return self.evaluate(force_evaluate=force_evaluate or force_run, **kwargs)

    def roc_curve(self):
        self.get_report()
        self._data.join_predictions(pd.read_pickle(self._runner.get_log_filename()))
        fpr, tpr, thresholds = metrics.roc_curve(self._data.get_dataframe_test()["correct"] > 0, self._data.get_dataframe_test()["prediction"])
        print(fpr, tpr, thresholds)
        plt.plot(fpr, tpr, label=str(self._data))

    def _save_report(self, report):
        json.dump(report, open(self._runner.get_report_filename(), "w"), indent=4)

    def _load_report(self):
        return json.load(open(self._runner.get_report_filename()))

    def __str__(self):
        return json.dumps(self.get_report(), sort_keys=True, indent=4)

    def brier_graphs(self, time=False, time_raw=False):
        report = self.get_report()
        if time and not time_raw:
            report = report['time']
        if time_raw:
            report = report['time-raw']

        plt.figure()
        plt.plot(report["extra"]["brier"]["bin_prediction_means"], report["extra"]["brier"]["bin_correct_means"])
        l = report["extra"]["brier"]['min'], report["extra"]["brier"]['max']
        plt.plot(l, l)

        bin_count = report["extra"]["brier"]["bin_count"]
        counts = np.array(report["extra"]["brier"]["bin_counts"])
        bins = (np.arange(bin_count) + 0.5) * (l[1] - l[0]) / bin_count + l[0]
        plt.bar(bins, counts / max(counts) * l[1], width=(0.5 / bin_count * (l[1] - l[0])), alpha=0.5)
        plt.title(self._model)
        plt.xlabel('prediction')
        plt.ylabel('observation mean')
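
A hedged end-to-end sketch of the Evaluator API defined above; the Data path and model parameters are reused from code example #4 and remain assumptions here.

# Sketch: get_report() runs the model via Runner if needed, then evaluates the prediction log.
data = d.Data("../data/matmat/2016-11-28/answers.pd")
model = EloPriorCurrentModel(KC=2, KI=0.5)
evaluator = Evaluator(data, model)
report = evaluator.get_report()
print(report["rmse"], report.get("AUC"))
evaluator.brier_graphs()        # calibration plot of predictions vs. observed correctness
plt.show()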