コード例 #1
0
    def setUpClass(cls):
        """Load both batch-test settings files and run the batch script once.

        The script's stdout and stderr are sent to os.devnull through
        cls.NULL, which stays open after this method returns.  cls.proc
        holds the value returned by subprocess.call (the exit status).
        """
        batch_dir = 'batch_test'
        cls.settings_fh_1 = os.path.join(batch_dir, 'batch_test1.json')
        cls.settings_fh_2 = os.path.join(batch_dir, 'batch_test2.json')
        cls.settings_1 = utils.get_settings(cls.settings_fh_1)
        cls.settings_2 = utils.get_settings(cls.settings_fh_2)

        # Sink for the child process output.
        cls.NULL = open(os.devnull, 'w')
        command = ['./parallel_batch_run.py', '-s', batch_dir]
        cls.proc = subprocess.call(command,
                                   stdout=cls.NULL,
                                   stderr=cls.NULL)
コード例 #2
0
ファイル: test_testing.py プロジェクト: Keesiu/meta-kaggle
    def setUpClass(cls):
        """Prepare the batch-run fixtures shared by the tests of this class.

        Reads the two settings files under 'batch_test' and invokes
        ./parallel_batch_run.py on that directory, discarding its output.
        cls.proc receives the return value of subprocess.call and cls.NULL
        is left open for the lifetime of the class.
        """
        settings_paths = [os.path.join('batch_test', file_name)
                          for file_name in ('batch_test1.json',
                                            'batch_test2.json')]
        cls.settings_fh_1, cls.settings_fh_2 = settings_paths
        cls.settings_1 = utils.get_settings(cls.settings_fh_1)
        cls.settings_2 = utils.get_settings(cls.settings_fh_2)

        # Both output streams of the child go to the null device.
        cls.NULL = open(os.devnull, 'w')
        run_kwargs = {'stdout': cls.NULL, 'stderr': cls.NULL}
        cls.proc = subprocess.call(
            ['./parallel_batch_run.py', '-s', 'batch_test'], **run_kwargs)
コード例 #3
0
    def setUpClass(cls):
        """Load the test settings and write a deliberately odd HDF5 file.

        Stores the expected subject and type names as sets, then creates
        '<malformed_feat><VERSION>.h5' under TRAIN_DATA_PATH containing a
        single 10x10 dataset called 'malfie'.

        Note that cls.malformed_feat first holds the feature name and is
        then rebound to the (closed) h5py.File object.
        """
        cls.settings_fh = 'test_settings.json'
        cls.settings = utils.get_settings(cls.settings_fh)

        cls.all_subjects = {'Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5',
                            'Patient_1', 'Patient_2'}

        cls.all_types = {'preictal', 'test', 'pseudointerictal',
                         'interictal', 'pseudopreictal'}

        cls.malformed_feat = 'malformed_feat'
        train_dir = cls.settings['TRAIN_DATA_PATH']
        file_name = "{0}{1}.h5".format(cls.malformed_feat,
                                       cls.settings['VERSION'])
        cls.malformed_file = os.path.join(train_dir, file_name)

        # The attribute is rebound to the file handle before writing.
        handle = h5py.File(cls.malformed_file, 'w')
        cls.malformed_feat = handle
        handle.create_dataset('malfie', (10, 10))
        handle.close()
コード例 #4
0
ファイル: test_testing.py プロジェクト: Keesiu/meta-kaggle
    def setUpClass(cls):
        """Set up settings, expected name sets and a malformed feature file.

        The HDF5 file is written to TRAIN_DATA_PATH with the name
        'malformed_feat' followed by the settings VERSION and '.h5'; it
        holds one dataset, 'malfie', of shape (10, 10).  After this method
        cls.malformed_feat refers to the closed h5py.File, not the name.
        """
        cls.settings_fh = 'test_settings.json'
        cls.settings = utils.get_settings(cls.settings_fh)

        subject_names = ['Dog_{0}'.format(index) for index in range(1, 6)]
        subject_names += ['Patient_1', 'Patient_2']
        cls.all_subjects = set(subject_names)

        cls.all_types = set(('preictal',
                             'test',
                             'pseudointerictal',
                             'interictal',
                             'pseudopreictal'))

        cls.malformed_feat = 'malformed_feat'
        target_dir = cls.settings['TRAIN_DATA_PATH']
        version = cls.settings['VERSION']
        cls.malformed_file = os.path.join(
            target_dir, "{0}{1}.h5".format(cls.malformed_feat, version))

        # Rebind the attribute to the open file, fill it, then close it.
        cls.malformed_feat = h5py.File(cls.malformed_file, 'w')
        hdf5_file = cls.malformed_feat
        hdf5_file.create_dataset('malfie', (10, 10))
        hdf5_file.close()
コード例 #5
0
    def setUpClass(cls):
        """Load the data-assembler fixtures and hard-coded reference tables.

        Reads 'test_data_assembler.json' via utils.get_settings, loads the
        data with utils.get_data and the JSON metadata from
        '../segmentMetadata.json', then stores three literal lookup tables
        on the class: segment_counts, feature_length and ictyp_mapping.
        """
        cls.settings_fh = 'test_data_assembler.json'
        cls.settings = utils.get_settings(cls.settings_fh)
        cls.subjects = cls.settings['SUBJECTS']
        cls.features = cls.settings['FEATURES']
        cls.data = utils.get_data(cls.settings)
        with open('../segmentMetadata.json', 'r') as metadata_fh:
            cls.metadata = json.load(metadata_fh)

        cls.ictyps = cls.settings['DATA_TYPES']

        # One row per subject; the columns follow count_columns.
        count_columns = ('preictal', 'pseudopreictal', 'interictal',
                         'pseudointerictal', 'test')
        count_rows = (
            ('Dog_1', (24, 20, 480, 400, 502)),
            ('Dog_2', (42, 35, 500, 416, 1000)),
            ('Dog_3', (72, 60, 1440, 1200, 907)),
            ('Dog_4', (97, 80, 804, 670, 990)),
            ('Dog_5', (30, 25, 450, 375, 191)),
            ('Patient_1', (18, 15, 50, 41, 195)),
            ('Patient_2', (18, 15, 42, 35, 150)),
        )
        cls.segment_counts = {}
        for subject_name, counts in count_rows:
            cls.segment_counts[subject_name] = dict(zip(count_columns,
                                                        counts))

        cls.feature_length = dict([('Dog_1', 16),
                                   ('Dog_2', 16),
                                   ('Dog_3', 16),
                                   ('Dog_4', 16),
                                   ('Dog_5', 15),
                                   ('Patient_1', 15),
                                   ('Patient_2', 24)])

        # Type name -> integer label (1 for the preictal variants).
        cls.ictyp_mapping = dict([('preictal', 1),
                                  ('interictal', 0),
                                  ('pseudopreictal', 1),
                                  ('pseudointerictal', 0)])
コード例 #6
0
ファイル: test_testing.py プロジェクト: Keesiu/meta-kaggle
    def setUpClass(cls):
        """Prepare settings, data, metadata and literal expectation tables.

        Settings come from 'test_data_assembler.json', data from
        utils.get_data and metadata from '../segmentMetadata.json'.  The
        remaining attributes (segment_counts, feature_length,
        ictyp_mapping) are fixed dictionaries defined here.
        """
        cls.settings_fh = 'test_data_assembler.json'
        cls.settings = utils.get_settings(cls.settings_fh)
        cls.subjects = cls.settings['SUBJECTS']
        cls.features = cls.settings['FEATURES']
        cls.data = utils.get_data(cls.settings)
        with open('../segmentMetadata.json', 'r') as handle:
            cls.metadata = json.load(handle)

        cls.ictyps = cls.settings['DATA_TYPES']

        # Per-subject counts keyed by type name.
        cls.segment_counts = {
            'Dog_1': dict(preictal=24, pseudopreictal=20, interictal=480,
                          pseudointerictal=400, test=502),
            'Dog_2': dict(preictal=42, pseudopreictal=35, interictal=500,
                          pseudointerictal=416, test=1000),
            'Dog_3': dict(preictal=72, pseudopreictal=60, interictal=1440,
                          pseudointerictal=1200, test=907),
            'Dog_4': dict(preictal=97, pseudopreictal=80, interictal=804,
                          pseudointerictal=670, test=990),
            'Dog_5': dict(preictal=30, pseudopreictal=25, interictal=450,
                          pseudointerictal=375, test=191),
            'Patient_1': dict(preictal=18, pseudopreictal=15, interictal=50,
                              pseudointerictal=41, test=195),
            'Patient_2': dict(preictal=18, pseudopreictal=15, interictal=42,
                              pseudointerictal=35, test=150),
        }

        cls.feature_length = dict(Dog_1=16, Dog_2=16, Dog_3=16, Dog_4=16,
                                  Dog_5=15, Patient_1=15, Patient_2=24)

        # Both preictal variants map to 1, both interictal variants to 0.
        cls.ictyp_mapping = dict(preictal=1,
                                 interictal=0,
                                 pseudopreictal=1,
                                 pseudointerictal=0)
コード例 #7
0
ファイル: test_testing.py プロジェクト: Keesiu/meta-kaggle
    def setUpClass(cls):
        """Run predict.py on the test settings and list the CSVs it left.

        The child's stdout and stderr are redirected to os.devnull via
        cls.NULL (left open).  cls.proc is the value returned by
        subprocess.call and cls.output_file is the list of '*.csv' paths
        found under SUBMISSION_PATH.
        """
        cls.settings_fh = 'test_predict.json'
        cls.settings = utils.get_settings(cls.settings_fh)

        cls.NULL = open(os.devnull, 'w')
        command = ['./predict.py', '-s', 'test_predict.json']
        cls.proc = subprocess.call(command,
                                   stdout=cls.NULL,
                                   stderr=cls.NULL)

        csv_pattern = os.path.join(cls.settings['SUBMISSION_PATH'], "*.csv")
        cls.output_file = glob.glob(csv_pattern)
コード例 #8
0
    def setUpClass(cls):
        """Execute the prediction script once and record what it produced.

        Loads 'test_predict.json', calls ./predict.py with its output sent
        to the null device, and globs SUBMISSION_PATH for CSV files.  The
        result of subprocess.call is kept in cls.proc; cls.NULL is not
        closed here.
        """
        cls.settings_fh = 'test_predict.json'
        cls.settings = utils.get_settings(cls.settings_fh)

        # Single sink shared by both output streams of the child process.
        cls.NULL = open(os.devnull, 'w')
        silenced = dict(stdout=cls.NULL, stderr=cls.NULL)
        cls.proc = subprocess.call(
            ['./predict.py', '-s', 'test_predict.json'], **silenced)

        submission_dir = cls.settings['SUBMISSION_PATH']
        cls.output_file = glob.glob(os.path.join(submission_dir, "*.csv"))
コード例 #9
0
ファイル: test_testing.py プロジェクト: Keesiu/meta-kaggle
    def setUpClass(cls):
        """Run train.py on the test settings and capture what it produced.

        The script's stdout is written to the file 'stdout_tmp' and read
        back into cls.stdout.  cls.proc is the value returned by
        subprocess.call, and cls.model_files lists the files under
        MODEL_PATH matching
        '<RUN_NAME>_model_for_*_using_<VERSION>_feats.model'.
        """
        cls.settings_fh = 'test_train.json'
        cls.settings = utils.get_settings(cls.settings_fh)

        # The context manager closes the capture file even when the
        # subprocess cannot be started (previously the handle leaked).
        with open('stdout_tmp', 'w') as f:
            cls.proc = subprocess.call(['./train.py', '-s', 'test_train.json'],
                                       stdout=f)

        with open('stdout_tmp', 'r') as f:
            cls.stdout = f.read()

        model_pattern = "{0}_model_for_*_using_{1}_feats.model".format(
            cls.settings['RUN_NAME'],
            cls.settings['VERSION'])
        cls.model_files = glob.glob(
            os.path.join(cls.settings['MODEL_PATH'], model_pattern))
コード例 #10
0
    def setUpClass(cls):
        """Train on the test settings once and collect stdout and models.

        ./train.py is called with its stdout redirected to 'stdout_tmp';
        the file's contents end up in cls.stdout and the return value of
        subprocess.call in cls.proc.  cls.model_files is the glob result
        for '<RUN_NAME>_model_for_*_using_<VERSION>_feats.model' under
        MODEL_PATH.
        """
        cls.settings_fh = 'test_train.json'
        cls.settings = utils.get_settings(cls.settings_fh)

        # Use a with-block so the capture file is closed on every path,
        # including a failure to launch the subprocess.
        with open('stdout_tmp', 'w') as f:
            cls.proc = subprocess.call(['./train.py',
                                        '-s', 'test_train.json'],
                                       stdout=f)

        with open('stdout_tmp', 'r') as f:
            cls.stdout = f.read()

        model_pattern = "{0}_model_for_*_using_{1}_feats.model".format(
            cls.settings['RUN_NAME'],
            cls.settings['VERSION'])
        cls.model_files = glob.glob(
            os.path.join(cls.settings['MODEL_PATH'], model_pattern))
コード例 #11
0
ファイル: predict.py プロジェクト: Keesiu/meta-kaggle
def _average_by_segment(segments, predictions):
    """Return {segment: column-wise mean of all prediction rows for it}."""
    grouped = {}
    for segment, prediction in zip(segments, predictions):
        grouped.setdefault(segment, []).append(prediction)
    # Each segment is reduced exactly once, so an already averaged row is
    # never stacked and averaged a second time.
    return {segment: np.mean(np.vstack(rows), axis=0)
            for segment, rows in grouped.items()}


def main(settings_file='SETTINGS.json', verbose=False):
    """Predict on the test set of every subject and write the results.

    Args:
        settings_file: path handed to utils.get_settings.
        verbose: forwarded to utils.read_trained_model and utils.output_csv.

    Raises:
        ValueError: if only some of the usable features contain '10feat'
            in their name.

    When every usable feature name contains '10feat', several prediction
    rows may belong to the same test segment; they are averaged so that
    each segment gets a single row.  Otherwise each row is stored as is.
    """
    # load the settings
    settings = utils.get_settings(settings_file)

    subjects = settings['SUBJECTS']

    # load the data
    data = utils.get_data(settings)

    # load the metadata
    metadata = utils.get_metadata()

    # keep only the requested features that were actually loaded
    features_that_parsed = set(data.keys())
    settings['FEATURES'] = [
        feature for feature in settings['FEATURES']
        if feature in features_that_parsed
    ]

    # check if features are 1-minute
    is_minute = ['10feat' in feature for feature in settings['FEATURES']]
    if all(is_minute):
        # set the flag (also taken when the feature list is empty)
        minutefeatures = True
    elif any(is_minute):
        raise ValueError("Cannot mix 1-minute and 10-minute features.")
    else:
        minutefeatures = False

    # iterate over subjects
    prediction_dict = {}

    for subject in subjects:
        # load the trained model:
        model = utils.read_trained_model(subject, settings, verbose=verbose)

        # initialise the data assembler
        assembler = utils.DataAssembler(settings, data, metadata)
        # build test set
        X = assembler.build_test(subject)

        # make predictions
        predictions = model.predict_proba(X)

        if minutefeatures:
            # Combine the estimates on each segment by averaging.  The
            # averages used to be computed and then discarded, so the last
            # raw row of each segment was written instead.
            prediction_dict.update(
                _average_by_segment(assembler.test_segments, predictions))
        else:
            for segment, prediction in zip(assembler.test_segments,
                                           predictions):
                prediction_dict[segment] = prediction

    utils.output_csv(prediction_dict, settings, verbose=verbose)
コード例 #12
0
ファイル: train.py プロジェクト: Keesiu/meta-kaggle
def main(settingsfname,
         verbose=False,
         store_models=True,
         store_features=False,
         save_training_detailed=False,
         load_pickled=False,
         parallel=0):
    """Train a model for every subject and report the AUC scores.

    Args:
        settingsfname: path handed to utils.get_settings; the text before
            its first '.' also prefixes the default feature-dump name.
        verbose: flag passed to utils.print_verbose and the training
            helpers.
        store_models: forwarded to the training helpers.
        store_features: forwarded to utils.train_RFE only.
        save_training_detailed: falsy to skip, otherwise a path that is
            opened for binary writing and receives a pickle of the last
            subject's results.
        load_pickled: falsy for no extra data, a str to unpickle that
            path, any other truthy value to unpickle
            '<settings stem>_feature_dump.pickle'.
        parallel: 0 trains the subjects one after another; any other
            value is used as n_jobs for joblib.Parallel.

    Returns:
        The dictionary of AUC scores, with the combined score stored
        under the key 'all'.

    Raises:
        EnvironmentError: if none of the requested features was loaded.
        NotImplementedError: if parallel training is requested while
            'RFE' is present in the settings.
    """
    settings = utils.get_settings(settingsfname)

    utils.print_verbose('=== Settings file   ===', flag=verbose)
    utils.print_verbose(settingsfname, flag=verbose)
    utils.print_verbose('=== Settings loaded ===', flag=verbose)
    utils.print_verbose(settings, flag=verbose)
    utils.print_verbose('=======================', flag=verbose)

    subjects = settings['SUBJECTS']

    data = utils.get_data(settings, verbose=verbose)

    metadata = utils.get_metadata()

    # Build the lookup once instead of once per requested feature.
    loaded_features = set(data.keys())
    settings['FEATURES'] = [
        feature for feature in settings['FEATURES']
        if feature in loaded_features
    ]

    if not settings['FEATURES']:
        raise EnvironmentError('No features could be loaded')

    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    model_pipe = utils.build_model_pipe(settings)

    utils.print_verbose("=== Model Used ===\n"
                        "{0}\n==================".format(model_pipe),
                        flag=verbose)

    # dictionary to store results
    subject_predictions = {}

    # dictionary to store features in
    transformed_features = {}

    # if we're loading pickled features then load them
    if load_pickled:
        if isinstance(load_pickled, str):
            pickle_path = load_pickled
        else:
            pickle_path = settingsfname.split(".")[0] + "_feature_dump.pickle"
        # NOTE(review): unpickling can execute arbitrary code; only point
        # this at feature dumps from a trusted source.
        with open(pickle_path, "rb") as fh:
            Xtra = pickle.load(fh)
    else:
        Xtra = None

    # dictionary for final scores
    auc_scores = {}

    if parallel:
        if 'RFE' in settings:
            raise NotImplementedError('Parallel RFE is not implemented')

        # NOTE(review): 'CUSTOM' in settings is not consulted here;
        # utils.train_model is always used - confirm this is intended.
        output = joblib.Parallel(n_jobs=parallel)(
            joblib.delayed(utils.train_model)(settings,
                                              data,
                                              metadata,
                                              subject,
                                              model_pipe,
                                              store_models,
                                              load_pickled,
                                              verbose,
                                              extra_data=Xtra,
                                              parallel=parallel)
            for subject in subjects)

        # Each job returns (results, auc); both are merged with update().
        for job_output in output:
            subject_predictions.update(job_output[0])
        for job_output in output:
            auc_scores.update(job_output[1])
    else:
        for subject in subjects:
            utils.print_verbose("=====Training {0} Model=====".format(
                str(subject)),
                                flag=verbose)

            if 'RFE' in settings:
                transformed_features, auc = utils.train_RFE(
                    settings,
                    data,
                    metadata,
                    subject,
                    model_pipe,
                    transformed_features,
                    store_models,
                    store_features,
                    load_pickled,
                    settingsfname,
                    verbose,
                    extra_data=Xtra)
                # RFE training yields no per-subject predictions.
                subject_predictions = None
            elif 'CUSTOM' in settings:
                results, auc = utils.train_custom_model(settings,
                                                        data,
                                                        metadata,
                                                        subject,
                                                        model_pipe,
                                                        store_models,
                                                        load_pickled,
                                                        verbose,
                                                        extra_data=Xtra)
                subject_predictions[subject] = results
            else:
                results, auc = utils.train_model(settings,
                                                 data,
                                                 metadata,
                                                 subject,
                                                 model_pipe,
                                                 store_models,
                                                 load_pickled,
                                                 verbose,
                                                 extra_data=Xtra)
                subject_predictions[subject] = results

            auc_scores.update({subject: auc})

    if save_training_detailed:
        if not subject_predictions:
            # Nothing to pickle: predictions are None after RFE training
            # and empty when there were no subjects.  Skip the dump rather
            # than crash before the AUC scores are written.
            print("No detailed training results to save; skipping "
                  "{0}".format(save_training_detailed))
        else:
            # The dump used the leaked loop variable `subject`, which is
            # unbound in parallel mode.  Name the last subject explicitly;
            # the file content of a sequential run is unchanged.
            # NOTE(review): assumes parallel results are keyed by subject,
            # and only the last subject is saved - confirm both.
            last_subject = list(subjects)[-1]
            with open(save_training_detailed, "wb") as fh:
                pickle.dump(subject_predictions[last_subject], fh)

    combined_auc = utils.combined_auc_score(settings,
                                            auc_scores,
                                            subj_pred=subject_predictions)

    print(
        "predicted AUC score over all subjects: {0:.2f}".format(combined_auc))
    auc_scores.update({'all': combined_auc})
    utils.output_auc_scores(auc_scores, settings)

    return auc_scores
コード例 #13
0
ファイル: train.py プロジェクト: Neuroglycerin/hail-seizure
def main(settingsfname, verbose=False, store_models=True,
         store_features=False, save_training_detailed=False,
         load_pickled=False, parallel=0):
    """Train a model for every subject and report the AUC scores.

    Args:
        settingsfname: path handed to utils.get_settings; the text before
            its first '.' also prefixes the default feature-dump name.
        verbose: flag passed to utils.print_verbose and the training
            helpers.
        store_models: forwarded to the training helpers.
        store_features: forwarded to utils.train_RFE only.
        save_training_detailed: falsy to skip, otherwise a path that is
            opened for binary writing and receives a pickle of the last
            subject's results.
        load_pickled: falsy for no extra data, a str to unpickle that
            path, any other truthy value to unpickle
            '<settings stem>_feature_dump.pickle'.
        parallel: 0 trains the subjects one after another; any other
            value is used as n_jobs for joblib.Parallel.

    Returns:
        The dictionary of AUC scores, with the combined score stored
        under the key 'all'.

    Raises:
        EnvironmentError: if none of the requested features was loaded.
        NotImplementedError: if parallel training is requested while
            'RFE' is present in the settings.
    """
    settings = utils.get_settings(settingsfname)

    utils.print_verbose('=== Settings file   ===', flag=verbose)
    utils.print_verbose(settingsfname, flag=verbose)
    utils.print_verbose('=== Settings loaded ===', flag=verbose)
    utils.print_verbose(settings, flag=verbose)
    utils.print_verbose('=======================', flag=verbose)

    subjects = settings['SUBJECTS']

    data = utils.get_data(settings, verbose=verbose)

    metadata = utils.get_metadata()

    # Build the lookup once instead of once per requested feature.
    loaded_features = set(data.keys())
    features_that_parsed = [feature for feature in settings['FEATURES']
                            if feature in loaded_features]

    settings['FEATURES'] = features_that_parsed

    if not settings['FEATURES']:
        raise EnvironmentError('No features could be loaded')

    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    model_pipe = utils.build_model_pipe(settings)

    utils.print_verbose("=== Model Used ===\n"
                        "{0}\n==================".format(model_pipe),
                        flag=verbose)

    # dictionary to store results
    subject_predictions = {}

    # dictionary to store features in
    transformed_features = {}

    # if we're loading pickled features then load them
    if load_pickled:
        if isinstance(load_pickled, str):
            pickle_path = load_pickled
        else:
            pickle_path = settingsfname.split(".")[0] + "_feature_dump.pickle"
        # NOTE(review): unpickling can execute arbitrary code; only point
        # this at feature dumps from a trusted source.
        with open(pickle_path, "rb") as fh:
            Xtra = pickle.load(fh)
    else:
        Xtra = None

    # dictionary for final scores
    auc_scores = {}

    if parallel:
        if 'RFE' in settings:
            raise NotImplementedError('Parallel RFE is not implemented')

        # NOTE(review): 'CUSTOM' in settings is not consulted here;
        # utils.train_model is always used - confirm this is intended.
        output = joblib.Parallel(n_jobs=parallel)(
            joblib.delayed(utils.train_model)(settings,
                                              data,
                                              metadata,
                                              subject,
                                              model_pipe,
                                              store_models,
                                              load_pickled,
                                              verbose,
                                              extra_data=Xtra,
                                              parallel=parallel)
            for subject in subjects)

        # Each job returns (results, auc); both are merged with update().
        for job_output in output:
            subject_predictions.update(job_output[0])
        for job_output in output:
            auc_scores.update(job_output[1])
    else:
        for subject in subjects:
            utils.print_verbose(
                "=====Training {0} Model=====".format(str(subject)),
                flag=verbose)

            if 'RFE' in settings:
                transformed_features, auc = utils.train_RFE(
                    settings,
                    data,
                    metadata,
                    subject,
                    model_pipe,
                    transformed_features,
                    store_models,
                    store_features,
                    load_pickled,
                    settingsfname,
                    verbose,
                    extra_data=Xtra)
                # RFE training yields no per-subject predictions.
                subject_predictions = None
            elif 'CUSTOM' in settings:
                results, auc = utils.train_custom_model(settings,
                                                        data,
                                                        metadata,
                                                        subject,
                                                        model_pipe,
                                                        store_models,
                                                        load_pickled,
                                                        verbose,
                                                        extra_data=Xtra)
                subject_predictions[subject] = results
            else:
                results, auc = utils.train_model(settings,
                                                 data,
                                                 metadata,
                                                 subject,
                                                 model_pipe,
                                                 store_models,
                                                 load_pickled,
                                                 verbose,
                                                 extra_data=Xtra)
                subject_predictions[subject] = results

            auc_scores.update({subject: auc})

    if save_training_detailed:
        if not subject_predictions:
            # Nothing to pickle: predictions are None after RFE training
            # and empty when there were no subjects.  Skip the dump rather
            # than crash before the AUC scores are written.
            print("No detailed training results to save; skipping "
                  "{0}".format(save_training_detailed))
        else:
            # The dump used the leaked loop variable `subject`, which is
            # unbound in parallel mode.  Name the last subject explicitly;
            # the file content of a sequential run is unchanged.
            # NOTE(review): assumes parallel results are keyed by subject,
            # and only the last subject is saved - confirm both.
            last_subject = list(subjects)[-1]
            with open(save_training_detailed, "wb") as fh:
                pickle.dump(subject_predictions[last_subject], fh)

    combined_auc = utils.combined_auc_score(settings,
                                            auc_scores,
                                            subj_pred=subject_predictions)

    print(
        "predicted AUC score over all subjects: {0:.2f}".format(combined_auc))
    auc_scores.update({'all': combined_auc})
    utils.output_auc_scores(auc_scores, settings)

    return auc_scores