Python check_input_typeの例

プログラミング言語: Python

名前空間/パッケージ名: fresco.utils

メソッド/関数: check_input_type

hotexamples.comのコード掲載数: 3

Python check_input_type - 3件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのfresco.utils.check_input_typeの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: test_utils.py プロジェクト: jairideout/fresco

    def test_check_input_type(self):
        """check_input_type returns None on a type match and raises otherwise."""
        # Accepted: first a single type, then a tuple of candidate types.
        for accepted in (int, (str, int)):
            result = check_input_type('foo', 42, accepted)
            self.assertEqual(result, None)

        # Rejected: first a single type, then a tuple of candidate types.
        for rejected in (float, (str, float)):
            with self.assertRaises(InputTypeError):
                _ = check_input_type('foo', 42, rejected)

コード例 #2

ファイルを表示

ファイル: command_line_wrapper.py プロジェクト: jairideout/fresco

def command_line_argument_wrapper(model, n_iterations, group_map_files,
        start_level, mapping_file, prediction_field, include_only, negate,
        n_maintain, n_generate, score_predictions_function, split_abun_coef,
        split_score_coef, merge_abun_coef, merge_score_coef, delete_abun_coef,
        delete_score_coef, split_proportion, merge_proportion,
        delete_proportion, n_cross_folds, n_processes, output_dir, n_trials):

    """
    Sets up and executes scope optimization on a given problem, runs testing, and writes to files.

    Builds the data structures and objects that are used by fresco.scope_optimization from
    command line friendly arguments. Then runs scope optimization, performs cross fold testing
    on the results if instructed, and writes the results to the files.

    Output (all inside output_dir, which is created if it does not exist):
        info.log: The log of the run (overwritten on every call).
        prediction_testing_output.txt and fold_feature_vectors_output.txt: Written when
            n_trials > 0.
        feature_vector_output.txt: Written when n_trials is 0 or negative.

    Args:
        model: A string representing a machine learning classification model to be used
            both for testing and within the optimization process.
        n_iterations: The number of iterations to be completed by the optimization process.
        group_map_files: A list of open files containing tab-separated lines mapping from
            groups to objects. For example:
                Group1    Object1    Object2    Object3
                Group2    Object4    Object5
            Map files should be ordered by decreasing level of scope, i.e., most general to least
            general map files.
        start_level: The starting scope level (map file index) for the optimization process.
        mapping_file: An open file with tab separated lines mapping from a sample ID to its
            properties. The first line should be tab separated identifiers for the properties.
            For example:
                SAMPLE_ID    COLOR    TASTE
                APPLE_g    GREEN    AMAZING
                APPLE_r    RED    AWFUL
        prediction_field: A string naming the mapping_file field to predict. Forwarded to
            build_problem_data.
        include_only: Either None, or a list/tuple whose first element is a string (a
            mapping_file field name) and whose second element is a list of strings (values
            of that field). Forwarded to build_problem_data to filter samples.
        negate: A boolean forwarded to build_problem_data together with include_only.
        n_maintain: The number of vectors to be kept after every iteration of optimization.
        n_generate: The number of vectors to be generated for each input vector by the
            vector generator.
        score_predictions_function: A function which takes two lists of class predictions of
            equal length and returns a numerical score. For example:
                def score_predictions(real_classes, predicted_classes):
                    return sum([1 if real_classes[i] != predicted_classes[i] else 0
                                for i in range(len(real_classes))])
        split_abun_coef: The abundance deviation coefficient for the splitting heuristic.
        split_score_coef: The prediction score deviation coefficient for the splitting heuristic.
        merge_abun_coef: The abundance deviation coefficient for the merging heuristic.
        merge_score_coef: The prediction score deviation coefficient for the merging heuristic.
        delete_abun_coef: The abundance deviation coefficient for the deletion heuristic.
        delete_score_coef: The prediction score deviation coefficient for the deletion heuristic.
        split_proportion: The proportion of total features to be split each iteration.
        merge_proportion: The proportion of total features to be merged each iteration.
        delete_proportion: The proportion of total features to be deleted each iteration.
        n_cross_folds: The number of cross folds to use in scoring the vectors for selection.
        n_processes: The number of additional processes to spawn, at maximum.
        output_dir: The directory that the output files will be put in.
        n_trials: The number of cross folds to use in scoring the vectors returned by the
            optimization process. If 0, no testing will be performed.

    Raises:
        InputTypeError: If an argument has an unexpected type, if
            score_predictions_function accepts fewer than two parameters, or if
            include_only is not None and is malformed.
    """
    number_types = (types.IntType, types.FloatType)
    # Explicit (name, value, expected type) triples; the values are listed
    # directly instead of being looked up through locals().
    simple_var_checks = [
        ("n_iterations", n_iterations, types.IntType),
        ("n_maintain", n_maintain, types.IntType),
        ("n_generate", n_generate, types.IntType),
        ("n_processes", n_processes, types.IntType),
        ("n_cross_folds", n_cross_folds, types.IntType),
        ("n_trials", n_trials, types.IntType),
        ("start_level", start_level, types.IntType),
        ("split_score_coef", split_score_coef, number_types),
        ("split_abun_coef", split_abun_coef, number_types),
        ("merge_score_coef", merge_score_coef, number_types),
        ("merge_abun_coef", merge_abun_coef, number_types),
        ("delete_score_coef", delete_score_coef, number_types),
        ("delete_abun_coef", delete_abun_coef, number_types),
        ("split_proportion", split_proportion, number_types),
        ("merge_proportion", merge_proportion, number_types),
        ("delete_proportion", delete_proportion, number_types),
        ("mapping_file", mapping_file, types.FileType),
        ("negate", negate, types.BooleanType),
        ("output_dir", output_dir, types.StringType),
        ("prediction_field", prediction_field, types.StringType),
        ("include_only", include_only,
         (types.NoneType, types.ListType, types.TupleType)),
        ("group_map_files", group_map_files, types.ListType),
    ]
    for var_name, var_value, var_type in simple_var_checks:
        check_input_type(var_name, var_value, var_type)

    if len(inspect.getargspec(score_predictions_function)[0]) < 2:
        raise InputTypeError("score_predictions_function should take at least two parameters")
    if not all(isinstance(f, types.FileType) for f in group_map_files):
        raise InputTypeError("group_map_files should be a list of open files")
    if include_only is not None:
        # Guard the indexing below so a short sequence is reported as an
        # input error rather than surfacing as an IndexError.
        if len(include_only) < 2:
            raise InputTypeError("include_only should contain a field name and a list of values")
        if not isinstance(include_only[0], types.StringType):
            raise InputTypeError("include_only[0] should be of type string")
        if not isinstance(include_only[1], types.ListType) or \
                not all(isinstance(value, types.StringType) for value in include_only[1]):
            raise InputTypeError("include_only[1] should be a list of strings")

    if not exists(output_dir):
        makedirs(output_dir)

    log_fp = join(output_dir, 'info.log')
    logging.basicConfig(filename=log_fp, filemode='w', level=logging.DEBUG,
                        format='%(asctime)s\t%(levelname)s\t%(message)s')
    logging.info("Started feature vector optimization process for '%s' "
                 "model", model)
    start_time = time()

    vector_model = GroupVectorModel(parse_model_string(model))
    group_vector_scorer = CrossValidationGroupVectorScorer(
        score_predictions_function, vector_model, n_cross_folds)

    problem_data = build_problem_data(group_map_files, mapping_file,
                                      prediction_field, start_level,
                                      include_only, negate, n_processes)
    assert isinstance(problem_data, ProblemData),\
        "build_problem_data did not return a ProblemData"

    # The starting vector holds one feature record per group at start_level.
    initial_feature_ids = problem_data.get_group_ids(start_level)
    initial_feature_vector = FeatureVector(
        [problem_data.get_feature_record(start_level, feature_id)
         for feature_id in initial_feature_ids])
    assert isinstance(initial_feature_vector, FeatureVector),\
        "initial feature vector is not a FeatureVector"

    group_actions = [
        SplitAction(problem_data, split_proportion, split_abun_coef, split_score_coef),
        MergeAction(problem_data, merge_proportion, merge_abun_coef, merge_score_coef),
        DeleteAction(problem_data, delete_proportion, delete_abun_coef, delete_score_coef),
    ]
    vector_generator = ActionVectorGenerator(group_actions, n_generate, problem_data)

    scope_optimization = ScopeOptimization(group_vector_scorer, vector_generator,
                                           n_processes, n_iterations, n_maintain)

    logging.info('Completed optimization initialization')
    if n_trials > 0:
        # Cross-validated run: one list of per-trial outcomes per iteration.
        test_outcomes = scope_optimization_cross_validation(
            scope_optimization, initial_feature_vector, problem_data,
            vector_model, score_predictions_function, n_trials, n_processes)

        assert len(test_outcomes) == n_iterations, \
            "test outcomes isn't returning an entry for each iteration"
        assert all(len(mask_outcomes) == n_trials for mask_outcomes in test_outcomes), \
            "test outcomes isn't returning an outcome for each mask for each iteration"
        assert all(isinstance(outcome, ModelOutcome)
                   for mask_outcomes in test_outcomes
                   for outcome in mask_outcomes), \
            "test outcomes are not all of type ModelOutcome"

        prediction_testing_output_fp = join(output_dir,
                                            'prediction_testing_output.txt')
        write_to_file(testing_output_lines(test_outcomes),
                      prediction_testing_output_fp)
        assert isfile(prediction_testing_output_fp), \
            "Output file \"%s\" was not created" % prediction_testing_output_fp

        # Only the outcomes of the final iteration are written as feature vectors.
        feature_vector_output_fp = join(output_dir,
                                        'fold_feature_vectors_output.txt')
        write_to_file(fold_features_output_lines(test_outcomes[-1], problem_data),
                      feature_vector_output_fp)
        assert isfile(feature_vector_output_fp), \
            "Output file \"%s\" was not created" % feature_vector_output_fp
    else:
        # No testing requested: run a single optimization and write its result.
        outcome = scope_optimization.optimize_vector(initial_feature_vector, problem_data, False)
        assert isinstance(outcome, ModelOutcome), "scope_optimization outcome is not a ModelOutcome"

        feature_vector_output_fp = join(output_dir,
                                        'feature_vector_output.txt')
        write_to_file(feature_output_lines(outcome, problem_data), feature_vector_output_fp)
        assert isfile(feature_vector_output_fp), \
            "Output file \"%s\" was not created" % feature_vector_output_fp

    end_time = time()
    elapsed_time = end_time - start_time
    logging.info("Finished feature vector optimization process for '%s' "
                 "model", model)
    logging.info('Total elapsed time (in seconds): %d', elapsed_time)

コード例 #3

ファイルを表示

ファイル: group_problem_data.py プロジェクト: jairideout/fresco

def build_problem_data(
    group_map_files,
    mapping_file,
    prediction_field,
    start_level,
    include_only,
    negate,
    n_processes,
    parse_object_string=parse_object_string_sample,
):
    """
    Builds a ProblemData object from group map files and a sample mapping file.

    Args:
        group_map_files: A list of group map files, one per scope level. Each is read
            with read_split_file, which is expected to return a (group to objects,
            object to group) pair of dicts.
        mapping_file: The sample mapping file, read with read_mapping_file into a
            mapping from sample name to that sample's fields.
        prediction_field: A string naming the mapping_file field whose value is used
            as each sample's response.
        start_level: An integer index into group_map_files; the groups at this level
            determine which sample names are considered.
        include_only: Either None (keep every sample), or a list/tuple whose first
            element is a mapping_file field name and whose second element is a list
            of string values. A sample is kept when its value for that field is in
            the list.
        negate: A boolean; when True the include_only test is inverted, so samples
            whose value is in the list are dropped instead of kept.
        n_processes: An integer forwarded to ProblemData.
        parse_object_string: A callable that extracts a sample name from an object
            string found in the group map files.

    Returns:
        A ProblemData built from the per-level group/object maps and the mapping from
        each kept sample name to its prediction_field value.

    Raises:
        InputTypeError: If an argument has an unexpected type, include_only is
            malformed, or start_level is not a valid index into group_map_files.
        KeyError: If a sample from the group files is missing from mapping_file, or
            if include_only[0] or prediction_field is not a field in mapping_file.
    """
    # Explicit (name, value, expected type) triples; the values are listed
    # directly instead of being looked up through locals().
    simple_var_checks = [
        ("n_processes", n_processes, types.IntType),
        ("start_level", start_level, types.IntType),
        ("negate", negate, types.BooleanType),
        ("prediction_field", prediction_field, types.StringType),
        ("include_only", include_only,
         (types.NoneType, types.ListType, types.TupleType)),
        ("group_map_files", group_map_files, types.ListType),
    ]
    for var_name, var_value, var_type in simple_var_checks:
        check_input_type(var_name, var_value, var_type)
    if include_only is not None:
        # Guard the indexing below so a short sequence is reported as an
        # input error rather than surfacing as an IndexError.
        if len(include_only) < 2:
            raise InputTypeError("include_only should contain a field name and a list of values")
        if not isinstance(include_only[0], types.StringType):
            raise InputTypeError("include_only[0] should be of type string")
        if not isinstance(include_only[1], types.ListType) or not all(
            isinstance(value, types.StringType) for value in include_only[1]
        ):
            raise InputTypeError("include_only[1] should be a list of strings")
    if start_level >= len(group_map_files) or start_level < 0:
        raise InputTypeError(
            "start_level (%s) is not a valid scope index; group_map_files is of length %s"
            % (start_level, len(group_map_files))
        )

    # For each scope, build a map from group to object and vice versa
    group_to_object = []
    object_to_group = []
    for map_file in group_map_files:
        g_to_o, o_to_g = read_split_file(map_file)

        # Sanity-check the parser output before storing it.
        assert isinstance(g_to_o, types.DictType), "read_split_file did not return a dict type"
        assert isinstance(o_to_g, types.DictType), "read_split_file did not return a dict type"

        group_to_object.append(g_to_o)
        object_to_group.append(o_to_g)

    # Find the set of sample names from our group names
    # An alternative is 'samplemap.keys()', but that may have records without features
    samplename_set = set()
    for objs in group_to_object[start_level].values():
        for obj in objs:
            samplename_set.add(parse_object_string(obj))

    # get a map of sample name to its properties
    samplemap = read_mapping_file(mapping_file)

    def include_sample(sample_fields):
        """Return True if a sample with these fields passes the include_only filter."""
        if include_only is None:
            return True
        try:
            field_value = sample_fields[include_only[0]]
        except KeyError:
            raise KeyError("include_only[0] is not a field in mapping_file")
        return (field_value in include_only[1]) ^ negate

    sample_to_response = {}
    for samplename in samplename_set:
        # Look the sample up before filtering so that a sample missing from
        # mapping_file always produces the descriptive error below, whether
        # or not include_only is set.
        try:
            sample_fields = samplemap[samplename]
        except KeyError:
            raise KeyError(
                "A sample name (%s) found in the group files is not a sample in mapping_file." % samplename
            )
        if not include_sample(sample_fields):
            continue
        try:
            sample_to_response[samplename] = sample_fields[prediction_field]
        except KeyError:
            raise KeyError("prediction_field is not a field in mapping_file.")

    problem_data = ProblemData(group_to_object, object_to_group, sample_to_response, n_processes, parse_object_string)

    return problem_data