Example #1
0
File: mapper.py Project: Alshak/rdm
def domain_map(features, feature_format, train_context, test_context,
               intervals=None,
               format='arff',
               positive_class=None):
    '''
    Map the examples of ``test_context`` into the feature space described
    by ``features``.

      :param features: for 'rsd' and 'aleph', the feature definitions that
                       are written into the Prolog input file; for
                       'treeliker', an object exposing a ``test_dataset``
                       attribute and a ``run()`` method
      :param feature_format: 'rsd', 'aleph' or 'treeliker'
      :param train_context: context holding the training examples
      :param test_context: context holding the test examples
      :param intervals: discretization intervals handed to the test
                        converter (optional, defaults to an empty dict)
      :param format: output format, forwarded to ``dump_dataset``; the
                     'treeliker' branch only accepts 'arff'
      :param positive_class: forwarded to ``dump_dataset``

      :return: the mapped dataset; the string 'unsupported format' when
               'treeliker' is asked for anything but 'arff'; ``None`` when
               ``feature_format`` is not recognised
      :raises subprocess.CalledProcessError: if the ``yap`` process exits
                                             with a non-zero status
    '''
    # A fresh dict per call, so the converters never share a default object.
    if intervals is None:
        intervals = {}

    dataset = None
    if feature_format in ['rsd', 'aleph']:
        train_rsd = RSDConverter(train_context)
        test_rsd = RSDConverter(test_context, discr_intervals=intervals)
        mapper_target_name = train_context.target_table + '_mapper'
        train_examples = train_rsd.all_examples(pred_name=mapper_target_name)
        test_examples = test_rsd.all_examples(pred_name=mapper_target_name)

        if feature_format == 'aleph':
            features = aleph_to_rsd_features(features)

        prolog_bk = '\n'.join([
            _example_ids('testExampleIDs', test_examples),
            '%% test examples',
            test_examples,
            '%% train examples',
            train_examples,
            '%% train background knowledge',
            train_rsd.background_knowledge(),
            '%% test background knowledge',
            test_rsd.background_knowledge(),
            _feature_numbers(features),
            '%% features',
            features,
        ])
        # NamedTemporaryFile opens in binary mode by default, so text has to
        # be encoded before it is written; byte strings pass through as-is.
        if not isinstance(prolog_bk, bytes):
            prolog_bk = prolog_bk.encode('utf-8')

        this_dir = os.path.dirname(__file__) or '.'
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(prolog_bk)
            finally:
                # Close before yap runs so the contents are flushed to disk.
                f.close()
            cmd_args = ['yap', '-L', '--', '%s/mapper.pl' % this_dir,
                        f.name, mapper_target_name]
            evaluations = subprocess.check_output(cmd_args)
            dataset = dump_dataset(features, feature_format, evaluations,
                                   train_context,
                                   format=format,
                                   positive_class=positive_class)
        finally:
            # delete=False means nobody else removes the file; clean it up
            # even when yap or dump_dataset fails.
            os.remove(f.name)

    elif feature_format == 'treeliker':
        # We provide treeliker with the test dataset
        # since it has a built-in ability to evaluate features
        treeliker_test = TreeLikerConverter(test_context,
                                            discr_intervals=intervals)
        # NOTE: this sets test_dataset on the object the caller passed in.
        treeliker = features
        treeliker.test_dataset = treeliker_test.dataset()
        _, test_dataset = treeliker.run()

        if format == 'arff':
            dataset = test_dataset
        else:
            return 'unsupported format'

    return dataset
Example #2
0
def domain_map(features,
               feature_format,
               train_context,
               test_context,
               intervals=None,
               format='arff',
               positive_class=None):
    '''
    Use the features returned by a propositionalization method to map
    unseen test examples into the new feature space.

      :param features: string of features as returned by rsd or aleph; for
                       'treeliker', the object whose ``run()`` method is
                       called after its ``test_dataset`` attribute is set
      :param feature_format: 'rsd', 'aleph', 'treeliker'
      :param train_context: DBContext with training examples
      :param test_context: DBContext with test examples
      :param intervals: discretization intervals (optional; an empty dict
                        is used when omitted)
      :param format: output format (only arff is used atm)
      :param positive_class: required for aleph

      :return: returns the test examples in propositional form; the string
               'unsupported format' for 'treeliker' with a non-arff format;
               ``None`` for an unrecognised ``feature_format``
      :rtype: str
      :raises subprocess.CalledProcessError: when the ``yap`` process
                                             returns a non-zero exit status

      :Example:

      >>> test_arff = mapper.domain_map(features, 'rsd', train_context, test_context)
    '''
    # Avoid a shared mutable default: build a new dict for every call.
    intervals = {} if intervals is None else intervals

    dataset = None
    if feature_format in ['rsd', 'aleph']:
        train_rsd = RSDConverter(train_context)
        test_rsd = RSDConverter(test_context, discr_intervals=intervals)
        mapper_target_name = train_context.target_table + '_mapper'
        train_examples = train_rsd.all_examples(pred_name=mapper_target_name)
        test_examples = test_rsd.all_examples(pred_name=mapper_target_name)

        if feature_format == 'aleph':
            features = aleph_to_rsd_features(features)

        prolog_bk = '\n'.join([
            _example_ids('testExampleIDs', test_examples),
            '%% test examples',
            test_examples,
            '%% train examples',
            train_examples,
            '%% train background knowledge',
            train_rsd.background_knowledge(),
            '%% test background knowledge',
            test_rsd.background_knowledge(),
            _feature_numbers(features),
            '%% features',
            features,
        ])
        # The temporary file is opened in binary mode (the tempfile
        # default), so text must be encoded first; bytes are left untouched.
        if not isinstance(prolog_bk, bytes):
            prolog_bk = prolog_bk.encode('utf-8')

        this_dir = os.path.dirname(__file__) or '.'
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            try:
                f.write(prolog_bk)
            finally:
                # Closing flushes the data before yap opens the file.
                f.close()
            cmd_args = [
                'yap', '-L', '--',
                '%s/mapper.pl' % this_dir, f.name, mapper_target_name
            ]
            evaluations = subprocess.check_output(cmd_args)
            dataset = dump_dataset(features,
                                   feature_format,
                                   evaluations,
                                   train_context,
                                   format=format,
                                   positive_class=positive_class)
        finally:
            # Cleanup: the file was created with delete=False, so remove it
            # here whether or not the steps above succeeded.
            os.remove(f.name)

    elif feature_format == 'treeliker':
        # We provide treeliker with the test dataset
        # since it has a built-in ability to evaluate features
        treeliker_test = TreeLikerConverter(test_context,
                                            discr_intervals=intervals)
        # NOTE: the caller's object is modified (test_dataset is assigned).
        treeliker = features
        treeliker.test_dataset = treeliker_test.dataset()
        _, test_dataset = treeliker.run()

        if format == 'arff':
            dataset = test_dataset
        else:
            return 'unsupported format'

    return dataset
Example #3
0
def database_rsd_converter(input_dict):
    '''
    Build an RSDConverter from the given inputs and return its examples
    and background knowledge.

      :param input_dict: mapping with the keys 'dump', 'context' and
                         'discr_intervals'; a falsy 'discr_intervals' value
                         is replaced by an empty dict

      :return: dict with 'examples' (result of ``all_examples()``) and
               'bk' (result of ``background_knowledge()``)
      :raises KeyError: if one of the expected keys is missing
    '''
    # The flag is parsed but not used further down; the lookup is kept so
    # that a missing 'dump' key still fails the same way.
    dump_requested = input_dict['dump'] == 'true'

    context = input_dict['context']
    intervals = input_dict['discr_intervals'] or {}
    converter = RSDConverter(context, discr_intervals=intervals)

    result = {}
    result['examples'] = converter.all_examples()
    result['bk'] = converter.background_knowledge()
    return result