Beispiel #1
0
def domain_map(features, feature_format, train_context, test_context,
               intervals=None,
               format='arff',
               positive_class=None):
    """Map features induced on the train context onto the test context.

    :param features: feature definitions — a prolog string for 'rsd'/'aleph',
        or a TreeLiker job object for 'treeliker'
    :param feature_format: one of 'rsd', 'aleph' or 'treeliker'
    :param train_context: data context the features were induced on
    :param test_context: data context to evaluate the features against
    :param intervals: optional discretization intervals for the test
        converter (defaults to no discretization)
    :param format: output dataset format; only 'arff' is supported for
        'treeliker'
    :param positive_class: positive class label, forwarded to dump_dataset
    :return: the mapped dataset, None for an unknown feature_format, or the
        string 'unsupported format' for a non-arff treeliker request
    """
    # Guard against the shared-mutable-default pitfall (was `intervals={}`).
    if intervals is None:
        intervals = {}

    dataset = None
    if feature_format in ['rsd', 'aleph']:
        train_rsd = RSD_Converter(train_context)
        test_rsd = RSD_Converter(test_context, discr_intervals=intervals)
        mapper_target_name = train_context.target_table + '_mapper'
        train_examples = train_rsd.all_examples(pred_name=mapper_target_name)
        test_examples = test_rsd.all_examples(pred_name=mapper_target_name)

        if feature_format == 'aleph':
            # Aleph features must be converted to RSD syntax first.
            features = aleph_to_rsd_features(features)

        prolog_bk = '\n'.join([
            _example_ids('testExampleIDs', test_examples),
            '%% test examples',
            test_examples,
            '%% train examples',
            train_examples,
            '%% train background knowledge',
            train_rsd.background_knowledge(),
            '%% test background knowledge',
            test_rsd.background_knowledge(),
            _feature_numbers(features),
            '%% features',
            features,
        ])
        THIS_DIR = os.path.dirname(__file__) if os.path.dirname(__file__) else '.'
        # mode='w': the default 'w+b' would raise TypeError when writing a
        # str on Python 3. delete=False because yap must reopen the file by
        # name after we close it.
        f = tempfile.NamedTemporaryFile(mode='w', delete=False)
        try:
            f.write(prolog_bk)
            f.close()
            cmd_args = ['yap', '-L', '--',
                        '%s/mapper.pl' % THIS_DIR, f.name, mapper_target_name]
            evaluations = subprocess.check_output(cmd_args)
            dataset = dump_dataset(features, feature_format, evaluations,
                                   train_context,
                                   format=format,
                                   positive_class=positive_class)
        finally:
            # Always clean up the temp file, even if yap or dumping fails.
            f.close()
            os.remove(f.name)

    elif feature_format == 'treeliker':
        # We provide treeliker with the test dataset
        # since it has a built-in ability to evaluate features
        treeliker_test = TreeLikerConverter(test_context,
                                            discr_intervals=intervals)
        treeliker = features
        treeliker.test_dataset = treeliker_test.dataset()
        _, test_dataset = treeliker.run()

        if format == 'arff':
            dataset = test_dataset
        else:
            return 'unsupported format'

    return dataset
Beispiel #2
0
def mysql_rsd_converter(input_dict):
    """Build RSD examples and background knowledge from a MySQL db context.

    Expects `input_dict` to provide 'context', 'discr_intervals' and a
    'dump' flag given as the string 'true'/'false'.
    """
    converter = RSD_Converter(
        input_dict['context'],
        discr_intervals=input_dict['discr_intervals'] or {},
        dump=(input_dict['dump'] == 'true'),
    )
    return {
        'examples': converter.all_examples(),
        'bk': converter.background_knowledge(),
    }
Beispiel #3
0
def mysql_rsd_converter(input_dict):
    """Build RSD examples and background knowledge from a MySQL db context.

    The 'dump' entry is a string flag; only the literal 'true' enables
    dumping.
    """
    wants_dump = input_dict['dump'] == 'true'
    intervals = input_dict['discr_intervals'] or {}
    converter = RSD_Converter(input_dict['context'],
                              discr_intervals=intervals,
                              dump=wants_dump)
    result = {}
    result['examples'] = converter.all_examples()
    result['bk'] = converter.background_knowledge()
    return result
Beispiel #4
0
def domain_map(features,
               feature_format,
               train_context,
               test_context,
               intervals=None,
               format='arff',
               positive_class=None):
    """Map features induced on the train context onto the test context.

    :param features: feature definitions — a prolog string for 'rsd'/'aleph',
        or a TreeLiker job object for 'treeliker'
    :param feature_format: one of 'rsd', 'aleph' or 'treeliker'
    :param train_context: data context the features were induced on
    :param test_context: data context to evaluate the features against
    :param intervals: optional discretization intervals for the test
        converter (defaults to no discretization)
    :param format: output dataset format; only 'arff' is supported for
        'treeliker'
    :param positive_class: positive class label, forwarded to dump_dataset
    :return: the mapped dataset, None for an unknown feature_format, or the
        string 'unsupported format' for a non-arff treeliker request
    """
    # Guard against the shared-mutable-default pitfall (was `intervals={}`).
    if intervals is None:
        intervals = {}

    dataset = None
    if feature_format in ['rsd', 'aleph']:
        train_rsd = RSD_Converter(train_context)
        test_rsd = RSD_Converter(test_context, discr_intervals=intervals)
        mapper_target_name = train_context.target_table + '_mapper'
        train_examples = train_rsd.all_examples(pred_name=mapper_target_name)
        test_examples = test_rsd.all_examples(pred_name=mapper_target_name)

        if feature_format == 'aleph':
            # Aleph features must be converted to RSD syntax first.
            features = aleph_to_rsd_features(features)

        prolog_bk = '\n'.join([
            _example_ids('testExampleIDs', test_examples),
            '%% test examples',
            test_examples,
            '%% train examples',
            train_examples,
            '%% train background knowledge',
            train_rsd.background_knowledge(),
            '%% test background knowledge',
            test_rsd.background_knowledge(),
            _feature_numbers(features),
            '%% features',
            features,
        ])
        THIS_DIR = os.path.dirname(__file__) if os.path.dirname(
            __file__) else '.'
        # mode='w': the default 'w+b' would raise TypeError when writing a
        # str on Python 3. delete=False because yap must reopen the file by
        # name after we close it.
        f = tempfile.NamedTemporaryFile(mode='w', delete=False)
        try:
            f.write(prolog_bk)
            f.close()
            cmd_args = [
                'yap', '-L', '--',
                '%s/mapper.pl' % THIS_DIR, f.name, mapper_target_name
            ]
            evaluations = subprocess.check_output(cmd_args)
            dataset = dump_dataset(features,
                                   feature_format,
                                   evaluations,
                                   train_context,
                                   format=format,
                                   positive_class=positive_class)
        finally:
            # Always clean up the temp file, even if yap or dumping fails.
            f.close()
            os.remove(f.name)

    elif feature_format == 'treeliker':
        # We provide treeliker with the test dataset
        # since it has a built-in ability to evaluate features
        treeliker_test = TreeLikerConverter(test_context,
                                            discr_intervals=intervals)
        treeliker = features
        treeliker.test_dataset = treeliker_test.dataset()
        _, test_dataset = treeliker.run()

        if format == 'arff':
            dataset = test_dataset
        else:
            return 'unsupported format'

    return dataset