Example no. 1
    def ListPrimitives(self, request, context):
        '''
    List all primitives known to TA2, their IDs, versions, names, and digests. Using this
    information a TA3 should know which primitives may be put into a pipeline template.
    To narrow down potential primitives to use a TA3 can also ask a TA2 to do a solution
    search and then observe which primitives the TA2 is using. If more metadata about primitives
    is needed, then a TA3 can use the results of this call to map primitives to metadata
    (from Python code or primitive annotations) on its own.
    '''

        list_primitives = []

        # Single pass: resolve each primitive and convert its metadata
        # directly to the protobuf message (the original built an
        # intermediate list first for no benefit).
        for primitive_path in index.search():
            try:
                primitive = index.get_primitive(primitive_path)
            except Exception:
                # Deliberate best-effort: a primitive that fails to load is
                # skipped rather than aborting the whole listing.  The
                # original used a bare `except: 0`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                continue

            meta = primitive.metadata.to_json_structure()
            list_primitives.append(
                primitive_pb2.Primitive(id=meta['id'],
                                        version=meta['version'],
                                        python_path=meta['python_path'],
                                        name=meta['name'],
                                        digest=meta['digest']))
        return core_pb2.ListPrimitivesResponse(primitives=list_primitives)
Example no. 2
    def test_entrypoint(self):
        """Register a primitive through a synthetic entry point and verify
        that index.search()/get_primitive() can resolve it, restoring the
        pkg_resources working set afterwards."""
        ws = pkg_resources.working_set
        # Snapshot the working-set state so the test leaves no trace.
        saved_entries = copy.copy(ws.entries)
        saved_entry_keys = copy.copy(ws.entry_keys)
        saved_by_key = copy.copy(ws.by_key)

        try:
            dist = pkg_resources.Distribution(__file__)
            ep = pkg_resources.EntryPoint.parse(
                'foo2.bar2.FooBar2Primitive = test_index:FooBar2Primitive',
                dist=dist)
            dist._ep_map = {
                'd3m.primitives': {
                    'foo2.bar2.FooBar2Primitive': ep,
                },
            }
            ws.add(dist)

            path = 'd3m.primitives.foo2.bar2.FooBar2Primitive'
            self.assertIn(path, index.search())
            self.assertIs(index.get_primitive(path), FooBar2Primitive)

        finally:
            # Restore the exact pre-test working-set state.
            ws.entries = saved_entries
            ws.entry_keys = saved_entry_keys
            ws.by_key = saved_by_key
Example no. 3
def available_primitives():
    """Return basic metadata for every installed, non-blocklisted primitive.

    Returns:
        list[dict]: one dict per primitive with keys 'id', 'version',
        'python_path', 'name' and 'digest' ('digest' is None when the
        metadata does not provide one).  Primitives that fail to load or
        lack a required field are skipped (best-effort).
    """
    primitives_info = []

    with d3m_utils.silence():
        for primitive_path in d3m_index.search():
            if primitive_path in PrimitivesList.BlockList:
                continue

            try:
                primitive = d3m_index.get_primitive(primitive_path)
                # Query the metadata once instead of once per field.
                metadata = primitive.metadata.query()
                primitives_info.append({
                    'id': metadata['id'],
                    'version': metadata['version'],
                    'python_path': metadata['python_path'],
                    'name': metadata['name'],
                    'digest': metadata.get('digest', None),
                })
            except Exception:
                # Deliberate best-effort skip; the original bare `except:`
                # also caught KeyboardInterrupt/SystemExit.
                continue
    return primitives_info
Example no. 4
    def load_from_d3m_index(self):
        '''Load primitive description from installed python packages'''

        for primitive_path, primitive_type in index.search().items():
            desc = self._create_primitive_desc(primitive_type.metadata)
            # Guard clause: blocklisted primitives are reported and dropped.
            if desc.cls in self.black_list_package:
                print('Black listing primitive: {}'.format(desc.name))
                continue
            self.primitives.append(desc)

        self._setup()
Example no. 5
def main():
    """Start the gRPC server, preload all primitives, then block until
    interrupted (Ctrl-C stops the server cleanly)."""
    ray.init(webui_host='127.0.0.1')
    # Creating parser
    parser = argparse.ArgumentParser(
        description="Starts server from command line")
    configure_parser(parser)
    arguments = parser.parse_args()

    # Setup logger: verbose format adds process/file/line context.
    verbose_format = '%(asctime)s %(levelname)-8s %(processName)-15s [%(filename)s:%(lineno)d] %(message)s'
    concise_format = '%(asctime)s %(levelname)-8s %(message)s'
    log_format = verbose_format if arguments.verbose else concise_format
    logging.basicConfig(format=log_format,
                        handlers=[
                            logging.StreamHandler(),
                            logging.FileHandler(
                                '{}/d3m.log'.format(Path.TEMP_STORAGE_ROOT),
                                'w', 'utf-8')
                        ],
                        datefmt='%m/%d %H:%M:%S')
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    warnings.filterwarnings('ignore')

    server = Server(arguments)

    try:
        server.start()
        with d3m_utils.silence():
            d3m_index.load_all(blocklist=PrimitivesList.BlockList)
        # 0.3 s per installed primitive; compute once (the original called
        # d3m_index.search() twice for the same value).
        wait_seconds = len(d3m_index.search()) * 0.3
        print('Wait for loading workers for', wait_seconds)
        time.sleep(wait_seconds)
        logger.info('---------- Waiting for Requests ----------')
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        logger.info('############ STOPPING SERVICE ############')
        server.stop()
Example no. 6
    def __init__(self):
        """Initialize the primitive index and the lookup tables used when
        assembling pipeline steps."""
        # All installed primitives, as returned by d3m_index.search().
        self.primitive = d3m_index.search()

        # Argument-kind name -> d3m ArgumentType enum member.
        self.argmentsmapper = {
            "container": metadata_base.ArgumentType.CONTAINER,
            "data": metadata_base.ArgumentType.DATA,
            "value": metadata_base.ArgumentType.VALUE,
            "primitive": metadata_base.ArgumentType.PRIMITIVE,
        }

        self.stepcheck = None  # Generate a step check matrix
        self.step_number = {}

        # (input container type repr, output container type repr) ->
        # python path of the primitive that performs the conversion.
        self.addstep_mapper = {
            ("<class 'd3m.container.pandas.DataFrame'>",
             "<class 'd3m.container.numpy.ndarray'>"):
                "d3m.primitives.data.DataFrameToNDArray",
            ("<class 'd3m.container.numpy.ndarray'>",
             "<class 'd3m.container.pandas.DataFrame'>"):
                "d3m.primitives.data.NDArrayToDataFrame",
        }

        self.description_info = ""
        self.need_add_reference = False
Example no. 7
import os
import logging
import json
from d3m import index

logger = logging.getLogger(__name__)

# Bundled JSON resources mapping primitive names/types to metadata,
# located relative to this module.
PRIMITIVES_BY_NAME_PATH = os.path.join(os.path.dirname(__file__),
                                       '../resource/primitives_by_name.json')
PRIMITIVES_BY_TYPE_PATH = os.path.join(os.path.dirname(__file__),
                                       '../resource/primitives_by_type.json')

# All installed primitive paths; the key + reverse=True sort places the
# 'SKlearn'-suffixed primitives first.
INSTALLED_PRIMITIVES = sorted(index.search(),
                              key=lambda x: x.endswith('SKlearn'),
                              reverse=True)

BLACK_LIST = {
    'd3m.primitives.classification.random_classifier.Test',
    'd3m.primitives.classification.global_causal_discovery.ClassifierRPI',
    'd3m.primitives.classification.tree_augmented_naive_bayes.BayesianInfRPI',
    'd3m.primitives.classification.simple_cnaps.UBC',
    'd3m.primitives.classification.logistic_regression.UBC',
    'd3m.primitives.classification.multilayer_perceptron.UBC',
    'd3m.primitives.classification.canonical_correlation_forests.UBC',
    'd3m.primitives.regression.multilayer_perceptron.UBC',
    'd3m.primitives.regression.canonical_correlation_forests.UBC',
    'd3m.primitives.regression.linear_regression.UBC',
    'd3m.primitives.classification.inceptionV3_image_feature.Gator',
    'd3m.primitives.classification.search.Find_projections',
    'd3m.primitives.classification.search_hybrid.Find_projections',
    'd3m.primitives.regression.search_hybrid_numeric.Find_projections',
Example no. 8
# Script: build a DSBox template from user-supplied JSON configuration files.
import template_new
from d3m import utils, index
import json
# Installed primitive index; the commented subscript below suggests it is
# keyed by primitive python path — TODO confirm against the d3m version used.
primitive = index.search()
# print(primitive['d3m.primitives.datasets.Denormalize'].metadata.pretty_print())
mydsbox = template_new.DSBoxTemplate()
# Both the template structure and the configuration point that fills it in
# are read from JSON files next to this script.
with open("user_defined_template_sample.json", "r") as g:
    template = json.load(g)
with open("user_defined_conf_sample.json", "r") as f:
    configuration_point = json.load(f)
# mypipeline = mydsbox.to_pipeline(configuration_point)
# print(configuration_point)
# print(type(configuration_point))
# print(typeof(configuration_point))
# print(mypipeline)
# template_new.printpipeline(mypipeline)
#
mytemplate = template_new.MyTemplate(template)
Example no. 9
            requirements.append("NO_CATEGORICAL_VALUES")
        if ("unique" in data.name) and (not passed) and ("NOT_UNIQUE"
                                                         not in requirements):
            #print("Primitive cannot handle having a column of unique values")
            requirements.append("NOT_UNIQUE")
        if ("negative"
                in data.name) and (not passed) and ("POSITIVE_VALUES"
                                                    not in requirements):
            #print("Primitive cannot handle negative values")
            requirements.append("POSITIVE_VALUES")
        #if(array):
        #    #prim.isArray = True
        #    prim["IsArray"] = True
    prim["Requirements"] = requirements

    return prim


# Main script: detect the data requirements of every installed primitive
# by running it against the profiling datasets.
DATADIR = "data_profiler/"  # Dir with the profiling datasets
d = {
    primitive_name: getPrimitiveRequirements(DATADIR, primitive_name,
                                             primitive)
    for primitive_name, primitive in index.search().items()
}
print(json.dumps(d))
Example no. 10
import pickle
from os.path import join
from d3m import index
from copy import deepcopy
from sqlalchemy.orm import joinedload
from d3m.container import Dataset
from d3m_ta2_nyu.pipeline_score import evaluate, kfold_tabular_split, score
from d3m_ta2_nyu.workflow import database
from d3m_ta2_nyu.parameter_tuning.primitive_config import is_tunable
from d3m_ta2_nyu.parameter_tuning.bayesian import HyperparameterTuning, get_new_hyperparameters
from d3m.metadata.problem import PerformanceMetric, TaskKeyword
from d3m_ta2_nyu.ta2 import create_outputfolders

logger = logging.getLogger(__name__)

# All installed primitive paths, resolved once at module import time.
PRIMITIVES = index.search()


@database.with_db
def tune(pipeline_id, metrics, problem, dataset_uri, sample_dataset_uri, report_rank, timeout_tuning, timeout_run,
         msg_queue, db):
    timeout_tuning = timeout_tuning * 0.9  # FIXME: Save 10% of timeout to score the best config
    # Load pipeline from database
    pipeline = (
        db.query(database.Pipeline)
        .filter(database.Pipeline.id == pipeline_id)
        .options(joinedload(database.Pipeline.modules),
                 joinedload(database.Pipeline.connections))
    ).one()

    logger.info('About to tune pipeline, id=%s, dataset=%r, timeout=%d secs', pipeline_id, dataset_uri, timeout_tuning)