def ListPrimitives(self, request, context):
    '''
    List all primitives known to TA2: their IDs, versions, names, and digests.

    Using this information a TA3 should know which primitives may be put into a
    pipeline template. To narrow down potential primitives, a TA3 can also ask a
    TA2 to do a solution search and then observe which primitives the TA2 is
    using. If more metadata about primitives is needed, a TA3 can use the results
    of this call to map primitives to metadata (from Python code or primitive
    annotations) on its own.
    '''
    list_primitives = []
    source_primitives = []
    for primitive_path in index.search():
        try:
            source_primitives.append(index.get_primitive(primitive_path))
        except Exception:
            # Skip primitives that fail to import.
            continue
    for primitive in source_primitives:
        meta = primitive.metadata.to_json_structure()
        list_primitives.append(primitive_pb2.Primitive(
            id=meta['id'],
            version=meta['version'],
            python_path=meta['python_path'],
            name=meta['name'],
            digest=meta['digest']))
    return core_pb2.ListPrimitivesResponse(primitives=list_primitives)
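# A minimal client-side sketch of calling ListPrimitives over gRPC, assuming the
# generated TA3-TA2 API modules (core_pb2, core_pb2_grpc) are importable and that
# the TA2 listens on a hypothetical localhost:45042.
import grpc

def fetch_primitives(address='localhost:45042'):
    channel = grpc.insecure_channel(address)
    stub = core_pb2_grpc.CoreStub(channel)
    response = stub.ListPrimitives(core_pb2.ListPrimitivesRequest())
    # Index the returned descriptions by python_path for easy lookup.
    return {p.python_path: p for p in response.primitives}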
def test_entrypoint(self):
    working_set_entries = copy.copy(pkg_resources.working_set.entries)
    working_set_entry_keys = copy.copy(pkg_resources.working_set.entry_keys)
    working_set_by_key = copy.copy(pkg_resources.working_set.by_key)

    try:
        distribution = pkg_resources.Distribution(__file__)
        entry_point = pkg_resources.EntryPoint.parse(
            'foo2.bar2.FooBar2Primitive = test_index:FooBar2Primitive',
            dist=distribution)
        distribution._ep_map = {'d3m.primitives': {'foo2.bar2.FooBar2Primitive': entry_point}}
        pkg_resources.working_set.add(distribution)

        python_path = 'd3m.primitives.foo2.bar2.FooBar2Primitive'

        self.assertIn(python_path, index.search())
        self.assertIs(index.get_primitive(python_path), FooBar2Primitive)
    finally:
        pkg_resources.working_set.entries = working_set_entries
        pkg_resources.working_set.entry_keys = working_set_entry_keys
        pkg_resources.working_set.by_key = working_set_by_key
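# For reference, an installed primitive package exposes the same mapping through a
# setuptools entry point in the 'd3m.primitives' namespace. A minimal, hypothetical
# setup.py sketch using the entry-point string from the test above:
from setuptools import setup

setup(
    name='foo2-bar2-primitive',  # hypothetical package name
    version='0.1.0',
    py_modules=['test_index'],
    entry_points={
        'd3m.primitives': [
            'foo2.bar2.FooBar2Primitive = test_index:FooBar2Primitive',
        ],
    },
)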
def available_primitives():
    primitives_info = []

    with d3m_utils.silence():
        for primitive_path in d3m_index.search():
            if primitive_path in PrimitivesList.BlockList:
                continue
            try:
                primitive = d3m_index.get_primitive(primitive_path)
                metadata = primitive.metadata.query()
                primitives_info.append({
                    'id': metadata['id'],
                    'version': metadata['version'],
                    'python_path': metadata['python_path'],
                    'name': metadata['name'],
                    'digest': metadata.get('digest', None),
                })
            except Exception:
                # Skip primitives that fail to load or lack required metadata.
                continue

    return primitives_info
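# A short usage sketch for available_primitives(), assuming the names used above
# (d3m_index, d3m_utils, PrimitivesList) are imported: dump one JSON record per
# loadable primitive.
if __name__ == '__main__':
    import json
    print(json.dumps(available_primitives(), indent=2))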
def load_from_d3m_index(self):
    '''Load primitive descriptions from installed Python packages.'''
    for primitive_path, primitive_type in index.search().items():
        primitive = self._create_primitive_desc(primitive_type.metadata)
        if primitive.cls in self.black_list_package:
            print('Blacklisting primitive: {}'.format(primitive.name))
        else:
            self.primitives.append(primitive)
    self._setup()
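# Note: older d3m releases returned a dict of python_path -> primitive class from
# index.search(), as relied on above, while newer releases return only the paths
# and expect index.get_primitive() for loading. A version-tolerant sketch using
# just the public index API:
def iter_installed_primitives():
    found = index.search()
    if isinstance(found, dict):
        yield from found.items()
    else:
        for primitive_path in found:
            yield primitive_path, index.get_primitive(primitive_path)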
def main():
    ray.init(webui_host='127.0.0.1')

    # Create the command-line parser.
    parser = argparse.ArgumentParser(description="Starts server from command line")
    configure_parser(parser)
    arguments = parser.parse_args()

    # Set up logging to both the console and a log file.
    verbose_format = '%(asctime)s %(levelname)-8s %(processName)-15s [%(filename)s:%(lineno)d] %(message)s'
    concise_format = '%(asctime)s %(levelname)-8s %(message)s'
    log_format = verbose_format if arguments.verbose else concise_format
    logging.basicConfig(
        format=log_format,
        handlers=[
            logging.StreamHandler(),
            logging.FileHandler('{}/d3m.log'.format(Path.TEMP_STORAGE_ROOT), 'w', 'utf-8'),
        ],
        datefmt='%m/%d %H:%M:%S')
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    warnings.filterwarnings('ignore')

    server = Server(arguments)

    try:
        server.start()
        with d3m_utils.silence():
            d3m_index.load_all(blocklist=PrimitivesList.BlockList)
        # Give the loading workers roughly 0.3 seconds per installed primitive.
        wait_seconds = len(d3m_index.search()) * 0.3
        print('Waiting {} seconds for loading workers'.format(wait_seconds))
        time.sleep(wait_seconds)
        logger.info('---------- Waiting for Requests ----------')
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        logger.info('############ STOPPING SERVICE ############')
        server.stop()
def __init__(self):
    self.primitive = d3m_index.search()
    self.argmentsmapper = {
        "container": metadata_base.ArgumentType.CONTAINER,
        "data": metadata_base.ArgumentType.DATA,
        "value": metadata_base.ArgumentType.VALUE,
        "primitive": metadata_base.ArgumentType.PRIMITIVE,
    }
    self.stepcheck = None  # Generate a step check matrix
    self.step_number = {}
    # Map (current container type, desired container type) to the conversion
    # primitive that bridges them.
    self.addstep_mapper = {
        ("<class 'd3m.container.pandas.DataFrame'>",
         "<class 'd3m.container.numpy.ndarray'>"): "d3m.primitives.data.DataFrameToNDArray",
        ("<class 'd3m.container.numpy.ndarray'>",
         "<class 'd3m.container.pandas.DataFrame'>"): "d3m.primitives.data.NDArrayToDataFrame",
    }
    self.description_info = ""
    self.need_add_reference = False
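# A small helper sketch for addstep_mapper: given the repr strings of the current
# and desired container types (hypothetical arguments), return the python path of
# the conversion primitive to insert, or None when no conversion is known.
def find_conversion_primitive(mapper, current_type_repr, wanted_type_repr):
    return mapper.get((current_type_repr, wanted_type_repr))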
import os
import logging
import json

from d3m import index

logger = logging.getLogger(__name__)

PRIMITIVES_BY_NAME_PATH = os.path.join(os.path.dirname(__file__), '../resource/primitives_by_name.json')
PRIMITIVES_BY_TYPE_PATH = os.path.join(os.path.dirname(__file__), '../resource/primitives_by_type.json')

INSTALLED_PRIMITIVES = sorted(index.search(), key=lambda x: x.endswith('SKlearn'), reverse=True)

BLACK_LIST = {
    'd3m.primitives.classification.random_classifier.Test',
    'd3m.primitives.classification.global_causal_discovery.ClassifierRPI',
    'd3m.primitives.classification.tree_augmented_naive_bayes.BayesianInfRPI',
    'd3m.primitives.classification.simple_cnaps.UBC',
    'd3m.primitives.classification.logistic_regression.UBC',
    'd3m.primitives.classification.multilayer_perceptron.UBC',
    'd3m.primitives.classification.canonical_correlation_forests.UBC',
    'd3m.primitives.regression.multilayer_perceptron.UBC',
    'd3m.primitives.regression.canonical_correlation_forests.UBC',
    'd3m.primitives.regression.linear_regression.UBC',
    'd3m.primitives.classification.inceptionV3_image_feature.Gator',
    'd3m.primitives.classification.search.Find_projections',
    'd3m.primitives.classification.search_hybrid.Find_projections',
    'd3m.primitives.regression.search_hybrid_numeric.Find_projections',
import json

import template_new
from d3m import utils, index

primitive = index.search()
# Example of inspecting a single primitive's metadata:
# print(primitive['d3m.primitives.datasets.Denormalize'].metadata.pretty_print())

mydsbox = template_new.DSBoxTemplate()

with open("user_defined_template_sample.json", "r") as g:
    template = json.load(g)

with open("user_defined_conf_sample.json", "r") as f:
    configuration_point = json.load(f)

# mypipeline = mydsbox.to_pipeline(configuration_point)
# mytemplate = template_new.MyTemplate(template)
        requirements.append("NO_CATEGORICAL_VALUES")

    if ("unique" in data.name) and (not passed) and ("NOT_UNIQUE" not in requirements):
        # The primitive cannot handle a column of unique values.
        requirements.append("NOT_UNIQUE")
    if ("negative" in data.name) and (not passed) and ("POSITIVE_VALUES" not in requirements):
        # The primitive cannot handle negative values.
        requirements.append("POSITIVE_VALUES")

    prim["Requirements"] = requirements
    return prim


# Main script
DATADIR = "data_profiler/"  # Directory with the profiling datasets

d = {}
for primitive_name, primitive in index.search().items():
    d[primitive_name] = getPrimitiveRequirements(DATADIR, primitive_name, primitive)

print(json.dumps(d))
import logging
import pickle
from copy import deepcopy
from os.path import join

from sqlalchemy.orm import joinedload

from d3m import index
from d3m.container import Dataset
from d3m.metadata.problem import PerformanceMetric, TaskKeyword
from d3m_ta2_nyu.pipeline_score import evaluate, kfold_tabular_split, score
from d3m_ta2_nyu.workflow import database
from d3m_ta2_nyu.parameter_tuning.primitive_config import is_tunable
from d3m_ta2_nyu.parameter_tuning.bayesian import HyperparameterTuning, get_new_hyperparameters
from d3m_ta2_nyu.ta2 import create_outputfolders

logger = logging.getLogger(__name__)

PRIMITIVES = index.search()


@database.with_db
def tune(pipeline_id, metrics, problem, dataset_uri, sample_dataset_uri,
         report_rank, timeout_tuning, timeout_run, msg_queue, db):
    # FIXME: Save 10% of the timeout to score the best configuration.
    timeout_tuning = timeout_tuning * 0.9

    # Load the pipeline from the database.
    pipeline = (
        db.query(database.Pipeline)
        .filter(database.Pipeline.id == pipeline_id)
        .options(joinedload(database.Pipeline.modules),
                 joinedload(database.Pipeline.connections))
    ).one()

    logger.info('About to tune pipeline, id=%s, dataset=%r, timeout=%d secs',
                pipeline_id, dataset_uri, timeout_tuning)