def __init__(self, location=None, dirname=None):
    """
    Set up the application's storage directory paths.

    :param location: base directory; defaults to the user's home directory.
    :param dirname: application folder name; defaults to '.<CONFIG.NAME>'.
    """
    # TODO: type check - location (str, path-like object)
    # TODO: type check - dirname (str, path-like object)
    home = os.path.expanduser('~')
    base = assign_if_none(location, home)
    self.location = os.path.abspath(base)
    self.dirname = assign_if_none(dirname, '.{dirname}'.format(dirname=CONFIG.NAME))
    # Full path of the application directory.
    self.dirpath = os.path.join(self.location, self.dirname)
def __init__(self, schema=None):
    """
    Initialize the node from ``schema`` (an empty mapping when omitted)
    and seed this object's own entries from it.
    """
    # check_mapping(schema)
    mapping = assign_if_none(schema, {})
    self.children = []
    self.schema = mapping
    self.update(mapping)
def __init__(self, schema=None):
    """
    Build the instance around the given ``schema`` mapping; when no
    schema is supplied an empty one is used.
    """
    # check_mapping(schema)
    self.schema = assign_if_none(schema, {})
    # Child nodes start out empty.
    self.children = []
    # Mirror the schema's entries onto this object.
    self.update(self.schema)
def __init__(self, email, name=None):
    """
    Create a client identified by ``email``.

    :param email: contact e-mail for the remote service.
    :param name: tool/client name; defaults to ``Client.NAME``.
    """
    # TODO: type check and validate - email (str), valid email
    # TODO: type check - name (str)
    # TODO: Maybe try saving base parameters as environment variables?
    self.name = assign_if_none(name, Client.NAME)
    self.email = email
    # TODO: Should we cache databases?
    self.databases = self.info(refresh_cache=True)
def __init__(self, email, name=None):
    """
    Initialize the client.

    :param email: valid e-mail address used to identify the caller.
    :param name: optional client name (falls back to ``Client.NAME``).
    """
    # TODO: type check and validate - email (str), valid email
    # TODO: type check - name (str)
    # TODO: Maybe try saving base parameters as environment variables?
    self.email = email
    self.name = assign_if_none(name, Client.NAME)
    # Fetch the database listing up front.
    # TODO: Should we cache databases?
    self.databases = self.info(refresh_cache=True)
def __init__(self, status=None, code=200):
    """
    Build a response envelope with a fresh id and the current version.

    :param status: response status; defaults to ``Response.Status.SUCCESS``.
    :param code: HTTP-style status code.
    """
    self.version = CONFIG.VERSION
    self.id = get_rand_uuid_str()
    self.status = assign_if_none(status, Response.Status.SUCCESS)
    self.code = code
    # The serializable view of this response.
    schema = addict.Dict()
    schema.id = get_rand_uuid_str()
    schema.version = self.version
    schema.status = self.status
    self.schema = schema
def request(self, method, url, parameters=None, *args, **kwargs):
    """
    Perform an HTTP request and return the sanitized payload.

    :param method: HTTP verb (e.g. 'GET', 'POST').
    :param url: target URL.
    :param parameters: optional mapping of query parameters, merged over
        the client's base parameters for this call only.
    :raises requests.HTTPError: when the server responds with an error status.
    """
    parameters = assign_if_none(parameters, dict())
    # Copy the base parameters: the original aliased self.baseparams and
    # then update()d it, leaking per-call parameters into shared client
    # state for every subsequent request.
    params = dict(self.baseparams)
    params.update(parameters)
    response = requests.request(method, url, params=params, *args, **kwargs)
    if response.ok:
        data = sanitize_response(response, params['retmode'])
    else:
        response.raise_for_status()
    return data
def __init__(self, status=None, code=200, data=None):
    """
    Build a response envelope and set its initial payload.

    :param status: response status; defaults to ``Response.Status.SUCCESS``.
    :param code: HTTP-style status code.
    :param data: initial payload; defaults to an empty dict.
    """
    self.version = CONFIG.VERSION
    self.id = get_rand_uuid_str()
    self.status = assign_if_none(status, Response.Status.SUCCESS)
    self.code = code
    self.schema = addict.Dict()
    self.schema.id = get_rand_uuid_str()
    self.schema.version = self.version
    self.schema.status = self.status
    # `data = { }` as a default was a mutable default argument shared by
    # every call; use the None sentinel (file convention) instead.
    self.set_data(assign_if_none(data, {}))
def request(self, method, url, parameters=None, *args, **kwargs):
    """
    Issue an HTTP request with the client's base parameters applied and
    return the sanitized response body.

    :param method: HTTP verb (e.g. 'GET', 'POST').
    :param url: target URL.
    :param parameters: optional query parameters merged over the base set.
    :raises requests.HTTPError: for non-OK responses (via raise_for_status).
    """
    parameters = assign_if_none(parameters, dict())
    # Work on a copy — `params = self.baseparams` aliased the shared dict,
    # so update() permanently polluted the client's base parameters.
    params = dict(self.baseparams)
    params.update(parameters)
    response = requests.request(method, url, params=params, *args, **kwargs)
    if response.ok:
        data = sanitize_response(response, params['retmode'])
    else:
        response.raise_for_status()
    return data
def resource(path=None, level=None, filter_=None):
    """
    Discover resources under ``path`` and return them as a JSON payload.

    :param path: directory to search; defaults to ``CONFIG.App.STARTDIR``.
    :param level: depth limit forwarded to ``discover_resource``.
    :param filter_: extensions to match; defaults to ['CDATA', 'CEL'].
        (Was a mutable default argument — now a None sentinel.)
    :return: tuple of (JSON response, HTTP status code).
    """
    # provide an exhaustive search
    filter_ = assign_if_none(filter_, ['CDATA', 'CEL'])
    response = Response()
    startdir = assign_if_none(path, CONFIG.App.STARTDIR)
    tree = discover_resource(path=startdir, level=level, filter_=filter_)
    response.set_data(tree)
    dict_ = response.to_dict()
    json_ = jsonify(dict_)
    code = response.code
    return json_, code
def test_assign_if_none():
    """assign_if_none returns the fallback only when the value is None."""
    fallback_used = util.assign_if_none(None, 'foo')
    value_kept = util.assign_if_none('foo', 'bar')
    assert fallback_used == 'foo'
    assert value_kept == 'foo'
def write(path, pipeline=None):
    """Serialize ``pipeline`` (an empty list by default) as JSON to ``path``."""
    stages = assign_if_none(pipeline, [])
    with open(path, mode='w') as handle:
        json.dump(stages, handle, indent=4)
def runner(self, cdat, heap_size = 16384, seed = None, verbose = True):
    """
    Execute the whole analysis pipeline over ``cdat``.

    Steps, in order: mark input/preprocess stages, (export to ARFF —
    currently disabled), load the data through Weka, split it into
    stratified training/testing folds, run every enabled
    feature-selection combination, then train, serialize and evaluate
    every enabled model. Progress is mirrored into ``self.stages`` and
    ``self.logs``; aggregated results land in ``self.gist`` and are
    written to a ``.cgist`` file.

    :param cdat: input data set (project type) — currently unused except
        in commented-out code; TODO confirm intended use.
    :param heap_size: JVM maximum heap size, in megabytes.
    :param seed: seed for the stratified fold split; random when None.
    :param verbose: forwarded only to the commented-out ARFF export.
    """
    self.set_status(Pipeline.RUNNING)
    self.logs.append('Initializing Pipeline')
    para = self.config
    self.logs.append('Reading Pipeline Configuration')
    head = ''
    name = get_rand_uuid_str()
    self.logs.append('Reading Input File')
    # Mark every input/preprocessing stage as running, and pick up the
    # input file's directory (head) and base name (name) from 'dat.fle'.
    for i, stage in enumerate(self.stages):
        if stage.code in ('dat.fle', 'prp.bgc', 'prp.nrm', 'prp.pmc', 'prp.sum'):
            self.stages[i].status = Pipeline.RUNNING
        if stage.code == 'dat.fle':
            head = os.path.abspath(stage.value.path)
            name, _ = os.path.splitext(stage.value.name)
    self.logs.append('Parsing to ARFF')
    # NOTE(review): if no 'dat.fle' stage exists, head stays '' and the
    # paths below resolve relative to the current working directory.
    path = os.path.join(head, '{name}.arff'.format(name = name))
    # This bug, I don't know why, using Config.schema instead.
    # cdat.toARFF(path, express_config = para.Preprocess.schema, verbose = verbose)
    for i, stage in enumerate(self.stages):
        if stage.code in ('dat.fle', 'prp.bgc', 'prp.nrm', 'prp.pmc', 'prp.sum'):
            self.stages[i].status = Pipeline.COMPLETE
    self.logs.append('Saved ARFF at {path}'.format(path = path))
    self.logs.append('Splitting to Training and Testing Sets')
    JVM.start(max_heap_size = '{size}m'.format(size = heap_size))
    load = Loader(classname = 'weka.core.converters.ArffLoader')
    # data = load.load_file(path)
    # save = Saver(classname = 'weka.core.converters.ArffSaver')
    data = load.load_file(os.path.join(head, 'iris.arff')) # For Debugging Purposes Only
    data.class_is_last() # For Debugging Purposes Only
    # data.class_index = cdat.iclss
    for i, stage in enumerate(self.stages):
        if stage.code == 'prp.kcv':
            self.stages[i].status = Pipeline.RUNNING
    self.logs.append('Splitting Training Set')
    # TODO - Check if this seed is worth it.
    seed = assign_if_none(seed, random.randint(0, 1000))
    opts = ['-S', str(seed), '-N', str(para.Preprocess.FOLDS)]
    wobj = Filter(classname = 'weka.filters.supervised.instance.StratifiedRemoveFolds', options = opts + ['-V'])
    wobj.inputformat(data)
    # The same filter is run twice: once with '-V' (presumably inverted
    # selection -> training share) and once without (testing share) —
    # TODO confirm against Weka's StratifiedRemoveFolds documentation.
    tran = wobj.filter(data)
    self.logs.append('Splitting Testing Set')
    wobj.options = opts
    test = wobj.filter(data)
    for i, stage in enumerate(self.stages):
        if stage.code == 'prp.kcv':
            self.stages[i].status = Pipeline.COMPLETE
    self.logs.append('Performing Feature Selection')
    feat = [ ]
    for comb in para.FEATURE_SELECTION:
        if comb.USE:
            # Flag the matching attribute-selection stage as running.
            for i, stage in enumerate(self.stages):
                if stage.code == 'ats':
                    search = stage.value.search.name
                    evaluator = stage.value.evaluator.name
                    if search == comb.Search.NAME and evaluator == comb.Evaluator.NAME:
                        self.stages[i].status = Pipeline.RUNNING
            # NOTE(review): `options` here is a keyword argument of
            # str.format (which silently ignores unknown keywords), NOT
            # of ASSearch/ASEvaluation — the configured search/evaluator
            # options are never actually applied.
            srch = ASSearch(classname = 'weka.attributeSelection.{classname}'.format(
                classname = comb.Search.NAME,
                options = assign_if_none(comb.Search.OPTIONS, [ ])
            ))
            ewal = ASEvaluation(classname = 'weka.attributeSelection.{classname}'.format(
                classname = comb.Evaluator.NAME,
                options = assign_if_none(comb.Evaluator.OPTIONS, [ ])
            ))
            attr = AttributeSelection()
            attr.search(srch)
            attr.evaluator(ewal)
            attr.select_attributes(tran)
            # Record which attributes survived this selection run.
            meta = addict.Dict()
            meta.search = comb.Search.NAME
            meta.evaluator = comb.Evaluator.NAME
            meta.features = [tran.attribute(index).name for index in attr.selected_attributes]
            feat.append(meta)
            for i, stage in enumerate(self.stages):
                if stage.code == 'ats':
                    search = stage.value.search.name
                    evaluator = stage.value.evaluator.name
                    if search == comb.Search.NAME and evaluator == comb.Evaluator.NAME:
                        self.stages[i].status = Pipeline.COMPLETE
    # NOTE(review): `feat` is collected above but never attached to the
    # gist or returned anywhere in this method.
    models = [ ]
    for model in para.MODEL:
        if model.USE:
            summary = addict.Dict()
            self.logs.append('Modelling {model}'.format(model = model.LABEL))
            summary.label = model.LABEL
            summary.name = model.NAME
            summary.options = assign_if_none(model.OPTIONS, [ ])
            for i, stage in enumerate(self.stages):
                if stage.code == 'lrn' and stage.value.name == model.NAME:
                    self.stages[i].status = Pipeline.RUNNING
            # NOTE(review): iclass is rebuilt each iteration, so only the
            # last instance's class-index range survives this loop.
            for i, instance in enumerate(data):
                iclass = list(range(instance.num_classes))
            options = assign_if_none(model.OPTIONS, [ ])
            classifier = Classifier(classname = 'weka.classifiers.{classname}'.format(classname = model.NAME), options = options)
            classifier.build_classifier(tran)
            # Persist the trained model next to the input data.
            serializer.write(os.path.join(head, '{name}.{classname}.model'.format(
                name = name,
                classname = model.NAME
            )), classifier)
            self.logs.append('Testing model {model}'.format(model = model.LABEL))
            evaluation = Evaluation(tran)
            evaluation.test_model(classifier, test)
            summary.summary = evaluation.summary()
            # Confusion matrix: stored both raw and as a base64 heatmap.
            frame = pd.DataFrame(data = evaluation.confusion_matrix)
            axes = sns.heatmap(frame, cbar = False, annot = True)
            b64str = get_b64_plot(axes)
            summary.confusion_matrix = addict.Dict({
                'value': evaluation.confusion_matrix.tolist(),
                'plot': b64str
            })
            self.logs.append('Plotting Learning Curve for {model}'.format(model = model.LABEL))
            # Each plot is rendered into an in-memory buffer and stored
            # on the summary as a base64 string.
            buffer = io.BytesIO()
            plot_classifier_errors(evaluation.predictions, tran, test, outfile = buffer, wait = False)
            b64str = buffer_to_b64(buffer)
            summary.learning_curve = b64str
            buffer = io.BytesIO()
            plot_roc(evaluation, class_index = iclass, outfile = buffer, wait = False)
            b64str = buffer_to_b64(buffer)
            summary.roc_curve = b64str
            buffer = io.BytesIO()
            plot_prc(evaluation, class_index = iclass, outfile = buffer, wait = False)
            b64str = buffer_to_b64(buffer)
            summary.prc_curve = b64str
            if classifier.graph:
                summary.graph = classifier.graph
            # NOTE(review): predictions are computed but discarded —
            # `prediction` is overwritten each iteration and never used.
            for i, instance in enumerate(test):
                prediction = classifier.classify_instance(instance)
            for i, stage in enumerate(self.stages):
                if stage.code == 'lrn' and stage.value.name == model.NAME:
                    self.stages[i].status = Pipeline.COMPLETE
            models.append(summary)
    self.gist.models = models
    JVM.stop()
    # Persist the aggregated results beside the input data.
    JSON.write(os.path.join(head, '{name}.cgist'.format(name = name)), self.gist)
    self.logs.append('Pipeline Complete')
    self.set_status(Pipeline.COMPLETE)
def write(path, pipeline=None):
    """Write the pipeline description to ``path`` as pretty-printed JSON."""
    payload = assign_if_none(pipeline, [])
    with open(path, mode='w') as outfile:
        json.dump(payload, outfile, indent=4)