def build(self, *, source, C, class_weight):
    """Fit a binary L2-penalised logistic regression and store its parameters.

    Reads ``source.data["representation"]`` (features) and
    ``source.data["value"]`` (labels), fits ``sklearnLogisticRegression``
    with the ``liblinear`` solver and a fixed ``random_state``, and writes
    the fitted parameters to ``self.data``:

    * ``"w"``: a copy of the coefficients reshaped to a single row.
    * ``"b"``: the intercept as a Python float.

    Args:
        source: Object whose ``data`` mapping provides ``"representation"``
            and ``"value"``.
        C: Forwarded unchanged as the estimator's ``C`` argument.
        class_weight: Forwarded unchanged as the estimator's
            ``class_weight`` argument.

    Raises:
        AssertionError: If the labels do not have an integer dtype, are not
            exactly the set ``{0, 1}``, or the fitted ``classes_`` are not
            ordered ``[0, 1]``.
    """
    representation = source.data["representation"]
    value = source.data["value"]

    # The stored weights are only meaningful for integer labels 0 and 1.
    assert issubclass(value.dtype.type, np.integer)
    assert set(value) == set([0, 1])

    _lr = sklearnLogisticRegression(
        penalty="l2",
        dual=False,
        tol=0.0001,
        C=C,
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=class_weight,
        random_state=43,
        solver="liblinear",
        max_iter=100,
        multi_class="ovr",
        verbose=0,
        warm_start=False,
        n_jobs=1,
    )
    _lr.fit(representation, value)

    # Make sure the coefficient sign corresponds to class 1 being "positive".
    assert _lr.classes_[0] == 0
    assert _lr.classes_[1] == 1

    self.data["w"] = Variable(_lr.coef_.copy().reshape(1, -1))
    # For a binary problem ``intercept_`` is a one-element array. Index it
    # explicitly: calling float() on an array with ndim > 0 is deprecated by
    # NumPy and slated to become an error.
    self.data["b"] = Variable(float(_lr.intercept_[0]))
def build(self, *, source, alpha):
    """Fit a linear model on the source data and store weights and bias.

    Uses ``sklearnLinearRegression`` when ``alpha`` is ``None`` and
    ``Ridge(alpha=alpha)`` otherwise. The model is fitted on
    ``source.data["representation"]`` against ``source.data["value"]``.

    Writes to ``self.data``:

    * ``"w"``: a copy of the fitted coefficients reshaped to a single row.
    * ``"b"``: the fitted intercept converted to a Python float.

    Args:
        source: Object whose ``data`` mapping provides ``"representation"``
            and ``"value"``.
        alpha: Ridge regularisation strength, or ``None`` for plain
            linear regression.
    """
    features = source.data["representation"]
    targets = source.data["value"]

    model = sklearnLinearRegression() if alpha is None else Ridge(alpha=alpha)
    model.fit(features, targets)

    weights = model.coef_.copy().reshape(1, -1)
    self.data["w"] = Variable(weights)
    self.data["b"] = Variable(float(model.intercept_))
def build(self, *, source):
    """Store the distinct source values and their relative frequencies.

    Counts the occurrences of each entry of ``source.data["value"]``, sorts
    the distinct values, and normalises the counts so they sum to one.

    Writes to ``self.data``:

    * ``("value_statistics", "classes")``: the sorted distinct values, with
      the same dtype as ``source.data["value"]``, wrapped in ``Header``.
    * ``("value_statistics", "probability")``: the matching frequencies as a
      float array, wrapped in ``Variable``.

    Args:
        source: Object whose ``data`` mapping provides ``"value"``.

    Raises:
        ValueError: If ``source.data["value"]`` is empty (there is nothing to
            unpack into classes and counts).
    """
    values = source.data["value"]
    classes, probability = zip(*sorted(Counter(values).items()))
    classes = np.array(classes, dtype=values.dtype)
    # Builtin ``float`` replaces the ``np.float`` alias, which was removed
    # from NumPy; both denote the same float64 dtype.
    probability = np.array(probability, dtype=float)
    probability /= probability.sum()
    self.data[("value_statistics", "classes")] = Header(classes)
    self.data[("value_statistics", "probability")] = Variable(probability)
def build(self, *, source, variant):
    """Store a single constant summarising ``source.data["value"]``.

    Args:
        source: Object whose ``data`` mapping provides ``"value"``.
        variant: ``"mean"`` to store the arithmetic mean, ``"median"`` to
            store the median.

    Raises:
        ValueError: If ``variant`` is neither ``"mean"`` nor ``"median"``.

    The result is converted to a Python float and written to
    ``self.data["value_constant"]``.
    """
    # Pick the reduction first so an invalid variant fails before any data
    # is touched.
    if variant == "mean":
        reduce_fn = np.mean
    elif variant == "median":
        reduce_fn = np.median
    else:
        raise ValueError("'variant' must be 'mean' or 'median'")

    constant = float(reduce_fn(source.data["value"]))
    self.data["value_constant"] = Variable(constant)
def __call__(self, x):
    """Poll the server for ``x`` until a ``Result`` is available.

    Each reply from ``self.server.request(x)`` is handled by type:

    * ``Result``: return ``(reply.y, reply.header)``.
    * ``ToDo``: compute ``self.f(reply.x)``, save ``hx``, ``y`` and
      ``header`` to ``reply.dirname`` under the name ``str(reply)``, then
      report completion with ``self.server.done(reply)``.
    * ``Wait``: sleep for ``reply.t`` seconds.

    After a ``ToDo`` or ``Wait`` the server is asked again.

    Raises:
        TypeError: If the server returns any other kind of reply.
    """
    while True:
        reply = self.server.request(x)

        if isinstance(reply, Result):
            return reply.y, reply.header

        if isinstance(reply, ToDo):
            y, header = self.f(reply.x)
            payload = DataDict()
            payload["hx"] = Variable(reply.hx)
            payload["y"] = Variable(y)
            payload["header"] = Variable(header)
            payload._save(reply.dirname, str(reply))
            # Drop the references before notifying the server so the
            # computed data is not kept alive longer than needed.
            del payload, y, header
            self.server.done(reply)
        elif isinstance(reply, Wait):
            time.sleep(reply.t)
        else:
            raise TypeError(reply)
def add_pending_hx(self, hx):
    """Record ``hx`` as pending for the current process.

    Ensures ``self.pending_dirname`` exists, derives two per-process paths
    named after the current PID (one under ``self.cache_dirname``, one under
    ``self.pending_dirname``), and saves ``hx`` to the pending path under the
    name ``self.uid``.

    Args:
        hx: Value stored under the ``"hx"`` key of the saved ``DataDict``.

    Returns:
        tuple: ``(dirname, lock_dirname)``, the per-process path under the
        cache directory and the per-process path under the pending
        directory. ``dirname`` is only computed here, not created.

    Raises:
        AssertionError: If either per-process path already exists.
    """
    # exist_ok avoids the check-then-create race when several processes
    # reach this point at the same time.
    os.makedirs(self.pending_dirname, exist_ok=True)

    pid = str(os.getpid())
    dirname = safe_path_join(pid, dirname=self.cache_dirname)
    lock_dirname = safe_path_join(pid, dirname=self.pending_dirname)

    # A leftover path for this PID would indicate stale state.
    assert not os.path.exists(dirname)
    assert not os.path.exists(lock_dirname)

    dd = DataDict()
    dd["hx"] = Variable(hx)
    dd._save(lock_dirname, self.uid)
    return dirname, lock_dirname
def merge_sorted_hx_data(self, dd1, dd2):
    """Insert the entries of ``dd1`` into ``dd2``, keeping ``hx`` sorted.

    Both inputs are expected to already hold sorted, unique ``"hx"`` keys.
    Keys of ``dd1`` that are not present in ``dd2`` are inserted at their
    sorted position; the ``"y"`` / ``"header"`` columns of both inputs are
    aligned with ``merge_columns`` (fill value ``0.``) before the rows are
    combined.

    Args:
        dd1: Mapping with ``"hx"``, ``"y"`` and ``"header"`` entries to
            insert.
        dd2: Mapping with ``"hx"``, ``"y"`` and ``"header"`` entries to
            insert into.

    Returns:
        ``dd2`` itself when ``dd1`` has no rows; otherwise a new ``DataDict``
        with the merged ``"hx"``, ``"y"`` and ``"header"``.
    """
    hx1 = dd1["hx"]
    hx2 = dd2["hx"]
    n2 = hx2.shape[0]

    # Position in hx2 at which each hx1 key would have to be inserted.
    insert_idx = np.searchsorted(hx2, hx1, side="left", sorter=None)

    # A key of dd1 is new when it sorts past the end of hx2, or when the
    # element currently at its insertion position is a different key.
    # Builtin ``bool`` replaces the ``np.bool`` alias removed from NumPy.
    new_hx1_mask = np.zeros(hx1.shape[0], dtype=bool)
    new_hx1_mask[insert_idx == n2] = True
    in_range = insert_idx < n2
    differs = hx2[insert_idx[in_range]] != hx1[in_range]
    new_hx1_mask[np.arange(len(new_hx1_mask))[in_range][differs]] = True
    del in_range, differs

    # NOTE(review): this only short-circuits when dd1 has no rows at all,
    # not when every dd1 key is already present in dd2 -- confirm intent.
    if new_hx1_mask.shape[0] == 0:
        return dd2

    final_hx = herbivores.insert(
        hx2,
        insert_idx[new_hx1_mask],
        hx1[new_hx1_mask],
        axis=0,
    )
    y1, y2, final_header = merge_columns(
        dd1["y"], dd1["header"], dd2["y"], dd2["header"], 0.
    )
    # NOTE(review): y1 is inserted without applying new_hx1_mask, whereas hx1
    # is masked above; presumably the row counts only agree when every dd1
    # key is new -- verify against merge_columns and the callers.
    final_y = herbivores.insert(y2, insert_idx[new_hx1_mask], y1, axis=0)

    dd = DataDict()
    dd["hx"] = Variable(final_hx)
    dd["y"] = Variable(final_y)
    dd["header"] = Variable(final_header)
    return dd
def build(self, *, source, k):
    """Store ``k`` together with the unmodified source values.

    Args:
        source: Object whose ``data`` mapping provides ``"value"``.
        k: Positive integer, stored under ``self.data["k"]``.

    Raises:
        AssertionError: If ``k`` is not an ``int`` or is not greater than
            zero.

    ``source.data["value"]`` is stored under ``self.data["value_original"]``.
    """
    # Type is checked first so the comparison is only made on integers.
    assert isinstance(k, int) and k > 0

    self.data["k"] = Variable(k)
    original_value = source.data["value"]
    self.data["value_original"] = Variable(original_value)