def check_integrity(data):
    """Ensure the semantic integrity of the ``data``.

    Logs the index shape and the number of IDs / labels, then verifies that
    ``data.text_ids``, ``data.labels`` and the index rows (as reported by
    ``get_n_rows``) all agree in length, and that a floating-point index
    holds only finite values.

    :param data: object exposing ``index`` (array with ``shape``, ``dtype``
        and ``sum``), ``text_ids`` and ``labels`` (sized sequences or
        ``None``).
    :raises ValueError: if any two of the lengths disagree, or if a
        floating-point index contains a non-finite value.
    """
    L.info("index shape: %s doc_ids: %s, labels: %s",
           data.index.shape,
           'None' if data.text_ids is None else len(data.text_ids),
           'None' if data.labels is None else len(data.labels))

    rows = get_n_rows(data)

    if data.text_ids is not None and data.labels is not None:
        if len(data.text_ids) != len(data.labels):
            msg = 'length of IDs (%d) != length of labels (%d)'
            raise ValueError(msg % (len(data.text_ids), len(data.labels)))

    if data.text_ids is not None:
        if len(data.text_ids) != rows:
            msg = 'length of IDs (%d) != number of index rows (%d)'
            # Was ``data.text_id`` (no trailing "s"), which raised
            # AttributeError instead of the intended ValueError.
            raise ValueError(msg % (len(data.text_ids), rows))

    if data.labels is not None:
        if len(data.labels) != rows:
            msg = 'length of labels (%d) != number of index rows (%d)'
            raise ValueError(msg % (len(data.labels), rows))

    # The sum is tested first; thanks to short-circuiting the elementwise
    # scan only runs when the sum itself is not finite.
    if data.index.dtype.char in typecodes['AllFloat'] and \
            not isfinite(data.index.sum()) and \
            not isfinite(data.index).all():
        raise ValueError("index contains NaN, infinity"
                         " or a value too large for %r." % data.index.dtype)
def tm_ransac_more_rows(d, sol, sys):
    """Try to extend ``sol`` with further rows of ``d`` and return ``sol``.

    For every row index not already listed in ``sol.rows`` the routine runs
    ``sys.ransac_k2`` random trials; when the best trial reports more than
    ``sys.min_inliers2`` inliers, the row is appended to ``sol.rows`` and
    ``sol.inlmatrix``, ``sol.Bhat`` and ``sol.dl`` are updated in place.

    Reads ``sol.rows``, ``sol.cols``, ``sol.Bhat``, ``sol.inlmatrix`` and
    ``sys.ransac_k2``, ``sys.ransac_threshold2``, ``sys.min_inliers2``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original layout.
    NOTE(review): this reads like a partial port from 1-based matrix code
    (``ii - 1``, ``zeros(3, 1)``, ``range(1, k + 1)``) -- verify each such
    line before relying on the result.
    """
    r_c = d.shape
    d2 = d**2  # elementwise square of the input
    m = r_c[0]
    # Candidate rows: indices 0..m-1 that are not yet in the solution.
    tryrows = setdiff(range(0, m), sol.rows)
    cr, dr = compactionmatrix(len(sol.cols))
    u, s, vh = linalg.svd(sol.Bhat[2:, 2:])
    v = vh.T
    # Keep the first two columns of v, conjugate-transposed.
    v = (v[:, 0:2]).conj().T
    for ii in tryrows:
        # NOTE(review): ``tryrows`` is built from a 0-based range, yet the
        # lookup subtracts 1 -- possible off-by-one; confirm.
        d2n = d2[ii - 1, sol.cols - 1]
        maxnrinl = 0
        for kk in range(1, sys.ransac_k2 + 1):
            # NOTE(review): if ``isfinite`` is numpy's, ``nonzero()`` returns
            # a tuple with one index array per dimension, so ``len(okcols)``
            # is the number of dimensions, not the count of finite entries.
            okcols = ((isfinite(d2n)).astype(int)).nonzero()
            tmp = random.permutation(len(okcols))
            if len(tmp) >= 4:
                # NOTE(review): guard asks for >= 4 but only 3 are sampled.
                trycols1 = okcols[tmp[0:3]]
                zz = sol.Bhat[0, :] * linalg.inv(dr.conj().T)
                # NOTE(review): with numpy's signature the second positional
                # argument of zeros/ones is ``dtype``, not a dimension.
                ZZ_con1 = concatenate((zeros(3, 1), v), 1)
                ZZ = concatenate((ones(1, len(sol.cols)), ZZ_con1))
                ZZ0_1 = concatenate((1, zeros(1, len(sol.cols) - 1)), 1)
                ZZ0_2 = concatenate((zeros(3, 1), v), 1)
                ZZ0 = concatenate((ZZ0_1, ZZ0_2))
                xx = (d2n[0, trycols1] - zz[0, trycols1]) * linalg.inv(
                    ZZ[:, trycols1])
                # Predicted vs. observed values on the usable columns.
                a = (zz[okcols] + xx * ZZ[:, okcols])
                b = d2n[okcols]
                inlids = where(abs(b - a) < sys.ransac_threshold2)
                # Remember the trial with the largest inlier count so far.
                if len(inlids) > maxnrinl:
                    maxnrinl = len(inlids)
                    tmpsol = structtype()
                    tmpsol.row = ii
                    tmpsol.cols = sol.cols[trycols1]
                    tmpsol.Bhatn = xx * ZZ0
                    tmpsol.inlcols = sol.cols[okcols[inlids]]
        # Accept the row only if the best trial had enough inliers.
        if maxnrinl > sys.min_inliers2:
            sol.rows = concatenate((sol.rows, tmpsol.row), 1)
            sol.inlmatrix[tmpsol.row, tmpsol.inlcols] = ones(1, len(tmpsol.inlcols))
            sol.Bhat = concatenate((sol.Bhat, tmpsol.Bhatn))
            # NOTE(review): elsewhere compactionmatrix() is unpacked into two
            # values; here the whole return value is stored -- confirm.
            sol.dl = compactionmatrix(len(sol.rows))
    return sol
def tm_ransac_more_cols(d, sol, sys):
    """Try to extend ``sol`` with further columns of ``d`` and return ``sol``.

    For every column index not already listed in ``sol.cols`` the routine
    runs ``sys.ransac_k2`` random trials; when the best trial reports more
    than ``sys.min_inliers2`` inliers, the column is appended to
    ``sol.cols`` and ``sol.inlmatrix``, ``sol.Bhat`` and ``sol.dl`` are
    updated in place.

    Reads ``sol.rows``, ``sol.cols``, ``sol.Bhat``, ``sol.inlmatrix`` and
    ``sys.ransac_k2``, ``sys.ransac_threshold2``, ``sys.min_inliers2``.

    NOTE(review): this reads like a partial port from 1-based matrix code
    (``ii - 1``, ``zeros(1, 3)``, ``ones(n, 1)``) -- only the best-trial
    comparison is corrected here; verify the remaining lines separately.
    """
    r_c = d.shape
    n = r_c[1]
    d2 = d**2  # elementwise square of the input
    # Candidate columns: indices 0..n-1 that are not yet in the solution.
    trycols = setdiff(range(0, n), sol.cols)
    cl, dl = compactionmatrix(len(sol.rows))
    u, s, vh = linalg.svd(sol.Bhat[1:, 1:])
    u = u[:, 0:2]
    for ii in trycols:
        # NOTE(review): ``trycols`` is built from a 0-based range, yet the
        # lookup subtracts 1 -- possible off-by-one; confirm.
        d2n = d2[sol.rows - 1, ii - 1]
        maxnrinl = 0
        for kk in range(0, sys.ransac_k2):
            # NOTE(review): if ``isfinite`` is numpy's, ``nonzero()`` returns
            # a tuple with one index array per dimension, so ``len(okrows)``
            # is the number of dimensions, not the count of finite entries.
            okrows = ((isfinite(d2n)).astype(int)).nonzero()
            tmp = random.permutation(len(okrows))
            if len(tmp) >= 4:
                tryrows1 = okrows[tmp[0:3]]
                zz = linalg.inv(dl) * sol.Bhat[:, 0]
                # NOTE(review): with numpy's signature the second positional
                # argument of zeros/ones is ``dtype``, not a dimension.
                ZZ_1 = concatenate((zeros(1, 3), u))
                ZZ = concatenate((ones(len(sol.rows), 1), ZZ_1), 1)
                # NOTE(review): ZZ0 and xx are computed from the very same
                # expression -- one of them is probably not what was meant.
                ZZ0 = linalg.inv(
                    ZZ[tryrows1, :]) * (d2n[tryrows1, 1] - zz[tryrows1, 1])
                xx = linalg.inv(
                    ZZ[tryrows1, :]) * (d2n[tryrows1, 1] - zz[tryrows1, 1])
                # Predicted vs. observed values on the usable rows.
                a = (zz[okrows] + ZZ[:, okrows] * xx)
                b = d2n[okrows]
                inlids = where(abs(b - a) < sys.ransac_threshold2)
                # Keep the trial with the most inliers. The comparison used
                # to be ``<``, which can never hold while maxnrinl starts at
                # 0, so no candidate was ever recorded; ``>`` matches
                # tm_ransac_more_rows.
                if len(inlids) > maxnrinl:
                    maxnrinl = len(inlids)
                    tmpsol = structtype()
                    tmpsol.rows = sol.rows[tryrows1]
                    tmpsol.col = ii
                    tmpsol.Bhatn = ZZ0 * xx
                    tmpsol.inlrows = sol.rows[okrows[inlids]]
        # Accept the column only if the best trial had enough inliers.
        if maxnrinl > sys.min_inliers2:
            sol.cols = concatenate((sol.cols, tmpsol.col), 1)
            sol.inlmatrix[tmpsol.inlrows, tmpsol.col] = ones(len(tmpsol.inlrows), 1)
            sol.Bhat = concatenate((sol.Bhat, tmpsol.Bhatn), 1)
            # NOTE(review): elsewhere compactionmatrix() is unpacked into two
            # values; here the whole return value is stored -- confirm.
            sol.dl = compactionmatrix(len(sol.cols))
    return sol
def fmt(x):
    """Format ``x`` with ``print_finite`` or ``print_nonfinite``.

    ``print_finite`` is used when ``umath.isfinite(x)`` is truthy,
    ``print_nonfinite`` otherwise; the chosen function's result is returned.
    """
    render = print_finite if umath.isfinite(x) else print_nonfinite
    return render(x)
def get_format_func(self, elem, **options):
    """Return a function that formats one float consistently with ``elem``.

    A first pass over the finite entries of ``elem`` decides between
    positional and exponential notation and measures the widths needed on
    each side of the decimal point; the returned callable then formats a
    single value with those settings.

    Options read: ``floatmode``, ``precision`` (not read when ``floatmode``
    is ``'unique'``), ``suppress_small``, ``sign``, ``infstr``, ``nanstr``.

    Raises ``Exception`` when ``self.check_options(**options)`` reports
    missing options.
    """
    missing_opt = self.check_options(**options)
    if missing_opt:
        raise Exception("Missing options: {}".format(missing_opt))

    floatmode = options['floatmode']
    precision = options['precision'] if floatmode != 'unique' else None
    suppress_small = options['suppress_small']
    sign = options['sign']
    infstr = options['infstr']
    nanstr = options['nanstr']

    show_plus = sign == '+'
    use_exp = False
    left_pad = 0
    right_pad = 0

    # only the finite values are used to compute the number of digits
    finite_mask = umath.isfinite(elem)
    finite_vals = elem[finite_mask]
    nonfinite_vals = elem[~finite_mask]

    # choose exponential mode based on the non-zero finite values
    magnitudes = umath.absolute(finite_vals[finite_vals != 0])
    if len(magnitudes) != 0:
        largest = np.max(magnitudes)
        smallest = np.min(magnitudes)
        with np.errstate(over='ignore'):  # division can overflow
            if largest >= 1.e8 or (
                    not suppress_small and
                    (smallest < 0.0001 or largest / smallest > 1000.)):
                use_exp = True

    # first pass: print every finite number once to measure the widths
    if floatmode == 'fixed':
        pass_trim, pass_unique = 'k', False
    else:
        pass_trim, pass_unique = '.', True

    if len(finite_vals) == 0:
        trim, exp_size, unique = '.', -1, True
    elif use_exp:
        rendered = (format_float_scientific(value, precision=precision,
                                            unique=pass_unique,
                                            trim=pass_trim, sign=show_plus)
                    for value in finite_vals)
        mantissas, _, exponents = zip(*(text.partition('e')
                                        for text in rendered))
        int_part, frac_part = zip(*(text.split('.') for text in mantissas))
        exp_size = max(map(len, exponents)) - 1
        precision = max(map(len, frac_part))
        # this should be only 1 or 2. Can be calculated from sign.
        left_pad = max(map(len, int_part))
        # right_pad is only needed for nan length calculation
        right_pad = exp_size + 2 + precision
        trim, unique = 'k', False
    else:
        rendered = (format_float_positional(value, precision=precision,
                                            fractional=True,
                                            unique=pass_unique,
                                            trim=pass_trim, sign=show_plus)
                    for value in finite_vals)
        int_part, frac_part = zip(*(text.split('.') for text in rendered))
        left_pad = max(map(len, int_part))
        right_pad = max(map(len, frac_part))
        exp_size = -1
        if floatmode in ['fixed', 'maxprec_equal']:
            precision = right_pad
            trim, unique = 'k', False
        else:
            trim, unique = '.', True

    # account for sign = ' ' by adding one to left_pad
    if sign == ' ' and not any(np.signbit(finite_vals)):
        left_pad += 1

    # account for nan and inf in left_pad
    if len(nonfinite_vals) != 0:
        nanlen, inflen = 0, 0
        if np.any(umath.isinf(nonfinite_vals)):
            neginf = sign != '-' or np.any(np.isneginf(nonfinite_vals))
            inflen = len(infstr) + neginf
        if np.any(umath.isnan(elem)):
            nanlen = len(nanstr)
        offset = right_pad + 1  # +1 for decimal pt
        left_pad = max(nanlen - offset, inflen - offset, left_pad)

    # total width of one formatted finite value, decimal point included
    field_width = left_pad + right_pad + 1

    def print_nonfinite(x):
        with errstate(invalid='ignore'):
            if umath.isnan(x):
                prefix, body = ('+' if show_plus else ''), nanstr
            else:  # isinf
                if x < 0:
                    prefix = '-'
                elif show_plus:
                    prefix = '+'
                else:
                    prefix = ''
                body = infstr
        return (prefix + body).rjust(field_width)

    shared_kwargs = dict(precision=precision, unique=unique, trim=trim,
                         sign=show_plus, pad_left=left_pad)

    if use_exp:
        def print_finite(x):
            return format_float_scientific(x, exp_digits=exp_size,
                                           **shared_kwargs)
    else:
        def print_finite(x):
            return format_float_positional(x, fractional=True,
                                           pad_right=right_pad,
                                           **shared_kwargs)

    def fmt(x):
        render = print_finite if umath.isfinite(x) else print_nonfinite
        return render(x)

    return fmt
def tm_ransac5rows(d, sys):
    """Search for an initial 5-row solution by random sampling.

    Runs ``sys.ransac_k`` trials. Each trial picks 5 random rows of ``d``,
    keeps the columns that are finite in all of them, and scores further
    columns against ``sys.ransac_threshold``; the trial with the highest
    score is stored on a fresh ``sol`` object. After the loop ``sol.Bhat``,
    ``sol.dl`` and ``sol.dr`` are filled in.

    Returns the tuple ``(sol, maxnrinl)``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original layout.
    NOTE(review): if no trial ever satisfies ``ntmp > 5``, ``sol.rows`` is
    never set and the code after the loop fails with AttributeError.
    """
    class solstruct():
        # Plain attribute container for the solution fields.
        pass
    sol = solstruct()
    maxnrinl = 0
    for iii in range(0, sys.ransac_k):
        d2 = d ** 2  # elementwise square of the input
        inl = (isfinite(d2)).astype(int)  # 1 where the entry is finite
        r_c = d2.shape
        m = r_c[0]
        # Draw 5 distinct random row indices.
        tmprows = random.permutation(m)
        tmprows = tmprows[0:5]
        auxvar1 = inl[tmprows, :]
        # Columns that are finite in every one of the sampled rows.
        auxvar2 = ((np.all(auxvar1, axis=0)).astype(int)).T
        okcol = (np.flatnonzero(auxvar2)).T
        B = d2[np.ix_(tmprows, okcol)]
        ntmp = B.shape[1]
        tmp2 = random.permutation(ntmp)
        if ntmp > 5:
            # First 4 shuffled columns form the sample, the rest are tested.
            tmp21tup = tmp2[0:4]
            tmp21 = np.reshape(tmp21tup, (1, -1))
            tmp22tup = tmp2[4:, ]
            tmp22 = np.reshape(tmp22tup, (1, -1))
            cl, _ = compactionmatrix(5)
            cr, _ = compactionmatrix(tmp2.shape[0])
            Btmp1 = np.dot(cl, B[:, tmp2])
            Btmp = np.dot(Btmp1, cr.conj().T)
            B1 = Btmp[:, 0:3]
            B2 = Btmp[:, 3:]
            u, s, v = linalg.svd(B1)
            # Fourth column of u, reshaped into a column vector.
            u4tup = u[:, 3]
            u4 = np.reshape(u4tup, (-1, 1))
            if 0:
                # Dead code: never executed.
                abs((u4.conj().T) * B2)
            Imiss = isnan(d)
            auxvar3 = abs(np.dot((u4.conj().T), B2))
            # NOTE(review): selects entries ABOVE the threshold; an inlier
            # test would normally use ``<`` -- confirm the intended sense.
            # NOTE(review): auxvar3 is 2-D, so nonzero() yields two index
            # arrays (row and column) and the reshape below merges both
            # into one row -- confirm that is intended.
            okindtup = (auxvar3 > sys.ransac_threshold).nonzero()
            okindmat = np.asarray(okindtup)
            okind = np.reshape(okindmat, (1, -1))
            # Start from 0 and mark entries that are NaN in d with -1.
            inlim = zeros(d.shape)
            inlim = inlim - Imiss
            # NOTE(review): ``okind - 1`` shifts 0-based indices down by one
            # (index 0 becomes -1, i.e. the last element) -- confirm.
            tmpconcat = concatenate((tmp21, tmp22[0, okind - 1]), 1)
            tmprows = np.reshape(tmprows, (-1, 1))
            inlim[tmprows, okcol[tmpconcat]] = ones((5, 4 + okind.size))
            nrinl = 4 + okind.size
            # Keep the trial with the highest score so far.
            if nrinl > maxnrinl:
                maxnrinl = nrinl
                sol.rows = tmprows
                concatmat = concatenate((tmp21, tmp22[0, okind - 1]), 1)
                sol.cols = okcol[(concatmat)]
                # NOTE(review): row1 takes index 1 while col1 takes index 0
                # -- looks inconsistent; confirm.
                sol.row1 = sol.rows[1]
                sol.col1 = sol.cols[0, 0]
                sol.inlmatrix = inlim
    # Build Bhat for the best trial found.
    B = d2[sol.rows, sol.cols]
    cl, dl = compactionmatrix(B.shape[0])
    cr, dr = compactionmatrix(B.shape[1])
    Bhatdotprod = np.dot(dl, B)
    Bhat = np.dot(Bhatdotprod, dr.conj().T)
    Btildedotprod = np.dot(cl, B)
    Btilde = np.dot(Btildedotprod, cr.conj().T)
    u, s, vh = linalg.svd(Btilde)
    v = vh.T
    # NOTE(review): presumably meant to zero every singular value past the
    # third. numpy/scipy svd return ``s`` as a 1-D array, in which case
    # ``s.shape[1]`` raises IndexError; ``zeros(a, b)`` also passes ``b`` as
    # dtype under numpy's signature -- confirm and fix.
    s[3:, ] = zeros(s.shape[0] - 3, s.shape[1])
    # NOTE(review): ``*`` is elementwise on ndarrays, not a matrix product
    # -- confirm which was intended.
    Btilde = u * s * (v.conj().T)
    Bhat[1:, 1:] = Btilde
    sol.Bhat = Bhat
    sol.dl = dl
    sol.dr = dr
    return sol, maxnrinl