def orthogonalize(w, basis, ips=None, nblock=4):
    """Orthogonalize the lattice field w against the fields in basis, in place.

    Works through basis in chunks of nblock fields: computes the rank-local
    inner products of the chunk with w, global-sums them across ranks, and
    subtracts the resulting projections from w in one linear combination.

    Parameters:
        w: lattice field to orthogonalize; updated in place via ``w @= ...``.
        basis: list of lattice fields to project out of w.
        ips: optional mutable sequence; if given, ips[k] receives the computed
             inner product for basis[k] (must have length >= len(basis)).
        nblock: number of basis fields handled per blocked inner product.

    Returns None.
    """
    # timer is active only when performance verbosity is enabled
    t = gpt.timer("orthogonalize", verbose_performance)
    n = len(basis)
    if n == 0:
        return  # nothing to project out
    grid = basis[0].grid
    if verbose_performance:
        cgpt.timer_begin()
    for i in range(0, n, nblock):
        t("rank_inner_product")
        # rank-local inner products of the current block with w
        lip = gpt.rank_inner_product(basis[i : i + nblock], w)
        t("global_sum")
        grid.globalsum(lip)
        t("create expression")
        lip = [complex(x) for x in lip]
        if ips is not None:
            # export the inner products to the caller-provided buffer
            for j, v in enumerate(lip):
                ips[i + j] = v
        # build one expression subtracting all projections of this block
        expr = w - lip[0] * basis[i]
        for j in range(1, len(lip)):
            expr -= lip[j] * basis[i + j]
        t("linear combination")
        w @= expr
        t()
    if verbose_performance:
        t_cgpt = gpt.timer("cgpt_orthogonalize", True)
        t_cgpt += cgpt.timer_end()
        gpt.message(f"\nPerformance of orthogonalize:\n{t}\n{t_cgpt}")
for tp in [g.ot_singlet(), g.ot_vector_spin_color(4, 3), g.ot_vector_singlet(12)]: for n in [1, 4]: one = [g.lattice(grid, tp) for i in range(n)] two = [g.lattice(grid, tp) for i in range(n)] rng.cnormal([one, two]) # Rank inner product nbytes = (one[0].global_bytes() + two[0].global_bytes()) * N * n * n for use_accelerator, compute_name, access in [ (False, "host", access_host), (True, "accelerator", access_accelerator), ]: # Time dt = 0.0 cgpt.timer_begin() for it in range(N + Nwarmup): access(one) access(two) if it >= Nwarmup: dt -= g.time() ip = g.rank_inner_product(one, two, use_accelerator) if it >= Nwarmup: dt += g.time() # Report GBPerSec = nbytes / dt / 1e9 cgpt_t = g.timer("rip") cgpt_t += cgpt.timer_end() g.message( f"""{N} rank_inner_product
def expr_eval(first, second=None, ac=False):
    """Evaluate an expression into (a list of) lattice object(s).

    Two calling conventions:
      * expr_eval(dst, expression, ac=...) evaluates expression into the
        given destination lattice(s) dst (accumulating when ac is True).
      * expr_eval(expression) allocates and returns new result lattice(s);
        if the expression contains no lattice operand it is returned
        unevaluated, and if it is already a plain lattice it is returned
        as-is.

    Parameters:
        first: destination lattice(s) when second is given, otherwise the
               expression (or lattice) to evaluate.
        second: optional expression to evaluate into first.
        ac: accumulate flag; only valid together with an explicit destination.

    Returns a lattice, a list of lattices, or the unevaluated expression.
    """
    t = gpt.timer("eval", verbose_performance)

    # this will always evaluate to a (list of) lattice object(s)
    # or remain an expression if it cannot do so

    t("prepare")
    if second is not None:
        # explicit destination given: evaluate second into first
        dst = gpt.util.to_list(first)
        e = expr(second)
        return_list = False
    else:
        assert ac is False
        if gpt.util.is_list_instance(first, gpt.lattice):
            return first
        e = expr(first)
        lat = get_lattice(e)
        if lat is None:
            # cannot evaluate to a lattice object, leave expression unevaluated
            return first
        # idiomatic type check (was: type(lat) == list)
        return_list = isinstance(lat, list)
        lat = gpt.util.to_list(lat)
        grid = lat[0].grid
        nlat = len(lat)
        dst = None

    t("apply matrix ops")
    # apply matrix_operators
    e = apply_type_right_to_left(e, gpt.matrix_operator)

    t("fast return")
    # fast return if already a lattice
    if dst is None:
        if e.is_single(gpt.lattice):
            ue, uf, v = e.get_single()
            if uf == factor_unary.NONE and ue == expr_unary.NONE:
                return v

    # verbose output
    if verbose:
        gpt.message("eval: " + str(e))

    if verbose_performance:
        cgpt.timer_begin()

    if dst is not None:
        t("cgpt.eval")
        # evaluate into each destination lattice component
        for i, dst_i in enumerate(dst):
            dst_i.update(cgpt.eval(dst_i.v_obj, e.val, e.unary, ac, i))
        ret = dst
    else:
        assert ac is False
        t("get otype")
        # now find return type
        otype = get_otype_from_expression(e)
        ret = []
        for idx in range(nlat):
            t("cgpt.eval")
            res = cgpt.eval(None, e.val, e.unary, False, idx)
            # res is a list of (tensor_object, otype_string) pairs
            t_obj, s_ot = (
                [x[0] for x in res],
                [x[1] for x in res],
            )
            assert s_ot == otype.v_otype
            t("lattice")
            ret.append(gpt.lattice(grid, otype, t_obj))

    t()

    if verbose_performance:
        t_cgpt = gpt.timer("cgpt_eval", True)
        t_cgpt += cgpt.timer_end()
        gpt.message(t)
        gpt.message(t_cgpt)

    if not return_list:
        return gpt.util.from_list(ret)

    return ret