def create_and_populte_quijote(redis_conn):
    """Index the text of El Quijote, one document per line of the file.

    Reads the source text, builds one document per line (the line index is
    used both as ``id`` and as ``par_num``), configures the ``qxt``
    collection on ``redis_conn``, clears it and indexes the documents in
    batches of 100 while timing the indexing call.
    """
    # %%
    with open("/home/teo/_data/red-search/quijote.txt", "rt",
              encoding="iso-8859-1") as source:
        paragraphs = list(source)
    print(len(paragraphs))

    docs = [
        {"text": paragraph, "id": number, "par_num": number}
        for number, paragraph in enumerate(paragraphs)
    ]

    # %%
    config = CollectionConfig(
        name='qxt',
        id_fld='id',
        text_flds=['text'],
        facet_flds=[],
        number_flds=['par_num'],
        stop_words="el la los las de a es".split(" "),
    )
    collection = Collection(redis_conn).configure(config)

    # %%
    clear_collection(collection)

    # %%
    # NOTE(review): `coll` is presumably a module imported elsewhere in the
    # file (it is not the collection object created above) -- confirm.
    com.timeit(lambda: coll.index_documents(collection, docs, batch_size=100))
def write_connection_file(outlayer, confile):
    """Write the gap junctions lying entirely inside one layer to a file.

    Only gaps whose two cells both belong to ``outlayer`` are written.
    Each output line has five space-separated columns:

    column 1: id of the first cell (``gap.gid1``)
    column 2: id of the second cell (``gap.gid2``)
    column 3: area of the connection between the two cells (``gap.area``)
    column 4: ``is_purkinje_gap(gid1, gid2)`` formatted as an integer
    column 5: ``conductance(gap)`` (presumably the "meang" of the
              connection mentioned in the earlier 4-column description)

    Args:
        outlayer: index of the layer whose internal gaps are written.
        confile: path of the text file to create (overwritten if present).
    """
    # have to generate gap junctions associated with outlayer

    # all the layer gids
    layer_gids = set()
    for icircle in range(cellorg.ncircle[outlayer]):
        for ipt in range(cellorg.npts[outlayer][icircle]):
            layer_gids.add(cellorg.org2gid(outlayer, icircle, ipt))
    timeit("determined set of gids in layer (%d)" % (len(layer_gids),))

    # all the gaps with both sides in the layer
    gaps = []
    for gid in layer_gids:
        for gap in mkgap.gaps_for_gid(gid):
            if gap.gid1 in layer_gids and gap.gid2 in layer_gids:
                gaps.append(gap)
    timeit("determined gaps with both sides in layer (%d)" % (len(gaps),))

    # The context manager guarantees the file is closed even if one of the
    # per-gap computations or writes raises.
    with open(confile, "w") as cf:
        for gap in gaps:
            cf.write("%d %d %g %d %g\n" % (
                gap.gid1,
                gap.gid2,
                gap.area,
                is_purkinje_gap(gap.gid1, gap.gid2),
                conductance(gap),
            ))
def mkcells(gidinfo):
    """Create one cell per gid and register it with ``pc``.

    ``gidinfo`` maps gid -> (x, y, z) on entry; each value is replaced by a
    ``CellInfo`` wrapping the newly created ``h.Cell``. Finally the global
    cell count (allreduce over ``len(gidinfo)``) is reported.
    """
    timeit()
    for gid in gidinfo:
        x0, y0, z0 = gidinfo[gid]
        cell = h.Cell()
        gidinfo[gid] = CellInfo(cell)

        # The cell shape is actually an arc and the area is computed with
        # respect to all 6 sides, but the length is treated as the straight
        # line distance between org points (interior corners in the
        # circumferential direction). The diameter is chosen so that the
        # area is correct including the end areas.
        soma = cell.soma
        soma.pt3dclear()
        soma.pt3dadd(x0, y0, z0, 1.)
        ilayer, icircle, ipt = gid2org(gid)
        x1, y1, z1 = xyz(ilayer, icircle, ipt + 1)
        soma.pt3dadd(x1, y1, z1, 1.)

        area = sum(mkgap.cell_side_areas(gid))
        soma.diam = area / pi / soma.L
        assert isclose(soma(.5).area(), area, abs_tol=area * 1e-5)

        cell.position(x0, y0, z0)
        pc.set_gid2node(gid, rank)
        netcon = cell.connect2target(None)
        pc.cell(gid, netcon)

    total = pc.allreduce(len(gidinfo), 1)
    pr("Global number of real cells is %d" % total)
    timeit("mkcells")
def run(G, k, iterations=5):
    """Benchmark ApproximateDistanceOracles against Dijkstra on graph ``G``.

    Writes a log to ``results/{k}_{G.name[:-9]}.log`` containing the
    pre-processing time and, averaged over every source node and
    ``iterations`` repetitions, the stretch, the query time and the time
    spent obtaining each Dijkstra result, plus the min/max stretch seen.
    """
    stretch_total = 0.0
    stretch_max = 0.0
    stretch_min = float('inf')
    query_time_total = 0.0
    dijkstra_time_total = 0.0

    with open(f'results/{k}_{G.name[:-9]}.log', 'w') as output:
        print(f'Running algorithm on {G.name}, k={k}', file=output)
        print(f'Nodes: {len(G)}, Edges: {len(G.edges)}', file=output)
        # draw_graph.draw(G)  # how to draw the graph with its weights

        algo = ApproximateDistanceOracles(G, k=k)
        prep_times = {}
        timeit(algo.pre_processing, output=prep_times)()
        print('Pre-processing time:',
              prep_times['pre_processing'] / 1000,
              file=output)

        print('Running algorithm', file=output)
        for _ in range(iterations):
            # The stopwatch measures the time the Dijkstra generator needs
            # to yield each source node; it is restarted at the end of
            # every inner iteration.
            start = datetime.now()
            all_pairs = nx.all_pairs_dijkstra_path_length(G)
            for source_node, dijkstra_distances in all_pairs:
                elapsed = datetime.now() - start
                dijkstra_time_total += elapsed.total_seconds()

                # Querying & timing our algorithm
                times = {}
                algo_distances = []
                for target_node in G:
                    timed_query = timeit(
                        algo.compute_distance,
                        log_name=f'{source_node, target_node}',
                        output=times)
                    algo_distances.append(
                        timed_query(source_node, target_node))

                # Comparing result
                # NOTE(review): the two sequences are compared positionally;
                # confirm that dijkstra_distances iterates in the same
                # order as `for target_node in G`.
                node_stretch = average_difference(
                    algo_distances, dijkstra_distances.values())
                stretch_min = min(stretch_min, node_stretch)
                stretch_max = max(stretch_max, node_stretch)
                stretch_total += node_stretch
                query_time_total += avg(times.values())

                start = datetime.now()

        samples = len(G) * iterations
        stretch_total /= samples
        query_time_total /= samples
        dijkstra_time_total /= samples

        summary = (
            f'Total average stretch: {stretch_total}',
            f'Average query time: {query_time_total}',
            f'Average dijkstra time: {dijkstra_time_total}',
            f'Max stretch value: {stretch_max}',
            f'Min stretch value: {stretch_min}',
        )
        print(*summary, sep='\n', file=output)
def mknet():
    """Build the network: load the cell template, then create cells and gaps.

    The steps run in a fixed order: load ``cell.hoc``, read the cell
    connectivity (``cellconread``), create the cells, create the gaps from
    ``mkgap.gaps``, apply the gap parameters and finally run
    ``h.verifyHalfGap()``.
    """
    h.load_file("cell.hoc")
    timeit()  # called without a label before the timed step
    cellconread()
    timeit("cellconread makes gapinfo")
    mkcells(gidinfo)
    mkgaps(gidinfo, mkgap.gaps)
    # NOTE(review): meaning of the 1000.0 and 0.0 arguments is not visible
    # here -- see setallgaps.
    setallgaps(param.meang, 1000.0, 0.0)
    #special_gap_params()
    h.verifyHalfGap()
def mkgaps(gidinfo, gaps):
    """Create both half gaps for every entry of ``gaps``.

    For each gap a half gap is made in each direction (the second one with
    the negated id), then ``pc.setup_transfer()`` is called and the global
    number of half gaps (allreduce over ``len(cell.gaps)``) is reported.
    """
    timeit()
    mark = set()
    for gap in gaps.values():
        first, second = gap.gid1, gap.gid2
        gap_id = gap.id
        mkhalfgap(first, second, gap_id, gidinfo, mark)
        mkhalfgap(second, first, -gap_id, gidinfo, mark)
    pc.setup_transfer()

    local_count = sum(len(cell.gaps) for cell in gidinfo.values())
    global_count = pc.allreduce(local_count, 1)
    pr("Global number of halfgap is %d" % global_count)
    timeit("mkgaps")
def gaps_gid2_copy():
    """Copy gap records to the ranks that own their ``gid2`` side.

    After ``gaps_fill_id()``, every gap whose ``gid2`` maps to another rank
    (``gid2 % nhost``, i.e. assuming round-robin gid distribution) is sent
    to that rank with ``pc.py_alltoall``; received gaps are inserted into
    the module-level ``gaps`` dict keyed by ``(gid1, gid2)``. Nothing is
    exchanged when there is a single host.
    """
    gaps_fill_id()
    if nhost == 1:
        return
    timeit()

    # assume round robin
    have = [None] * nhost
    for gapinfo in gaps.values():
        r = gapinfo.gid2 % nhost
        if r != rank:
            if have[r] is None:
                have[r] = []
            have[r].append(gapinfo)

    have = pc.py_alltoall(have)
    # Nothing is ever queued for our own rank, so nothing should come back.
    assert have[rank] is None
    for received in have:
        for gi in (received if received is not None else []):
            key = (gi.gid1, gi.gid2)
            assert key not in gaps
            gaps[key] = gi
    timeit("gaps_gid2_copy")
def search_testing(col: Collection): """interactive_testing""" # %% # noinspection PyUnresolvedReferences reload(sch) expr = sch.ContainsApprox("cobre", max_typos=2) red = col.redis # with col.redis.pipeline() as pipe: # for i in range(3): red.ping() ctx = sch.SearchContext(col, col.redis) reload(com) ret = com.timeit(lambda: expr.eval(ctx)) print(len(ret), 'tokens') # pipe.execute() # %% script = """ local extend = function( t1, t2 ) for _, el in ipairs(t2) do table.insert( t1, el ) end return t1 end local search = function (key, pat) local cur = '0' local res = {} while 1 do local r = redis.call('sscan', key, cur, 'match', pat, 'count', '25000' ) extend( res, r[2] ) if r[1] == '0' then break end cur = r[1] end return res end local res = {} for _, arg in ipairs( ARGV ) do res = extend( res, search(KEYS[1], arg) ) end return res """ t0 = dt.datetime.now() ret = col.redis.eval(script, 1, f'{col.name}/text_tokens', *expr.patterns) t1 = dt.datetime.now() print((t1 - t0).total_seconds() * 1000, len(ret))
def getdat(fname):
    """Return the processed raster for ``fname``, cached in ``fname + '.pkl'``.

    If the pickle cache can be loaded it is returned directly. Otherwise
    the raster file is read with ``raster``, processed with ``pras_`` and
    the result is written to the cache before being returned.

    NOTE: the cache is never invalidated here; delete the ``.pkl`` file if
    the raster file changes. ``pickle.load`` can execute arbitrary code, so
    the cache file must come from a trusted source.
    """
    import pickle

    pfile = fname + ".pkl"
    try:
        with open(pfile, "rb") as cache:
            p = pickle.load(cache)
    except Exception:
        # Any failure to read the cache (missing file, corrupt or stale
        # pickle, ...) falls back to rebuilding it. Unlike a bare
        # ``except:``, KeyboardInterrupt/SystemExit still propagate.
        ras = raster(fname)
        timeit("input raster")
        p = pras_(ras)
        timeit("construct pras")
        with open(pfile, "wb") as cache:
            pickle.dump(p, cache)
        timeit("pickle dump")
    else:
        timeit("pickle load")
    return p
def write_files(fname, outlayer, want_confile=False):
    """Write the output files for one layer.

    Always writes ``morphology_layer<N>.txt`` and ``layer<N>.spk``; when
    ``want_confile`` is true ``connection_layer<N>.txt`` is written in
    between. Each completed file is reported through ``timeit``.
    """
    morphfile = "morphology_layer%d.txt" % outlayer
    spkfile = "layer%d.spk" % outlayer

    write_morphfile(outlayer, morphfile)
    timeit("wrote %s" % morphfile)

    if want_confile:
        confile = "connection_layer%d.txt" % outlayer
        print("wait...writing %s will take a while because have to construct gap junctions" % confile)
        write_connection_file(outlayer, confile)
        timeit("wrote %s" % confile)

    print("wait...writing %s will take a while" % spkfile)
    write_spkfile(fname, spkfile)
    timeit("wrote %s" % spkfile)
def cellconread():
    """Fill ``gidinfo`` with this rank's cells and determine their gaps.

    Every gid handled by this rank (``range(rank, ncell, nhost)``) that is
    not on the last circle of its layer and is simulated gets its
    ``cellorg.xyz`` position stored in ``gidinfo``. Gaps are then created
    per gid, the number of zero-area triangle results is reported, gaps are
    copied to the ranks that need them and ``connections`` is bound to
    ``mkgap.gaps``.
    """
    timeit()
    # new Heart-3D paraboloid organization
    global ncon, ncell, connections
    import cellorg
    import mkgap
    from cellorg import sim_layers, sim_circles

    ncell = cellorg.ngid

    # old way iterating over all possible cells takes 5.4 seconds
    for gid in range(rank, ncell, nhost):
        ilayer, icircle, ipt = cellorg.gid2org(gid)
        if icircle >= cellorg.ncircle[ilayer] - 1:
            continue
        if not cellorg.is_simulated(ilayer, icircle, ipt):
            continue
        gidinfo[gid] = cellorg.xyz(ilayer, icircle, ipt)

    # Disabled alternative implementation, kept as an inert string.
    '''
    #new way iterating only over cells that exist takes
    import param as p
    for ilayer in sim_layers:
        for icircle in sim_circles[ilayer]:
            i0 = cellorg.angle2ipt(p.simulation_angledeg[0]*2*pi/360, ilayer, icircle)
            i1 = cellorg.angle2ipt(p.simulation_angledeg[1]*2*pi/360, ilayer, icircle)
            for ipt in range(i0, i1+1):
                if cellorg.is_simulated(ilayer, icircle, ipt):
                    gid = cellorg.org2gid(ilayer, icircle, ipt)
                    if gid%nhost == rank:
                        gidinfo[gid] = cellorg.xyz(ilayer, icircle, ipt)
    '''
    timeit("gidinfo setup")

    # because of floating round-off error which may or may not create
    # a gap with area close to 0, guarantee gap pairs by only creating
    # gaps where gid1 < gid2
    for gid in gidinfo:
        mkgap.gaps_for_gid(gid)

    zero_count = int(pc.allreduce(n_triang_zero(), 1))
    pr("accurate_triang_area calculation returned zero %d times" % zero_count)
    timeit("connections determined")

    # for parallel, copy gid2 gaps to ranks that need them
    mkgap.gaps_gid2_copy()
    connections = mkgap.gaps
import apischema
import pydantic

from common import timeit


# pydantic flavour of the benchmark models
class ChildPy(pydantic.BaseModel):
    value: int


class DataPy(pydantic.BaseModel):
    data: List[ChildPy]


# NamedTuple flavour, used by typedload and apischema
class Child(NamedTuple):
    value: int


class Data(NamedTuple):
    data: List[Child]


# 300000 small objects to deserialize
data = {'data': [{'value': i} for i in range(300000)]}

# Command-line flag -> loader to time; an unknown flag runs nothing.
_BENCHMARKS = {
    '--typedload': lambda: load(data, Data),
    '--pydantic': lambda: DataPy(**data),
    '--apischema': lambda: apischema.deserialize(Data, data),
}

_selected = _BENCHMARKS.get(sys.argv[1])
if _selected is not None:
    print(timeit(_selected))
from neuron import h, gui
import sys
this_module = sys.modules[__name__]
from common import timeit
import cellorg
from cellorg import xyz, gid2org, npts
timeit('import cellorg')


def raster(fname):
    """Read a spike raster file into a list of ``(t, gid)`` tuples.

    Each non-blank line must hold exactly two whitespace-separated numbers:
    the spike time and the gid (parsed as float, then converted to int).

    Raises:
        ValueError: if a non-blank line does not contain exactly two
            numeric fields.
    """
    ras = []
    # The context manager closes the file even when a line fails to parse.
    with open(fname) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            t, gid = [float(x) for x in fields]
            ras.append((t, int(gid)))
    return ras


dtt = 2.0
grp = 0
sgrp = 0
pras = None


def pras_(ras):
    global tt
    p = []
    tt = 0.0
import sys

from typedload import load
import pydantic

from common import timeit


# pydantic flavour of the benchmark models
class ChildPy(pydantic.BaseModel):
    value: int


class DataPy(pydantic.BaseModel):
    data: List[ChildPy]


# NamedTuple flavour, used by typedload
class Child(NamedTuple):
    value: int


class Data(NamedTuple):
    data: List[Child]


# 300000 small objects to deserialize
data = {'data': [{'value': i} for i in range(300000)]}

# Command-line flag -> loader to time; an unknown flag runs nothing.
_BENCHMARKS = {
    '--typedload': lambda: load(data, Data),
    '--pydantic': lambda: DataPy(**data),
}

_selected = _BENCHMARKS.get(sys.argv[1])
if _selected is not None:
    print(timeit(_selected))
from common import timeit, pr, pc, rank
timeit()
import param as p
from morphdef import circle_origins
from morphdef import distance, const_sep_layer_origins, addmul, normgrad
from p100from import p100from
from math import pi, cos, sin, tan

# Structure that allows fast calculation of cell position and neighbors:
# the paraboloid contains layers, which contain circle origins.
# The origin is a pair of (r, 0.0, z) tuples specifying the p000 and p100
# corners of the cell/region. These corners are shared with the cell/region
# (ilayer, icircle-1, 0), where they are its corners p010 and p110.
# We really need to save space by using (r, z) instead of (r, 0.0, z).
# Since each of the four parabola edges is a piecewise linear function
# between the two corner points and corner points in the adjacent layer,
# it is useful to also supply those extra interior (r, 0.0, z) points.
# Instead of the origin being (p000, p100), it is now
# ((r, 0.0, z), (r, 0.0, z), RegionFace).

class RegionFace:
    """Holds the breakpoint data (``p0b``, ``p1b``) attached to an origin.

    Both attributes start as ``None``.
    """

    def __init__(self):
        #self.p0 = None # p000
        #self.p1 = None # p100
        self.p0b = None # (jcircle, [p110]) of layer-1, where the p110 points are between p000 and p010
        self.p1b = None # (jcircle, [p010]) of layer+1, where the p010 points are between p100 and p110
        # The "b" stands for breakpoint. That name misses the point that
        # there may be no breakpoints, in which case jcircle is the first
        # circle in the adjacent layer that is relevant to gap connectivity.
}, { 'timestamp': 44.3, 'type': 'file', 'filename': 'qweqweqweqwe.txt', 'sender': '3141', 'receiver': '3145', 'url': 'http://url', }, ] * 50000 data = {'data': events} if sys.argv[1] == '--typedload': print(timeit(lambda: load(data, EventList))) elif sys.argv[1] == '--pydantic': print(timeit(lambda: EventListPy(**data))) elif sys.argv[1] == '--apischema': print(timeit(lambda: apischema.deserialize(EventList, data))) if sys.argv[1] == '--apischema-discriminator': try: from typing import Annotated except ImportError: pass else: discriminator = apischema.discriminator( "type", {"message": EventMessage, "ping": EventPing, "file": EventFile} ) class DiscriminatedEventList(NamedTuple): data: Tuple[Annotated[Event, discriminator], ...]