Example #1
            tmp_r += 1
        
    return rank_in_pdb, size_in_pdb, ranks_in_pdb



if __name__=="__main__":

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    
    workaround_nt = int(os.environ.get("OMP_NUM_THREADS",1))
    omptbx.omp_set_num_threads(workaround_nt)
    print("## hello from rank %d of %d"%(rank,size),"with omp_threads=",omp_get_num_procs())
    

    ## assign jobs
    rank_in_pdb, size_in_pdb, ranks_in_pdb = jobAssign(size=size, num_pdb=simparams.num_pdbs, num_img=simparams.num_img[0])


    import datetime
    start_elapse = time.time()

    if rank == 0:
        print("Rank 0 time", datetime.datetime.now())
        from LS49.spectra.generate_spectra import spectra_simulation

        SS = spectra_simulation()
        C = microcrystal(Deff_A = simparams.Deff_A, length_um = simparams.length_um, beam_diameter_um = simparams.beam_diameter_um)   
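
The jobAssign helper in this excerpt is cut off above, so only its tail is visible. As a rough illustration of the idea it appears to implement (splitting the MPI ranks into per-PDB groups), a minimal sketch might look like the following; the function name, arguments, and return values here are assumptions modeled on the call site, not the project's actual code.

def job_assign_sketch(rank, size, num_pdb):
    # Hypothetical sketch: split 'size' ranks into num_pdb contiguous groups,
    # as evenly as possible, and report this rank's position within its group.
    base, extra = divmod(size, num_pdb)
    start = 0
    for pdb_idx in range(num_pdb):
        n = base + (1 if pdb_idx < extra else 0)
        group = list(range(start, start + n))
        if rank in group:
            return group.index(rank), len(group), group
        start += n

if __name__ == "__main__":
    # 10 ranks over 3 PDBs -> groups of sizes 4, 3, 3
    print(job_assign_sketch(rank=5, size=10, num_pdb=3))   # -> (1, 3, [4, 5, 6])
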
Example #2
File: step5_batch.py Project: monarin/LS49
                quick=quick,
                rank=rank)


if __name__ == "__main__":
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    import os, omptbx
    workaround_nt = int(os.environ.get("OMP_NUM_THREADS", 1))
    omptbx.omp_set_num_threads(workaround_nt)
    N_total = 100000  # number of items to simulate
    N_stride = size  # total number of worker tasks
    print("hello from rank %d of %d" % (rank, size), "with omp_threads=",
          omp_get_num_procs())
    import datetime
    start_elapse = time()
    if rank == 0:
        print("Rank 0 time", datetime.datetime.now())
        from LS49.spectra.generate_spectra import spectra_simulation
        from LS49.sim.step5_pad import microcrystal
        print("hello2 from rank %d of %d" % (rank, size))
        SS = spectra_simulation()
        C = microcrystal(
            Deff_A=4000, length_um=4.,
            beam_diameter_um=1.0)  # assume smaller than 10 um crystals
        mt = flex.mersenne_twister(seed=0)
        random_orientations = []
        for iteration in range(N_total):
            random_orientations.append(mt.random_double_r3_rotation_matrix())
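
The fixed seed passed to flex.mersenne_twister is what makes the orientation list reproducible: any rank repeating the loop with seed=0 regenerates exactly the same N_total matrices, so nothing has to be communicated. A minimal standard-library sketch of that design choice (plain random triples stand in for the rotation matrices, which is an assumption for illustration only):

import random

def orientations_sketch(n_total, seed=0):
    # Fixed seed -> identical stream on every rank, so the list never needs
    # to be broadcast.  Triples stand in for real 3x3 rotation matrices.
    rng = random.Random(seed)
    return [(rng.random(), rng.random(), rng.random()) for _ in range(n_total)]

# Two independent "ranks" recomputing the list agree exactly:
assert orientations_sketch(5) == orientations_sketch(5)
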
Example #3
File: cyto_batch.py Project: dwpaley/LS49
def run_batch_job(test_without_mpi=False):
  params,options = parse_input()
  if params.log.by_rank:
    import io, sys
  if params.log.rank_profile:
    import cProfile
    pr = cProfile.Profile()
    pr.enable()

  # workaround for getting master nexus
  os.environ["NXMX_LOCAL_DATA"] = params.nxmx_local_data
  if test_without_mpi or params.test_without_mpi:
    from LS49.adse13_196.mock_mpi import mpiEmulator
    MPI = mpiEmulator()
  else:
    from libtbx.mpi4py import MPI

  comm = MPI.COMM_WORLD
  rank = comm.Get_rank()
  size = comm.Get_size()
  import omptbx
  workaround_nt = int(os.environ.get("OMP_NUM_THREADS",1))
  omptbx.omp_set_num_threads(workaround_nt)
  N_stride = size # total number of worker tasks
  print("hello from rank %d of %d"%(rank,size),"with omp_threads=",omp_get_num_procs())
  import datetime
  start_comp = time()

  print(rank, time(),
    "finished with the calculation of channels, now construct single broadcast")

  if rank == 0:
    print("Rank 0 time", datetime.datetime.now())

    spectrum_dict = {}

    from iotbx.reflection_file_reader import any_reflection_file
    from LS49 import ls49_big_data
    merge_file = os.path.join(ls49_big_data,"adse13_228","cyto_init_merge.mtz")
    Fmerge = any_reflection_file(merge_file).as_miller_arrays()[0].as_amplitude_array()

    print("Fmerge min/max = %f / %f" % (min(Fmerge.data()), max(Fmerge.data())))

    transmitted_info = dict(spectra = spectrum_dict,
                            amplitudes = Fmerge,
                            )
  else:
    transmitted_info = None
  transmitted_info = comm.bcast(transmitted_info, root = 0)
  comm.barrier()
  parcels = list(range(rank,params.N_total,N_stride))

  print(rank, time(), "finished with single broadcast, now set up the rank logger")

  if params.log.by_rank:
    expand_dir = os.path.expandvars(params.log.outdir)
    os.makedirs(expand_dir, exist_ok=True)
    log_path = os.path.join(expand_dir,"rank_%d.log"%rank)
    error_path = os.path.join(expand_dir,"rank_%d.err"%rank)
    #print("Rank %d redirecting stdout/stderr to"%rank, log_path, error_path)
    sys.stdout = io.TextIOWrapper(open(log_path,'ab', 0), write_through=True)
    sys.stderr = io.TextIOWrapper(open(error_path,'ab', 0), write_through=True)

  print(rank, time(), "finished with the rank logger, now construct the GPU cache container")

  try:
    from simtbx.gpu import gpu_energy_channels
    gpu_channels_singleton = gpu_energy_channels (
      deviceId = rank % params.devices_per_node )
      # singleton will instantiate, regardless of cuda, device count, or exascale API
  except ImportError:
    gpu_channels_singleton = None
  comm.barrier()
  import random
  while len(parcels)>0:
    idx = random.choice(parcels)
    cache_time = time()
    print("idx------start-------->",idx,"rank",rank,time())
    # if rank==0: os.system("nvidia-smi")
    tst_one(i_exp=idx,spectra=transmitted_info["spectra"],
        Fmerge=transmitted_info["amplitudes"],
        gpu_channels_singleton=gpu_channels_singleton,
        rank=rank,params=params
    )
    parcels.remove(idx)
    print("idx------finis-------->",idx,"rank",rank,time(),"elapsed",time()-cache_time)
  comm.barrier()
  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after srun startup %.3f"%(time()-start_elapse))
  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after Python imports %.3f"%(time()-start_comp))
  if params.log.rank_profile:
    pr.disable()
    pr.dump_stats("cpu_%d.prof"%rank)
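
The overall pattern in run_batch_job -- rank 0 prepares the shared inputs, comm.bcast ships them to every rank, and each rank then works through parcels = range(rank, params.N_total, N_stride) -- is a static round-robin decomposition. A minimal standalone sketch of that pattern with plain mpi4py (do_work, N_total, and shared are placeholder names, not the project's):

from mpi4py import MPI

def do_work(idx, shared):
    # placeholder for the per-image simulation call
    return idx * shared["scale"]

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
N_total = 16
shared = {"scale": 2} if rank == 0 else None
shared = comm.bcast(shared, root=0)        # one broadcast of the common inputs
for idx in range(rank, N_total, size):     # rank r handles items r, r+size, r+2*size, ...
    do_work(idx, shared)
comm.barrier()
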
Example #4
File: LY99_batch.py Project: dwpaley/LS49
def run_LY99_batch(test_without_mpi=False):
    params, options = parse_input()
    log_by_rank = bool(int(os.environ.get("LOG_BY_RANK", 0)))
    rank_profile = bool(int(os.environ.get("RANK_PROFILE", 1)))
    if log_by_rank:
        import io, sys
    if rank_profile:
        import cProfile
        pr = cProfile.Profile()
        pr.enable()

    if test_without_mpi:
        from LS49.adse13_196.mock_mpi import mpiEmulator
        MPI = mpiEmulator()
    else:
        from libtbx.mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    import omptbx
    workaround_nt = int(os.environ.get("OMP_NUM_THREADS", 1))
    omptbx.omp_set_num_threads(workaround_nt)
    N_total = int(os.environ["N_SIM"])  # number of items to simulate
    N_stride = size  # total number of worker tasks
    print("hello from rank %d of %d" % (rank, size), "with omp_threads=",
          omp_get_num_procs())
    import datetime
    start_comp = time()

    # now inside the Python imports, begin energy channel calculation

    wavelength_A = 1.74  # general ballpark X-ray wavelength in Angstroms
    wavlen = flex.double([12398.425 / (7070.5 + w) for w in range(100)])
    direct_algo_res_limit = 1.7

    local_data = data()  # later put this through broadcast

    GF = gen_fmodel(resolution=direct_algo_res_limit,
                    pdb_text=local_data.get("pdb_lines"),
                    algorithm="fft",
                    wavelength=wavelength_A)
    GF.set_k_sol(0.435)
    GF.make_P1_primitive()

    # Generating sf for my wavelengths
    sfall_channels = {}
    for x in range(len(wavlen)):
        if rank > len(wavlen): break
        if x % size != rank: continue

        GF.reset_wavelength(wavlen[x])
        GF.reset_specific_at_wavelength(
            label_has="FE1",
            tables=local_data.get("Fe_oxidized_model"),
            newvalue=wavlen[x])
        GF.reset_specific_at_wavelength(
            label_has="FE2",
            tables=local_data.get("Fe_reduced_model"),
            newvalue=wavlen[x])
        sfall_channels[x] = GF.get_amplitudes()

    reports = comm.gather(sfall_channels, root=0)
    if rank == 0:
        sfall_channels = {}
        for report in reports:
            sfall_channels.update(report)
    comm.barrier()

    print(
        rank, time(),
        "finished with the calculation of channels, now construct single broadcast"
    )

    if rank == 0:
        print("Rank 0 time", datetime.datetime.now())
        from LS49.spectra.generate_spectra import spectra_simulation
        from LS49.adse13_196.revapi.LY99_pad import microcrystal
        print("hello2 from rank %d of %d" % (rank, size))
        SS = spectra_simulation()
        C = microcrystal(
            Deff_A=4000, length_um=4.,
            beam_diameter_um=1.0)  # assume smaller than 10 um crystals
        from LS49 import legacy_random_orientations
        random_orientations = legacy_random_orientations(N_total)
        transmitted_info = dict(spectra=SS,
                                crystal=C,
                                sfall_info=sfall_channels,
                                random_orientations=random_orientations)
    else:
        transmitted_info = None
    transmitted_info = comm.bcast(transmitted_info, root=0)
    comm.barrier()
    parcels = list(range(rank, N_total, N_stride))

    print(rank, time(),
          "finished with single broadcast, now set up the rank logger")

    if log_by_rank:
        expand_dir = os.path.expandvars(params.logger.outdir)
        log_path = os.path.join(expand_dir, "rank_%d.log" % rank)
        error_path = os.path.join(expand_dir, "rank_%d.err" % rank)
        #print("Rank %d redirecting stdout/stderr to"%rank, log_path, error_path)
        sys.stdout = io.TextIOWrapper(open(log_path, 'ab', 0),
                                      write_through=True)
        sys.stderr = io.TextIOWrapper(open(error_path, 'ab', 0),
                                      write_through=True)

    print(
        rank, time(),
        "finished with the rank logger, now construct the GPU cache container")

    import random
    gpu_instance = get_exascale("gpu_instance", params.context)
    gpu_energy_channels = get_exascale("gpu_energy_channels", params.context)

    gpu_run = gpu_instance(deviceId=rank %
                           int(os.environ.get("DEVICES_PER_NODE", 1)))
    gpu_channels_singleton = gpu_energy_channels(
        deviceId=gpu_run.get_deviceID())
    # singleton will instantiate, regardless of gpu, device count, or exascale API

    comm.barrier()
    while len(parcels) > 0:
        idx = random.choice(parcels)
        cache_time = time()
        print("idx------start-------->", idx, "rank", rank, time())
        # if rank==0: os.system("nvidia-smi")
        tst_one(
            image=idx,
            spectra=transmitted_info["spectra"],
            crystal=transmitted_info["crystal"],
            random_orientation=transmitted_info["random_orientations"][idx],
            sfall_channels=transmitted_info["sfall_info"],
            gpu_channels_singleton=gpu_channels_singleton,
            rank=rank,
            params=params)
        parcels.remove(idx)
        print("idx------finis-------->", idx, "rank", rank, time(), "elapsed",
              time() - cache_time)
    comm.barrier()
    del gpu_channels_singleton
    # avoid Kokkos allocation "device_Fhkl" being deallocated after Kokkos::finalize was called
    print("Overall rank", rank, "at", datetime.datetime.now(),
          "seconds elapsed after srun startup %.3f" % (time() - start_elapse))
    print("Overall rank", rank, "at", datetime.datetime.now(),
          "seconds elapsed after Python imports %.3f" % (time() - start_comp))
    if rank_profile:
        pr.disable()
        pr.dump_stats("cpu_%d.prof" % rank)
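
The energy-channel loop above splits the 100 wavelengths across ranks with x % size != rank: continue, gathers the per-rank dictionaries at root, and merges them before the single transmitted_info broadcast. A minimal sketch of that gather-and-merge step with plain mpi4py (compute_channel is a placeholder for the per-wavelength structure-factor call):

from mpi4py import MPI

def compute_channel(x):
    # placeholder for the per-wavelength structure-factor calculation
    return x * x

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
channels = {x: compute_channel(x) for x in range(100) if x % size == rank}
reports = comm.gather(channels, root=0)    # list of per-rank dicts on root, None elsewhere
if rank == 0:
    merged = {}
    for report in reports:
        merged.update(report)              # root now holds all 100 channels
else:
    merged = None
merged = comm.bcast(merged, root=0)        # every rank sees the full table
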
Example #5
  def find_peaks_clean(self):
    import omptbx
    # doesn't seem to be any benefit to using more than say 8 threads
    num_threads = min(8, omptbx.omp_get_num_procs(), self.params.nproc)
    omptbx.omp_set_num_threads(num_threads)
    d_min = self.params.fft3d.reciprocal_space_grid.d_min
    rlgrid = 2 / (d_min * self.gridding[0])

    frame_number = self.reflections['xyzobs.px.value'].parts()[2]
    scan_range_min = max(
      int(math.floor(flex.min(frame_number))),
      self.imagesets[0].get_array_range()[0]) # XXX what about multiple imagesets?
    scan_range_max = min(
      int(math.ceil(flex.max(frame_number))),
      self.imagesets[0].get_array_range()[1]) # XXX what about multiple imagesets?
    scan_range = self.params.scan_range
    if not len(scan_range):
      scan_range = [[scan_range_min, scan_range_max]]

    scan = self.imagesets[0].get_scan() # XXX what about multiple imagesets?
    angle_ranges = [
      [scan.get_angle_from_array_index(i, deg=False) for i in range_]
      for range_ in scan_range]

    grid = flex.double(flex.grid(self.gridding), 0)
    sampling_volume_map(grid, flex.vec2_double(angle_ranges),
                        self.imagesets[0].get_beam().get_s0(),
                        self.imagesets[0].get_goniometer().get_rotation_axis(),
                        rlgrid, d_min, self.params.b_iso)

    fft = fftpack.complex_to_complex_3d(self.gridding)
    grid_complex = flex.complex_double(
      reals=grid,
      imags=flex.double(grid.size(), 0))
    grid_transformed = fft.forward(grid_complex)
    grid_real = flex.pow2(flex.real(grid_transformed))

    gamma = 1
    peaks = flex.vec3_double()
    #n_peaks = 200
    n_peaks = 100 # XXX how many do we need?

    dirty_beam = grid_real
    dirty_map = self.grid_real.deep_copy()
    import time
    t0 = time.time()
    peaks = clean_3d(dirty_beam, dirty_map, n_peaks, gamma=gamma)
    t1 = time.time()
    #print "clean_3d took %.2f s" %(t1-t0)

    reciprocal_lattice_points = self.reflections['rlp'].select(
      self.reflections_used_for_indexing)

    peaks = self.optimise_peaks(peaks, reciprocal_lattice_points)

    peaks_frac = flex.vec3_double()
    for p in peaks:
      peaks_frac.append((p[0]/self.gridding[0],
                         p[1]/self.gridding[1],
                         p[2]/self.gridding[2]))
      #print p, peaks_frac[-1]

    if self.params.debug:
      self.debug_write_ccp4_map(grid, "sampling_volume.map")
      self.debug_write_ccp4_map(grid_real, "sampling_volume_FFT.map")
      self.debug_write_ccp4_map(dirty_map, "clean.map")

    self.sites = peaks_frac
    # we don't really know the "volume" of the peaks, but this method should
    # find the peaks in order of their intensity (strongest first)
    self.volumes = flex.double(range(len(self.sites), 0, -1))

    return
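
The thread cap at the top of find_peaks_clean, min(8, omptbx.omp_get_num_procs(), self.params.nproc), keeps the FFT from oversubscribing a shared node. A similar guard can be sketched with only the standard library (os.cpu_count is used here as a stand-in for the omptbx probe; the cap of 8 mirrors the comment above):

import os

def capped_threads(requested, hard_cap=8):
    # Never exceed the hard cap, the visible core count, or the caller's request.
    return max(1, min(hard_cap, os.cpu_count() or 1, requested))

print(capped_threads(requested=32))   # prints 8 on any machine with >= 8 cores
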
Example #6
def run_batch_job(test_without_mpi=False):
  from LS49.adse13_187.cyto_batch import parse_input as cyto_batch_parse_input
  params,options = cyto_batch_parse_input()
  if params.log.by_rank:
    import io, sys
  if params.log.rank_profile:
    import cProfile
    pr = cProfile.Profile()
    pr.enable()

  if test_without_mpi or params.test_without_mpi:
    from LS49.adse13_196.mock_mpi import mpiEmulator
    MPI = mpiEmulator()
  else:
    from libtbx.mpi4py import MPI

  comm = MPI.COMM_WORLD
  rank = comm.Get_rank()
  size = comm.Get_size()
  import omptbx
  workaround_nt = int(os.environ.get("OMP_NUM_THREADS",1))
  omptbx.omp_set_num_threads(workaround_nt)
  N_stride = size # total number of worker tasks
  print("hello from rank %d of %d"%(rank,size),"with omp_threads=",omp_get_num_procs())
  import datetime
  start_comp = time()

  if params.log.by_rank:
    expand_dir = os.path.expandvars(params.log.outdir)
    os.makedirs(expand_dir, exist_ok=True)
    log_path = os.path.join(expand_dir,"rank_%d.log"%rank)
    error_path = os.path.join(expand_dir,"rank_%d.err"%rank)
    #print("Rank %d redirecting stdout/stderr to"%rank, log_path, error_path)
    sys.stdout = io.TextIOWrapper(open(log_path,'ab', 0), write_through=True)
    sys.stderr = io.TextIOWrapper(open(error_path,'ab', 0), write_through=True)

  print(rank, time(), "finished with the rank logger, now delgate parcels")
  os.environ["CCTBX_RECOMMEND_DEVICE"] = "%d"%(rank % int(os.environ.get("CCTBX_DEVICE_PER_NODE",1)))
  print("rank", rank, "device", os.environ["CCTBX_RECOMMEND_DEVICE"])
  N_start = int(os.environ.get("N_START",0))

  comm.barrier()
  if rank == 0:
    os.system("nvidia-smi")
    # client process (requests all the work)
    import random
    parcels = list(range(N_start,N_start + params.N_total))
    while len(parcels) > 0:
      idx = parcels[0]    # random.choice(parcels)
      rankreq = comm.recv(source = MPI.ANY_SOURCE)
      print("Sending parcel",idx,"to rank",rankreq)
      comm.send(idx,dest = rankreq)
      parcels.remove(idx)
    # finally send a stop command to each process
    for rankreq in range(size-1):
      rankreq = comm.recv(source=MPI.ANY_SOURCE)
      comm.send('endrun',dest=rankreq)
  else:
    # server process (does all the work)
    while True:
      # inform the client this worker is ready for an event
      comm.send(rank,dest=0)
      idx = comm.recv(source=0)
      if idx == 'endrun':
        break
      cache_time = time()
      print("idx------start-------->",idx,"rank",rank,time())
      thin_ds1(idx,frame_params=params)
      print("idx------finis-------->",idx,
            "rank",rank,time(),"elapsed %.3fs"%(time()-cache_time))
  comm.barrier()

  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after srun startup %.3f"%(time()-start_elapse))
  print("Overall rank",rank,"at",datetime.datetime.now(),
        "seconds elapsed after Python imports %.3f"%(time()-start_comp))
  if params.log.rank_profile:
    pr.disable()
    pr.dump_stats("cpu_%d.prof"%rank)
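
Unlike the static striding in the earlier examples, this version of run_batch_job schedules work dynamically: rank 0 acts as the client and only hands out indices, every other rank requests work when it is free, and the string 'endrun' is the stop sentinel. A minimal standalone sketch of that handshake with plain mpi4py (process_item is a placeholder; at least two ranks are assumed):

from mpi4py import MPI

def process_item(idx):
    # placeholder for the per-frame work (thin_ds1 in the example above)
    return idx

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()   # assumes size >= 2
N_total = 20
if rank == 0:
    # client: hand out one index per request, then one stop sentinel per worker
    for idx in range(N_total):
        worker = comm.recv(source=MPI.ANY_SOURCE)
        comm.send(idx, dest=worker)
    for _ in range(size - 1):
        worker = comm.recv(source=MPI.ANY_SOURCE)
        comm.send('endrun', dest=worker)
else:
    # worker: announce readiness, then receive either an index or the sentinel
    while True:
        comm.send(rank, dest=0)
        idx = comm.recv(source=0)
        if idx == 'endrun':
            break
        process_item(idx)
comm.barrier()
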