def resolve_multiple_mergers(bh_map):
    """Collapse chains of BH mergers so every source maps to its final destination.

    ``bh_map`` maps a source BH id to a ``(destination BH id, merger ratio)``
    tuple. When a destination BH itself merges onward within the same interval,
    the source is re-pointed at the final destination and the ratios are
    multiplied. The dict is modified in place.

    Fixes over the previous version: the unused ``old_weight`` local is gone,
    and the restart-after-each-reassignment is done with a loop rather than
    unbounded recursion (which could hit the recursion limit on long chains).

    :param bh_map: dict mapping source BH id -> (destination BH id, ratio)
    """
    changed = True
    while changed:
        changed = False
        for src, (target, ratio) in bh_map.items():
            if target in bh_map:
                # The destination merges onward; follow one hop and restart the
                # scan (mirrors the original recursive restart semantics).
                final_target, onward_ratio = bh_map[target]
                bh_map[src] = final_target, ratio * onward_ratio
                logger.info(
                    "Multi-merger detected; reassigning %d->%d (old) %d (new)",
                    src, target, bh_map[src][0])
                changed = True
                break
    return
def _generate_timestep_pairs(self):
    """Build (earlier, later) pairs of consecutive available timesteps.

    For each simulation selected by ``self.args.sims``, the available
    timesteps are ordered by descending redshift (i.e. increasing time) and
    adjacent steps are paired. When ``self.args.backwards`` is set, the pair
    list is reversed so later pairs are processed first.

    :returns: list of (TimeStep, TimeStep) tuples
    """
    logger.info("generating pairs of timesteps")
    simulations = db.sim_query_from_name_list(self.args.sims)
    pairs = []
    for simulation in simulations:
        steps = (self.session.query(tangos.core.timestep.TimeStep)
                 .filter_by(simulation_id=simulation.id, available=True)
                 .order_by(tangos.core.timestep.TimeStep.redshift.desc())
                 .all())
        # pair each timestep with its successor in time
        pairs.extend(zip(steps, steps[1:]))
    if self.args.backwards:
        pairs.reverse()
    return pairs
def add_missing_trackdata_and_BH_objects(timestep, this_step_bh_iords, existing_bhobj_iords, session):
    """Create and commit any missing BH tracker and BH object database rows.

    Runs under the exclusive "bh" lock so only one process mutates BH
    bookkeeping at a time. Trackers and BH objects are generated inside a
    no-autoflush block, then committed together.

    :param timestep: the database timestep whose simulation the BHs belong to
    :param this_step_bh_iords: BH particle iords present on disk at this step
    :param existing_bhobj_iords: BH iords already represented in the database
    :param session: SQLAlchemy session used for the insert/commit
    """
    with parallel_tasks.ExclusiveLock("bh"):
        # only the existing tracker numbers are needed below
        _, known_track_nums = db.tracking.get_trackers(timestep.simulation)
        with session.no_autoflush:
            new_trackers = collect_bh_trackers(
                this_step_bh_iords, timestep.simulation, known_track_nums)
            new_bh_objects = generate_missing_bh_objects(
                this_step_bh_iords, timestep, existing_bhobj_iords)
        for batch in (new_trackers, new_bh_objects):
            session.add_all(batch)
        session.commit()
    logger.info("Committed %d new trackdata and %d new BH objects for %r",
                len(new_trackers), len(new_bh_objects), timestep)
def diff(options):
    """Compare two tangos databases and exit with status 1 if they differ.

    The comparison scope narrows to a single simulation, timestep or object
    when the corresponding option is supplied; otherwise the whole databases
    are compared.

    :param options: parsed arguments carrying uri1, uri2, ignore_value_of and
        the optional simulation / timestep / object selectors
    """
    from ..testing import db_diff
    differ = db_diff.TangosDbDiff(options.uri1, options.uri2,
                                  ignore_keys=options.ignore_value_of)

    # first non-empty selector wins; fall back to a full comparison
    scoped_comparisons = ((options.simulation, differ.compare_simulation),
                          (options.timestep, differ.compare_timestep),
                          (options.object, differ.compare_object))
    for selector, compare in scoped_comparisons:
        if selector:
            compare(selector)
            break
    else:
        differ.compare()

    if differ.failed:
        logger.info("Differences found. Exiting with status 1.")
        sys.exit(1)
    logger.info("No differences found.")
def _generate_timestep_pairs_from_sims(self, sim1, sim2):
    """Pair up the timesteps of two different simulations for crosslinking.

    For each timestep of ``sim1`` the best-matching timestep of ``sim2`` is
    found; the pair is kept only when the match is mutual (matching back from
    the candidate returns the original timestep), which avoids many-to-one
    pairings.

    :param sim1: first simulation (must differ from sim2)
    :param sim2: second simulation
    :returns: list of mutually-matched (ts1, ts2) tuples
    """
    assert sim1 != sim2, "Can't link simulation to itself"
    logger.info("Match timesteps of %r to %r", sim1, sim2)
    ts1s = sim1.timesteps
    ts2s = sim2.timesteps
    pairs = []
    for ts1 in ts1s:
        ts2 = self._get_best_timestep_matching(ts2s, ts1)
        # only accept the pairing when it is mutual
        pairing_is_mutual = (self._get_best_timestep_matching(ts1s, ts2) == ts1)
        if pairing_is_mutual:
            logger.info("Pairing timesteps: %r and %r", ts1, ts2)
            pairs += [(ts1, ts2)]
        else:
            # fix: logger.warn is a deprecated alias; use logger.warning
            logger.warning("No pairing found for timestep %r", ts1)
    return pairs
def run_calculation_loop(self):
    """Distribute timestep pairs across workers and crosslink each pair.

    Pairs come from ``self._generate_timestep_pairs()``; each worker links
    only the pairs assigned to it by ``parallel_tasks.distributed``. A pair is
    crosslinked when ``--force`` is given or when links are missing.
    """
    parallel_tasks.database.synchronize_creator_object()
    pairs = self._generate_timestep_pairs()
    if not pairs:
        logger.error("No timesteps found to link")
        return
    my_pairs = parallel_tasks.distributed(pairs)
    typecode = core.halo.Halo.object_typecode_from_tag(self.args.type_)
    for ts_earlier, ts_later in my_pairs:
        logger.info("Linking %r and %r", ts_earlier, ts_later)
        needs_work = self.args.force or self.need_crosslink_ts(
            ts_earlier, ts_later, typecode)
        if needs_work:
            self.crosslink_ts(ts_earlier, ts_later, 0, self.args.hmax,
                              self.args.dmonly, object_typecode=typecode)
def _db_import_export(target_session, from_session, *sims):
    """Copy simulations (properties, trackers, timesteps, objects, links) between databases.

    Each requested simulation is re-created in ``target_session`` from the data
    in ``from_session``. Objects are committed per-timestep so their primary
    keys exist before properties and halolinks (which reference them by id)
    are transferred.

    :param target_session: destination SQLAlchemy session
    :param from_session: source SQLAlchemy session
    :param sims: simulation identifiers to copy; empty means all simulations
    """
    # maps source-database object id -> newly created target-database object
    external_id_to_internal_halo = {}
    translated_halolink_ids = []
    if len(sims)==0:
        sims = [x.id for x in all_simulations(from_session)]
    for sim in sims:
        ext_sim = get_simulation(sim, from_session)
        sim = Simulation(ext_sim.basename)
        target_session.add(sim)
        logger.info("Transferring simulation %s", ext_sim)
        # NOTE(review): despite its name, halos_this_ts here collects
        # simulation-level properties and trackers, not halos
        halos_this_ts = []
        for p_ext in ext_sim.properties:
            dic = get_or_create_dictionary_item(
                target_session, p_ext.name.text)
            p = SimulationProperty(sim, dic, p_ext.data)
            halos_this_ts.append(p)
        for tk_ext in ext_sim.trackers:
            tk = TrackData(sim, tk_ext.halo_number)
            tk.particles = tk_ext.particles
            tk.use_iord = tk_ext.use_iord
            halos_this_ts.append(tk)
        target_session.add_all(halos_this_ts)
        for ts_ext in ext_sim.timesteps:
            logger.info("Transferring timestep %s",ts_ext)
            ts = TimeStep(sim, ts_ext.extension)
            ts.redshift = ts_ext.redshift
            ts.time_gyr = ts_ext.time_gyr
            ts.available = True
            target_session.add(ts)
            halos_this_ts = []
            logger.info("Transferring objects for %s", ts_ext)
            for h_ext in ts_ext.objects:
                h = SimulationObjectBase(ts, h_ext.halo_number, h_ext.finder_id,
                                         h_ext.finder_offset, h_ext.NDM,
                                         h_ext.NStar, h_ext.NGas,
                                         h_ext.object_typecode)
                # remember the source id so links/properties can be re-pointed
                h.external_id = h_ext.id
                halos_this_ts.append(h)
            target_session.add_all(halos_this_ts)
            # commit now so the new objects receive primary keys
            target_session.commit()
            for h in halos_this_ts:
                assert h.id is not None and h.id > 0
                external_id_to_internal_halo[h.external_id] = h
            # NOTE(review): properties_this_ts is never appended to — it looks
            # like a leftover; properties are added directly to the session below
            properties_this_ts = []
            logger.info("Transferring object properties for %s", ts_ext)
            for h_ext in ts_ext.objects:
                h_new = external_id_to_internal_halo[h_ext.id]
                for p_ext in h_ext.properties:
                    dic = get_or_create_dictionary_item(
                        target_session, p_ext.name.text)
                    dat = p_ext.data_raw
                    if dat is not None:
                        p = HaloProperty(h_new, dic, dat)
                        target_session.add(p)
            target_session.commit()
        # halolinks are translated only after all objects of the simulation
        # exist, since links may span timesteps
        for ts_ext in ext_sim.timesteps:
            logger.info("Transferring halolinks for timestep %s", ts_ext)
            sys.stdout.flush()
            _translate_halolinks(
                target_session, ts_ext.links_from,
                external_id_to_internal_halo, translated_halolink_ids)
            _translate_halolinks(
                target_session, ts_ext.links_to,
                external_id_to_internal_halo, translated_halolink_ids)
            target_session.commit()
    logger.info("Done")
def crosslink_ts(self, ts1, ts2, halo_min=0, halo_max=None, dmonly=False,
                 threshold=config.default_linking_threshold, object_typecode=0):
    """Link the halos of two timesteps together.

    Matches objects between the two steps in both directions using the
    simulations' output handlers and stores the resulting HaloLink rows
    (dictionary item "ptcls_in_common") in the database.

    :type ts1 tangos.core.TimeStep
    :type ts2 tangos.core.TimeStep
    :param halo_min: smallest halo number to consider
    :param halo_max: largest halo number to consider (None = no limit)
    :param dmonly: match using dark matter particles only
    :param threshold: minimum particle fraction for a link to be recorded
    :param object_typecode: restrict matching to this object type
    """
    logger.info("Gathering halo information for %r and %r", ts1, ts2)
    halos1 = self.make_finder_id_to_halo_map(ts1, object_typecode)
    halos2 = self.make_finder_id_to_halo_map(ts2, object_typecode)
    # serialise dictionary-item creation across processes before matching
    with parallel_tasks.ExclusiveLock("create_db_objects_from_catalog"):
        same_d_id = core.dictionary.get_or_create_dictionary_item(
            self.session, "ptcls_in_common")
        self.session.commit()
    output_handler_1 = ts1.simulation.get_output_handler()
    output_handler_2 = ts2.simulation.get_output_handler()
    # both handlers must share the same match_objects implementation, else
    # forward and backward matching would be inconsistent
    if type(output_handler_1).match_objects != type(
            output_handler_2).match_objects:
        logger.error(
            "Timesteps %r and %r cannot be crosslinked; they are using incompatible file readers",
            ts1, ts2)
        return
    # keep the files alive throughout (so they are not garbage-collected after the first match_objects):
    snap1 = ts1.load()
    snap2 = ts2.load()
    try:
        cat = output_handler_1.match_objects(
            ts1.extension, ts2.extension, halo_min, halo_max, dmonly,
            threshold,
            core.halo.Halo.object_typetag_from_code(object_typecode),
            output_handler_for_ts2=output_handler_2)
        back_cat = output_handler_2.match_objects(
            ts2.extension, ts1.extension, halo_min, halo_max, dmonly,
            threshold,
            core.halo.Halo.object_typetag_from_code(object_typecode),
            output_handler_for_ts2=output_handler_1)
    except Exception as e:
        # NOTE(review): KeyboardInterrupt derives from BaseException, not
        # Exception, so this isinstance check can never fire — harmless but dead
        if isinstance(e, KeyboardInterrupt):
            raise
        logger.exception(
            "Exception during attempt to crosslink timesteps %r and %r",
            ts1, ts2)
        return
    with self.session.no_autoflush:
        logger.info("Gathering links for %r and %r", ts1, ts2)
        items = self.create_db_objects_from_catalog(
            cat, halos1, halos2, same_d_id)
        logger.info("Identified %d links between %r and %r",
                    len(items), ts1, ts2)
        items_back = self.create_db_objects_from_catalog(
            back_cat, halos2, halos1, same_d_id)
        logger.info("Identified %d links between %r and %r",
                    len(items_back), ts2, ts1)
    # commit both directions under the exclusive lock in one transaction
    with parallel_tasks.ExclusiveLock("create_db_objects_from_catalog"):
        logger.info("Preparing to commit links for %r and %r", ts1, ts2)
        self.session.add_all(items)
        self.session.add_all(items_back)
        self.session.commit()
    logger.info("Finished committing total of %d links for %r and %r",
                len(items) + len(items_back), ts1, ts2)
def scan_for_BHs(files, session):
    """Scan each assigned timestep for black holes and store them in the database.

    For every timestep this worker receives, BH particles are identified on
    disk (star particles with tform < 0), missing tracker/BH rows are created,
    and halo-association links ("BH", and "BH_central"/"host_halo") are added.

    :param files: timesteps to scan, distributed across workers
    :param session: SQLAlchemy session used for database updates
    """
    for timestep in parallel_tasks.distributed(files):
        logger.info("Processing %s", timestep)
        try:
            timestep_particle_data = timestep.load()
        except Exception:
            # fix: was a bare except, which also swallowed KeyboardInterrupt
            # and SystemExit; Exception keeps the best-effort skip behaviour
            logger.warning("File not found - continuing")
            continue
        if len(timestep_particle_data.star) < 1:
            logger.warning("No stars - continuing")
            continue
        timestep_particle_data.physical_units()
        logger.info(
            "Gathering existing BH halo information from database for step %r",
            timestep)
        bhobjs = timestep.bhs.all()
        existing_bh_nums = [x.halo_number for x in bhobjs]
        logger.info("...found %d existing BHs", len(existing_bh_nums))
        logger.info("Gathering BH info from simulation for step %r", timestep)
        # BHs are star particles with negative formation time; compute the
        # selection once (previously evaluated twice)
        bh_indices = np.where(timestep_particle_data.star['tform'] < 0)[0]
        bh_iord_this_timestep = timestep_particle_data.star['iord'][bh_indices]
        bh_mass_this_timestep = timestep_particle_data.star['mass'][bh_indices]
        logger.info("Found %d black holes for %r",
                    len(bh_iord_this_timestep), timestep)
        logger.info(
            "Updating BH trackdata and BH objects using on-disk information from %r",
            timestep)
        add_missing_trackdata_and_BH_objects(timestep, bh_iord_this_timestep,
                                             existing_bh_nums, session)
        session.expire_all()
        logger.info("Calculating halo associations for BHs in timestep %r",
                    timestep)
        bh_cen_halos, bh_halos = bh_halo_assign(timestep_particle_data)
        # re-order our information so that links refer to BHs in descending order of mass
        bh_order_by_mass = np.argsort(bh_mass_this_timestep)[::-1]
        bh_iord_this_timestep = bh_iord_this_timestep[bh_order_by_mass]
        if bh_halos is not None:
            bh_halos = bh_halos[bh_order_by_mass]
        if bh_cen_halos is not None:
            bh_cen_halos = bh_cen_halos[bh_order_by_mass]
        logger.info("Freeing the timestep particle data")
        with check_deleted(timestep_particle_data):
            del timestep_particle_data
        if bh_halos is not None:
            assign_bh_to_halos(bh_halos, bh_iord_this_timestep, timestep, "BH")
        if bh_cen_halos is not None:
            assign_bh_to_halos(bh_cen_halos, bh_iord_this_timestep, timestep,
                               "BH_central", "host_halo")
def generate_halolinks(session, fname, pairs):
    """Generate BH tracker and merger links between each assigned pair of timesteps.

    For each (ts1, ts2) pair this worker receives: bidirectional "tracker"
    links are created for BHs present in both steps, then the .BHmergers file
    is parsed for mergers occurring between the two steps and
    "BH_merger_next"/"BH_merger_prev" links are stored (the prev link carries
    the merger ratio).

    :param session: SQLAlchemy session used for database updates
    :param fname: sequence whose first element is the .BHmergers file path
    :param pairs: (ts1, ts2) timestep pairs, distributed across workers
    """
    for ts1, ts2 in parallel_tasks.distributed(pairs):
        # prefer the full orbit log; fall back to the shortened version
        bh_log = None
        if BlackHolesLog.can_load(ts2.filename):
            bh_log = BlackHolesLog(ts2.filename)
        elif ShortenedOrbitLog.can_load(ts2.filename):
            bh_log = ShortenedOrbitLog(ts2.filename)
        if bh_log is None:
            logger.error("Warning! No orbit file found!")
        links = []
        mergers_links = []
        bh_map = {}
        logger.info("Gathering BH tracking information for steps %r and %r",
                    ts1, ts2)
        # dictionary-item creation must be serialised across processes
        with parallel_tasks.ExclusiveLock("bh"):
            dict_obj = db.core.get_or_create_dictionary_item(
                session, "tracker")
            dict_obj_next = db.core.get_or_create_dictionary_item(
                session, "BH_merger_next")
            dict_obj_prev = db.core.get_or_create_dictionary_item(
                session, "BH_merger_prev")
        track_links_n, idf_n, idt_n = db.tracking.get_tracker_links(
            session, dict_obj_next)
        bh_objects_1, nums1, id1 = get_bh_objs_numbers_and_dbids(ts1)
        bh_objects_2, nums2, id2 = get_bh_objs_numbers_and_dbids(ts2)
        tracker_links, idf, idt = db.tracking.get_tracker_links(
            session, dict_obj)
        idf_n = np.array(idf_n)
        idt_n = np.array(idt_n)
        if len(nums1) == 0 or len(nums2) == 0:
            logger.info("No BHs found in either step %r or %r... moving on",
                        ts1, ts2)
            continue
        logger.info("Generating BH tracker links between steps %r and %r",
                    ts1, ts2)
        # indices of BHs present in both steps (same iord ordering on each side)
        o1 = np.where(np.in1d(nums1, nums2))[0]
        o2 = np.where(np.in1d(nums2, nums1))[0]
        if len(o1) == 0 or len(o2) == 0:
            continue
        with session.no_autoflush:
            for ii, jj in zip(o1, o2):
                if nums1[ii] != nums2[jj]:
                    raise RuntimeError("BH iords are mismatched")
                # only create links that do not already exist in the database
                exists = np.where((idf == id1[ii]) & (idt == id2[jj]))[0]
                if len(exists) == 0:
                    links.append(
                        tangos.core.halo_data.HaloLink(bh_objects_1[ii],
                                                       bh_objects_2[jj],
                                                       dict_obj, 1.0))
                    links.append(
                        tangos.core.halo_data.HaloLink(bh_objects_2[jj],
                                                       bh_objects_1[ii],
                                                       dict_obj, 1.0))
        logger.info("Generated %d tracker links between steps %r and %r",
                    len(links), ts1, ts2)
        logger.info("Generating BH Merger information for steps %r and %r",
                    ts1, ts2)
        # fix: the mergers file was previously opened without being closed;
        # use a context manager so the handle is released per pair
        with open(fname[0]) as mergers_file:
            for l in mergers_file:
                l_split = l.split()
                t = float(l_split[6])
                bh_dest_id = int(l_split[0])
                bh_src_id = int(l_split[1])
                ratio = float(l_split[4])
                # ratios in merger file are ambiguous (since major progenitor may
                # be "source" rather than "destination")
                # re-establish using the log file:
                try:
                    ratio = bh_log.determine_merger_ratio(bh_src_id, bh_dest_id)
                except (ValueError, AttributeError) as e:
                    # AttributeError also covers bh_log being None (no orbit file)
                    logger.debug(
                        "Could not calculate merger ratio for %d->%d from the BH log; assuming the .BHmergers-asserted value is accurate",
                        bh_src_id, bh_dest_id)
                # keep only mergers occurring strictly within this step interval
                if t > ts1.time_gyr and t <= ts2.time_gyr:
                    bh_map[bh_src_id] = (bh_dest_id, ratio)
        resolve_multiple_mergers(bh_map)
        logger.info("Gathering BH merger links for steps %r and %r", ts1, ts2)
        with session.no_autoflush:
            for src, (dest, ratio) in bh_map.items():
                if src not in nums1 or dest not in nums2:
                    logger.warning(
                        "Can't link BH %r -> %r; missing BH objects in database",
                        src, dest)
                    continue
                bh_src_before = bh_objects_1[nums1.index(src)]
                bh_dest_after = bh_objects_2[nums2.index(dest)]
                # skip pairs that already have a BH_merger_next link
                if ((idf_n == bh_src_before.id) &
                        (idt_n == bh_dest_after.id)).sum() == 0:
                    mergers_links.append(
                        tangos.core.halo_data.HaloLink(bh_src_before,
                                                       bh_dest_after,
                                                       dict_obj_next, 1.0))
                    mergers_links.append(
                        tangos.core.halo_data.HaloLink(bh_dest_after,
                                                       bh_src_before,
                                                       dict_obj_prev, ratio))
        logger.info("Generated %d BH merger links for steps %r and %r",
                    len(mergers_links), ts1, ts2)
        with parallel_tasks.ExclusiveLock("bh"):
            logger.info("Committing total %d BH links for steps %r and %r",
                        len(mergers_links) + len(links), ts1, ts2)
            session.add_all(links)
            session.add_all(mergers_links)
            session.commit()
        logger.info("Finished committing BH links for steps %r and %r",
                    ts1, ts2)
def assign_bh_to_halos(bh_halo_assignment, bh_iord, timestep, linkname, hostname=None):
    """Create halo<->BH links in the database for one timestep.

    For each (BH iord, halo finder-offset) assignment, a ``linkname`` link is
    created from the halo to the BH; when ``hostname`` is given, a reverse
    link from the BH to its host halo is created too. Existing links are not
    duplicated.

    Fixes over the previous version: leftover debug ``print`` calls removed,
    and the per-iteration O(n) ``list.index`` / ``in list`` lookups replaced
    by precomputed first-occurrence index maps.

    :param bh_halo_assignment: halo finder-offset for each BH (parallel to bh_iord)
    :param bh_iord: BH particle iords, in the order links should be created
    :param timestep: database timestep the halos/BHs belong to
    :param linkname: dictionary name for the halo->BH link (e.g. "BH")
    :param hostname: optional dictionary name for the BH->halo reverse link
    """
    session = Session.object_session(timestep)
    linkname_dict_id = tangos.core.dictionary.get_or_create_dictionary_item(
        session, linkname)
    if hostname is not None:
        host_dict_id = tangos.core.dictionary.get_or_create_dictionary_item(
            session, hostname)
    else:
        host_dict_id = None
    logger.info("Gathering %s links for step %r", linkname, timestep)
    links, link_id_from, link_id_to = db.tracking.get_tracker_links(
        session, linkname_dict_id)
    halos = timestep.halos.filter_by(object_typecode=0).all()
    halo_catind = [h.finder_offset for h in halos]
    halo_ids = np.array([h.id for h in halos])
    logger.info("Gathering bh halo information for %r", timestep)
    with parallel_tasks.lock.SharedLock("bh"):
        bh_database_object, existing_bh_nums, bhobj_ids = \
            get_bh_objs_numbers_and_dbids(timestep)
    # first-occurrence index maps: same result as list.index, but O(1) lookup
    bh_index_map = {}
    for idx, num in enumerate(existing_bh_nums):
        bh_index_map.setdefault(num, idx)
    halo_index_map = {}
    for idx, catind in enumerate(halo_catind):
        halo_index_map.setdefault(catind, idx)
    bh_links = []
    with session.no_autoflush:
        for bhi, haloi in zip(bh_iord, bh_halo_assignment):
            haloi = int(haloi)
            bhi = int(bhi)
            if haloi not in halo_index_map:
                logger.warning(
                    "Skipping BH in halo %d as no corresponding halo found in the database",
                    haloi)
                continue
            if bhi not in bh_index_map:
                # previously also dumped bhi/existing_bh_nums via print();
                # the logger message already identifies the missing BH
                logger.warning("Can't find the database object for BH %d", bhi)
                continue
            bh_index_in_list = bh_index_map[bhi]
            halo_index_in_list = halo_index_map[haloi]
            bh_obj = bh_database_object[bh_index_in_list]
            halo_obj = halos[halo_index_in_list]
            num_existing_links = (
                (link_id_from == halo_ids[halo_index_in_list]) &
                (link_id_to == bhobj_ids[bh_index_in_list])).sum()
            if num_existing_links == 0:
                bh_links.append(
                    tangos.core.halo_data.HaloLink(halo_obj, bh_obj,
                                                   linkname_dict_id))
                if host_dict_id is not None:
                    bh_links.append(
                        tangos.core.halo_data.HaloLink(bh_obj, halo_obj,
                                                       host_dict_id))
    logger.info("Committing %d %s links for step %r...",
                len(bh_links), linkname, timestep)
    with parallel_tasks.ExclusiveLock("bh"):
        session.add_all(bh_links)
        session.commit()
    logger.info("...done")