def _process_clusters(infile, scales=None, skipsnaps=None, skipmore_for_select=None,
                      h0=None, m_max_cluster=None, m_min_cluster=None, **kwargs):
    _log('start', infile)
    out_arrays = list()
    read_tree.read_tree(infile)
    _log('read complete', infile)
    # all_halos = read_tree.all_halos
    halo_tree = read_tree.halo_tree
    nsnaps = len(scales) - skipsnaps - skipmore_for_select
    for halo in halo_tree.halo_lists[skipsnaps + skipmore_for_select].halos:
        if halo.parent is not None:  # centrals only
            continue
        if (halo.mvir / h0 > m_max_cluster.value) or \
           (halo.mvir / h0 < m_min_cluster.value):
            continue
        cluster_branch = list()
        for level in range(nsnaps):
            if len(cluster_branch) == 0:
                cluster_branch.append(halo)
            elif cluster_branch[-1] is None:
                cluster_branch.append(None)
            else:
                cluster_branch.append(cluster_branch[-1].prog)
        cluster_branch = cluster_branch[::-1]
        for level in range(skipmore_for_select):
            if cluster_branch[-1] is None:
                cluster_branch.append(None)
            else:
                cluster_branch.append(cluster_branch[-1].desc)
        out_arrays.append(_extract_cluster_arrays(cluster_branch, h0=h0))
    read_tree.delete_tree()
    return out_arrays
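# Example (not part of the original source): a minimal driver sketch showing one way
# _process_clusters could be invoked over several tree files. The file list, process
# count and unit choices are assumptions; the mass limits are passed as astropy
# Quantities only because the function reads their .value attribute.
def _example_process_clusters_driver(tree_files, scales, h0=0.7, nproc=4):
    from functools import partial
    from multiprocessing import Pool
    import astropy.units as U

    worker = partial(
        _process_clusters,
        scales=scales,                    # snapshot scale factors, one per snapshot
        skipsnaps=0,
        skipmore_for_select=0,
        h0=h0,
        m_min_cluster=1.0E14 * U.Msun,    # assumed cluster mass window
        m_max_cluster=1.0E16 * U.Msun,
    )
    with Pool(nproc) as pool:
        # flatten the per-file lists of cluster arrays into one list
        return [arrays for chunk in pool.map(worker, tree_files) for arrays in chunk]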
def _process_orbits(infile, scales=None, skipsnaps=None, h0=None, lbox=None,
                    m_min_satellite=None, m_max_satellite=None, m_min_cluster=None,
                    m_max_cluster=None, **kwargs):
    _log(' processing file', infile.split('/')[-1])
    read_tree.read_tree(infile)
    all_halos = read_tree.all_halos
    halo_tree = read_tree.halo_tree
    nsnaps = len(scales) - skipsnaps
    out_arrays = []
    for halo in halo_tree.halo_lists[skipsnaps].halos:
        if (halo.mvir / h0 > m_max_satellite.value) or \
           (halo.mvir / h0 < m_min_satellite.value):
            continue
        superparent = _get_superparent(halo)
        if superparent is None:
            continue
        if (superparent.mvir / h0 < m_min_cluster.value) or \
           (superparent.mvir / h0 > m_max_cluster.value):
            continue
        halo_branch = []
        superparent_branch = []
        for level in range(nsnaps):
            if len(halo_branch) == 0:
                halo_branch.append(halo)
                superparent_branch.append(_get_superparent(halo))
            elif halo_branch[-1] is None:
                halo_branch.append(None)
                superparent_branch.append(None)
            else:
                halo_branch.append(halo_branch[-1].prog)
                superparent_branch.append(_get_superparent(halo_branch[-1]))
        halo_branch = halo_branch[::-1]
        superparent_branch = superparent_branch[::-1]
        out_arrays.append(
            _extract_orbit_arrays(halo_branch, superparent_branch, h0=h0))
    read_tree.delete_tree()
    return out_arrays
def create_matching_file_pylori_1branch2comp():
    from read_tree import read_tree
    gene = read_tree("ex_pylori/genes_sub2/FAM000006.fasta_nuc.filtred_aligned.treefile")
    # map the H. pylori (sub)population label carried in each leaf name onto the
    # coarse geographic populations used in the matching file
    d_pop_geo_match = dict()
    for u in ["hpEastAsia", "hpAsia2", "hpSahul", "hpNEAfrica", "hpAfrica1",
              "hpAfrica2", "outgroup", "hpEurope"]:
        d_pop_geo_match[u] = u
    for u in ["hspEAsia", "hpEAsia"]:
        d_pop_geo_match[u] = "hpEastAsia"
    for u in ["hspMaori"]:
        d_pop_geo_match[u] = "hpEastAsia"
    for u in ["hpAfrica", "hpWAfrica", "hspSAfrica", "hspSAfrica1", "hspWAfrica",
              "hspWAfrica1"]:
        d_pop_geo_match[u] = "hpAfrica1"
    f = open("ex_pylori/matching_pop_genes_1branch2comp", "w")
    # first block: one line per leaf, tagged "pop"
    i = 0
    for u in gene.leaves_unrooted():
        s = u.name
        pylori_pop = s[s.rfind("_") + 1:]
        present_pop = d_pop_geo_match[pylori_pop]
        if present_pop == "hpEurope":
            f.write(s)
            f.write("\t")
            f.write("hpEurope_4")
            f.write("\t")
            f.write("pop")
        else:
            f.write(s)
            f.write("\t")
            f.write(present_pop)
            f.write("\t")
            f.write("pop")
        i += 1
        f.write("\n")
    # second block: same leaves, tagged "pop_Asie"; no newline after the last line
    i = 0
    for u in gene.leaves_unrooted():
        s = u.name
        pylori_pop = s[s.rfind("_") + 1:]
        present_pop = d_pop_geo_match[pylori_pop]
        if present_pop == "hpEurope":
            f.write(s)
            f.write("\t")
            f.write("hpEurope_4")
            f.write("\t")
            f.write("pop_Asie")
        else:
            f.write(s)
            f.write("\t")
            f.write(present_pop)
            f.write("\t")
            f.write("pop_Asie")
        i += 1
        if i < len(gene.leaves_unrooted()):
            f.write("\n")
    f.close()
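# Example (not part of the original source): a small helper sketch that reads the
# matching file written above back into per-tag {leaf_name: population} dictionaries,
# mainly as a sanity check. The default path is the one hard-coded in the writer.
def _example_read_matching_file(path="ex_pylori/matching_pop_genes_1branch2comp"):
    blocks = {}
    with open(path) as f:
        for line in f:
            line = line.rstrip("\n")
            if not line:
                continue
            # each record is "leaf_name<TAB>population<TAB>tag"
            leaf, population, tag = line.split("\t")
            blocks.setdefault(tag, {})[leaf] = population
    return blocks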
def root2pickle(root_file="./RootAnalysis_SVfitAnalysisMuTau.root", out_file="htt_features.pkl", tree_path="Summary/tree"): """Imports data from the .root file using read_tree and saves them to pickle file data can be then accessed with: ``` with open("htt_features.pkl", "rb") as input: legs, jets, global_params, properties = pickle.load(input) ``` """ legs, jets, global_params, properties = rt.read_tree(root_file, tree_path) with open(out_file, "wb") as output: pickle.dump((legs, jets, global_params, properties), output) print("Data from " + root_file + " saved to " + out_file)
def root2pickle(root_file = "./RootAnalysis_SVfitAnalysisMuTau.root", out_file = "htt_features.pkl", tree_path = "Summary/tree"): """Imports data from the .root file using read_tree and saves them to pickle file data can be then accessed with: ``` with open("htt_features.pkl", "rb") as input: legs, jets, global_params, properties = pickle.load(input) ``` """ legs, jets, global_params, properties = rt.read_tree(root_file, tree_path) with open(out_file, "wb") as output: pickle.dump((legs,jets, global_params, properties), output) print("Data from " + root_file + " saved to " + out_file)
def save2binary(in_file="../data/dummy.root", tree_path="Summary/tree",
                out_file="output/example"):
    # load data from root file
    legs, jets, global_params, properties = rt.read_tree(in_file, tree_path)
    print("[ML]\tNumber of legs: {}".format(len(legs)))
    print("[ML]\tNumber of jets: {}".format(len(jets)))
    print("[ML]\tGlobal parameters from data:")
    for key, value in global_params.items():
        print("\t* " + key)
    print("[ML]\tParticle properties from data:")
    for key, value in properties.items():
        print("\t* " + key)

    # do some stuff with data
    # stuff
    # more stuff
    ints = {}
    floats = {}
    if "nJets30" in global_params:
        # conversion to integer
        ints = {"nJets30": [int(x) for x in global_params["nJets30"]]}
    for key in global_params:
        if key != "nJets30":
            floats[key] = global_params[key]
    for key in properties:
        floats[key] = properties[key]

    try:
        # NOT GENERIC ENOUGH!!!
        data = HTT_generator(legs, jets, ints, floats)
        # print(next(data))
        # writing to file
        binary_template.wpisz(data)
    except:
        print("[ERROR] GENERATION FAILED! MESSAGE:", sys.exc_info()[0])
        raise
def _process_interlopers(infile, outfile=None, skipsnaps=None, h0=None, lbox=None,
                         m_min_satellite=None, m_max_satellite=None,
                         interloper_dR=None, interloper_dV=None, **kwargs):
    # reading the clusters here happens once per parallel process, but the data would
    # be copied for each process anyway; avoiding that would require an explicit
    # shared memory object
    # note: placed here, it gets destroyed when no longer needed
    cluster_ids = []
    cluster_xyzs = []
    cluster_vzs = []
    cluster_rvirs = []
    cluster_vrmss = []
    with h5py.File(outfile, 'r', libver=libver) as f:
        for cluster_key, cluster in f['clusters'].items():
            cluster_ids.append(cluster['ids'][-1 - skipsnaps])
            cluster_xyzs.append(cluster['xyz'][-1 - skipsnaps])
            cluster_vzs.append(cluster['vxyz'][-1 - skipsnaps, 2])
            cluster_rvirs.append(cluster['rvir'][-1 - skipsnaps])
            cluster_vrmss.append(cluster['vrms'][-1 - skipsnaps])
    cluster_ids = np.array(cluster_ids, dtype=np.long)
    cluster_xyzs = np.array(cluster_xyzs, dtype=np.float)
    cluster_vzs = np.array(cluster_vzs, dtype=np.float)
    cluster_rvirs = np.array(cluster_rvirs, dtype=np.float)
    cluster_vrmss = np.array(cluster_vrmss, dtype=np.float)

    _log(' processing file', infile.split('/')[-1])
    read_tree.read_tree(infile)
    all_halos = read_tree.all_halos
    halo_tree = read_tree.halo_tree
    out_arrays = []
    for halo in halo_tree.halo_lists[skipsnaps].halos:
        if (halo.mvir / h0 > m_max_satellite.value) or \
           (halo.mvir / h0 < m_min_satellite.value):
            continue
        xyz = np.array([halo.pos[0] / h0, halo.pos[1] / h0, halo.pos[2] / h0],
                       dtype=np.float)
        vz = np.array([halo.vel[2]], dtype=np.float)
        D = xyz - cluster_xyzs
        D[D > lbox.value / 2.] -= lbox.value
        D[D < -lbox.value / 2.] += lbox.value
        dvz = np.abs(vz - cluster_vzs + 100.0 * h0 * D[:, 2]) / cluster_vrmss
        D *= 1.E3 / cluster_rvirs[:, np.newaxis]  # rvir in kpc
        D = np.power(D, 2)
        is_near = cluster_ids[np.logical_and(
            np.logical_and(
                D[:, 0] + D[:, 1] < np.power(interloper_dR, 2),  # inside of circle
                np.sum(D, axis=1) > np.power(interloper_dR, 2)   # outside of sphere
            ),
            dvz < interloper_dV  # inside velocity offset limit
        )]
        if len(is_near):
            out_arrays.append(_extract_interloper_arrays(halo, is_near, h0=h0))
    read_tree.delete_tree()
    return out_arrays
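# Example (not part of the original source): a self-contained sketch of the geometric
# selection used in _process_interlopers, with made-up numbers and the velocity cut
# omitted for brevity. A candidate counts as an interloper when it falls inside the
# projected circle of radius interloper_dR (in units of rvir) but outside the
# corresponding sphere, after wrapping separations into the periodic box.
def _example_interloper_selection():
    import numpy as np

    lbox = 100.0                                # hypothetical box size [Mpc]
    interloper_dR = 2.5                         # selection radius in units of rvir
    halo_xyz = np.array([1.0, 1.0, 96.0])       # candidate satellite position [Mpc]
    cluster_xyzs = np.array([[2.0, 1.0, 1.0]])  # one cluster near the box edge [Mpc]
    cluster_rvirs = np.array([1000.0])          # cluster virial radius [kpc]

    D = halo_xyz - cluster_xyzs
    D[D > lbox / 2.] -= lbox                    # minimum-image wrapping
    D[D < -lbox / 2.] += lbox
    D *= 1.E3 / cluster_rvirs[:, np.newaxis]    # separations in units of rvir
    D = np.power(D, 2)
    in_circle = D[:, 0] + D[:, 1] < interloper_dR ** 2   # inside projected circle
    out_sphere = np.sum(D, axis=1) > interloper_dR ** 2  # but outside the 3D sphere
    return np.logical_and(in_circle, out_sphere)         # -> array([ True])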
def _process_orbits(infile, outfile=None, scales=None, skipsnaps=None,
                    skipmore_for_select=None, h0=None, lbox=None, m_min_satellite=None,
                    m_max_satellite=None, m_min_cluster=None, m_max_cluster=None,
                    interloper_dR=None, cluster_ids=None, cluster_xyzs=None,
                    cluster_rvirs=None, cluster_mvirs=None, **kwargs):
    # because of parallel writing, putting reads of the 'outfile' here causes errors
    _log(' processing file, reading', infile.split('/')[-1])
    read_tree.read_tree(infile)
    _log(' read complete', infile.split('/')[-1])
    # all_halos = read_tree.all_halos
    halo_tree = read_tree.halo_tree
    nsnaps = len(scales) - skipsnaps - skipmore_for_select
    out_arrays = list()
    halo_list = halo_tree.halo_lists[skipsnaps + skipmore_for_select].halos
    for progress, halo in enumerate(halo_list):
        if progress % 1000 == 0:
            _log(' process progress', progress, '/', len(halo_list), '|',
                 infile.split('/')[-1])
        xyz = np.array([halo.pos[0] / h0, halo.pos[1] / h0, halo.pos[2] / h0],
                       dtype=np.float)
        D = xyz - cluster_xyzs
        D[D > lbox.value / 2.] -= lbox.value
        D[D < -lbox.value / 2.] += lbox.value
        D *= 1.E3 / cluster_rvirs[:, np.newaxis]  # rvir in kpc
        D = np.power(D, 2)
        is_near = np.sum(D, axis=1) < np.power(interloper_dR, 2)
        if np.sum(is_near) == 0:
            continue
        # if multiple possible hosts pick most massive
        host_id = cluster_ids[is_near][np.argmax(cluster_mvirs[is_near])]
        halo_branch = list()
        superparent_branch = list()
        for level in range(nsnaps):
            if len(halo_branch) == 0:
                halo_branch.append(halo)
                superparent_branch.append(_get_superparent(halo))
            elif halo_branch[-1] is None:
                halo_branch.append(None)
                superparent_branch.append(None)
            else:
                halo_branch.append(halo_branch[-1].prog)
                superparent_branch.append(_get_superparent(halo_branch[-1]))
        halo_branch = halo_branch[::-1]
        superparent_branch = superparent_branch[::-1]
        # mvir_max is already in h-free units (mvir / h0)
        mvir_max = np.nanmax([
            h.mvir / h0 if h is not None else np.nan for h in halo_branch
        ])
        if (mvir_max > m_max_satellite.value) or \
           (mvir_max < m_min_satellite.value):
            continue
        for level in range(skipmore_for_select):
            if halo_branch[-1] is None:
                halo_branch.append(None)
                superparent_branch.append(None)
            else:
                halo_branch.append(halo_branch[-1].desc)
                superparent_branch.append(_get_superparent(halo_branch[-1]))
        out_arrays.append(
            _extract_orbit_arrays(host_id, halo_branch, superparent_branch, h0=h0,
                                  skipmore_for_select=skipmore_for_select))
    read_tree.delete_tree()
    _log(' processing complete', infile.split('/')[-1])
    return out_arrays
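# Example (not part of the original source): a sketch of how the cluster_* arrays taken
# by _process_orbits could be gathered from the 'clusters' group of the HDF5 output,
# mirroring the reads in _process_interlopers. The 'mvir' dataset name is an assumption;
# the other dataset names appear above.
def _example_collect_cluster_arrays(outfile, skipsnaps=0):
    import numpy as np
    import h5py

    ids, xyzs, rvirs, mvirs = [], [], [], []
    with h5py.File(outfile, 'r') as f:
        for cluster_key, cluster in f['clusters'].items():
            ids.append(cluster['ids'][-1 - skipsnaps])
            xyzs.append(cluster['xyz'][-1 - skipsnaps])
            rvirs.append(cluster['rvir'][-1 - skipsnaps])
            mvirs.append(cluster['mvir'][-1 - skipsnaps])  # assumed dataset name
    return np.array(ids), np.array(xyzs), np.array(rvirs), np.array(mvirs)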