Example #1
def _process_clusters(infile,
                      scales=None,
                      skipsnaps=None,
                      skipmore_for_select=None,
                      h0=None,
                      m_max_cluster=None,
                      m_min_cluster=None,
                      **kwargs):

    _log('start', infile)

    out_arrays = list()

    read_tree.read_tree(infile)
    _log('read complete', infile)
    # all_halos = read_tree.all_halos
    halo_tree = read_tree.halo_tree

    nsnaps = len(scales) - skipsnaps - skipmore_for_select
    for halo in halo_tree.halo_lists[skipsnaps + skipmore_for_select].halos:
        if halo.parent is not None:  # centrals only
            continue

        if (halo.mvir / h0 > m_max_cluster.value) or \
           (halo.mvir / h0 < m_min_cluster.value):
            continue

        cluster_branch = list()

        for level in range(nsnaps):

            if len(cluster_branch) == 0:
                cluster_branch.append(halo)

            elif cluster_branch[-1] is None:
                cluster_branch.append(None)

            else:
                cluster_branch.append(cluster_branch[-1].prog)

        cluster_branch = cluster_branch[::-1]

        for level in range(skipmore_for_select):

            if cluster_branch[-1] is None:
                cluster_branch.append(None)

            else:
                cluster_branch.append(cluster_branch[-1].desc)

        out_arrays.append(_extract_cluster_arrays(cluster_branch, h0=h0))

    read_tree.delete_tree()

    return out_arrays
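For context, the later examples mention that each merger-tree file is read in its own parallel process. A minimal driver sketch along those lines, where the glob pattern, the parameter values and the use of `astropy.units` quantities (anything with a `.value`, as the mass cuts above assume) are illustrative assumptions, not the original pipeline:

# Hypothetical driver (not part of the original code): file pattern, parameter
# values and the astropy.units quantities are illustrative assumptions.
from functools import partial
from glob import glob
from multiprocessing import Pool

import astropy.units as U

def run_all_clusters(tree_dir, scales, h0=0.7, nproc=4):
    worker = partial(
        _process_clusters,
        scales=scales,
        skipsnaps=0,
        skipmore_for_select=0,
        h0=h0,
        m_min_cluster=1.0E14 * U.Msun,  # the mass cuts above read a .value attribute
        m_max_cluster=1.0E15 * U.Msun,
    )
    infiles = sorted(glob(tree_dir + '/tree_*.dat'))  # one merger-tree file per task
    with Pool(nproc) as pool:
        per_file = pool.map(worker, infiles)
    # flatten the per-file lists of cluster arrays
    return [arrays for one_file in per_file for arrays in one_file]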
Example #2
def _process_orbits(infile,
                    scales=None,
                    skipsnaps=None,
                    h0=None,
                    lbox=None,
                    m_min_satellite=None,
                    m_max_satellite=None,
                    m_min_cluster=None,
                    m_max_cluster=None,
                    **kwargs):

    _log('  processing file', infile.split('/')[-1])

    read_tree.read_tree(infile)
    all_halos = read_tree.all_halos
    halo_tree = read_tree.halo_tree

    nsnaps = len(scales) - skipsnaps

    out_arrays = []

    for halo in halo_tree.halo_lists[skipsnaps].halos:

        if (halo.mvir / h0 > m_max_satellite.value) or \
           (halo.mvir / h0 < m_min_satellite.value):
            continue

        superparent = _get_superparent(halo)

        if superparent is None:
            continue

        if (superparent.mvir / h0 < m_min_cluster.value) or \
           (superparent.mvir / h0 > m_max_cluster.value):
            continue

        halo_branch = []
        superparent_branch = []

        for level in range(nsnaps):

            if len(halo_branch) == 0:
                halo_branch.append(halo)
                superparent_branch.append(_get_superparent(halo))

            elif halo_branch[-1] is None:
                halo_branch.append(None)
                superparent_branch.append(None)

            else:
                halo_branch.append(halo_branch[-1].prog)
                superparent_branch.append(_get_superparent(halo_branch[-1]))

        halo_branch = halo_branch[::-1]
        superparent_branch = superparent_branch[::-1]

        out_arrays.append(
            _extract_orbit_arrays(halo_branch, superparent_branch, h0=h0))

    read_tree.delete_tree()

    return out_arrays
Example #3
def create_matching_file_pylori_1branch2comp():
    from read_tree import read_tree

    gene = read_tree("ex_pylori/genes_sub2/FAM000006.fasta_nuc.filtred_aligned.treefile")

    # map sub-population labels onto the broader geographic populations
    d_pop_geo_match = dict()
    for u in ["hpEastAsia", "hpAsia2", "hpSahul", "hpNEAfrica", "hpAfrica1",
              "hpAfrica2", "outgroup", "hpEurope"]:
        d_pop_geo_match[u] = u
    for u in ["hspEAsia", "hpEAsia", "hspMaori"]:
        d_pop_geo_match[u] = "hpEastAsia"
    for u in ["hpAfrica", "hpWAfrica", "hspSAfrica", "hspSAfrica1",
              "hspWAfrica", "hspWAfrica1"]:
        d_pop_geo_match[u] = "hpAfrica1"

    leaves = gene.leaves_unrooted()
    with open("ex_pylori/matching_pop_genes_1branch2comp", "w") as f:
        # first block: one "pop" line per leaf
        for u in leaves:
            s = u.name
            pylori_pop = s[s.rfind("_") + 1:]
            present_pop = d_pop_geo_match[pylori_pop]
            if present_pop == "hpEurope":
                present_pop = "hpEurope_4"
            f.write(s + "\t" + present_pop + "\t" + "pop" + "\n")

        # second block: one "pop_Asie" line per leaf, without a trailing newline
        for i, u in enumerate(leaves, start=1):
            s = u.name
            pylori_pop = s[s.rfind("_") + 1:]
            present_pop = d_pop_geo_match[pylori_pop]
            if present_pop == "hpEurope":
                present_pop = "hpEurope_4"
            f.write(s + "\t" + present_pop + "\t" + "pop_Asie")
            if i < len(leaves):
                f.write("\n")
Example #4
def root2pickle(root_file="./RootAnalysis_SVfitAnalysisMuTau.root",
                out_file="htt_features.pkl",
                tree_path="Summary/tree"):
    """Imports data from the .root file using read_tree and saves them to pickle file
	data can be then accessed with:
	```
	with open("htt_features.pkl", "rb") as input:
		legs, jets, global_params, properties = pickle.load(input)
	```
	"""
    legs, jets, global_params, properties = rt.read_tree(root_file, tree_path)
    with open(out_file, "wb") as output:
        pickle.dump((legs, jets, global_params, properties), output)
    print("Data from " + root_file + " saved to " + out_file)
Example #5
def root2pickle(root_file = "./RootAnalysis_SVfitAnalysisMuTau.root",
                out_file = "htt_features.pkl",
	        tree_path = "Summary/tree"):
								
	"""Imports data from the .root file using read_tree and saves them to pickle file
	data can be then accessed with:
	```
	with open("htt_features.pkl", "rb") as input:
		legs, jets, global_params, properties = pickle.load(input)
	```
	"""
	legs, jets, global_params, properties = rt.read_tree(root_file, tree_path)
	with open(out_file, "wb") as output:
			pickle.dump((legs,jets, global_params, properties), output)
	print("Data from " + root_file + " saved to " + out_file)
Example #6
def save2binary(in_file="../data/dummy.root",
                tree_path="Summary/tree",
                out_file="output/example"):

    # load data from root file
    legs, jets, global_params, properties = rt.read_tree(in_file, tree_path)

    print("[ML]\tNumber of legs: {}".format(len(legs)))
    print("[ML]\tNumber of jets: {}".format(len(jets)))
    print("[ML]\tGlobal parameters from data:")
    for key in global_params:
        print("\t*  " + key)
    print("[ML]\tParticle properties from data:")
    for key in properties:
        print("\t*  " + key)

    # do some stuff with the data
    # stuff
    # more stuff

    ints = {}
    floats = {}

    if "nJets30" in global_params:
        ints = {
            "nJets30": [int(x) for x in global_params["nJets30"]]
        }  # conversion to integer

    for key in global_params:
        if key != "nJets30":
            floats[key] = global_params[key]
    for key in properties:
        floats[key] = properties[key]

    try:
        # NOT GENERIC ENOUGH!!!
        data = HTT_generator(legs, jets, ints, floats)
        # print(next(data))

        # writing to file
        binary_template.wpisz(data)

    except:
        print("[ERROR] GENERATION FAILED! MESSAGE:", sys.exc_info()[0])
        raise
Example #7
def save2binary(in_file="../data/dummy.root", tree_path="Summary/tree", out_file="output/example"):

    # load data from root file
    legs, jets, global_params, properties = rt.read_tree(in_file, tree_path)

    print("[ML]\tNumber of legs: {}".format(len(legs)))
    print("[ML]\tNumber of jets: {}".format(len(jets)))
    print("[ML]\tGlobal parameters from data:")
    for key in global_params:
        print("\t*  " + key)
    print("[ML]\tParticle properties from data:")
    for key in properties:
        print("\t*  " + key)

    # do some stuff with the data
    # stuff
    # more stuff

    ints = {}
    floats = {}

    if "nJets30" in global_params:
        ints = {"nJets30": [int(x) for x in global_params["nJets30"]]}  # conversion to integer

    for key in global_params:
        if key != "nJets30":
            floats[key] = global_params[key]
    for key in properties:
        floats[key] = properties[key]

    try:
        # NOT GENERIC ENOUGH!!!
        data = HTT_generator(legs, jets, ints, floats)
        # print(next(data))

        # writing to file
        binary_template.wpisz(data)

    except:
        print("[ERROR] GENERATION FAILED! MESSAGE:", sys.exc_info()[0])
        raise
Example #8
def _process_interlopers(infile,
                         outfile=None,
                         skipsnaps=None,
                         h0=None,
                         lbox=None,
                         m_min_satellite=None,
                         m_max_satellite=None,
                         interloper_dR=None,
                         interloper_dV=None,
                         **kwargs):

    # Reading the clusters here happens once per parallel process, but the data
    # would be copied to each process anyway; avoiding that would require an
    # explicit shared-memory object. Note: placed here, the arrays are freed
    # as soon as they are no longer needed.

    cluster_ids = []
    cluster_xyzs = []
    cluster_vzs = []
    cluster_rvirs = []
    cluster_vrmss = []

    with h5py.File(outfile, 'r', libver=libver) as f:

        for cluster_key, cluster in f['clusters'].items():

            cluster_ids.append(cluster['ids'][-1 - skipsnaps])
            cluster_xyzs.append(cluster['xyz'][-1 - skipsnaps])
            cluster_vzs.append(cluster['vxyz'][-1 - skipsnaps, 2])
            cluster_rvirs.append(cluster['rvir'][-1 - skipsnaps])
            cluster_vrmss.append(cluster['vrms'][-1 - skipsnaps])

    cluster_ids = np.array(cluster_ids, dtype=np.int64)
    cluster_xyzs = np.array(cluster_xyzs, dtype=np.float64)
    cluster_vzs = np.array(cluster_vzs, dtype=np.float64)
    cluster_rvirs = np.array(cluster_rvirs, dtype=np.float64)
    cluster_vrmss = np.array(cluster_vrmss, dtype=np.float64)

    _log('  processing file', infile.split('/')[-1])

    read_tree.read_tree(infile)
    all_halos = read_tree.all_halos
    halo_tree = read_tree.halo_tree

    out_arrays = []

    for halo in halo_tree.halo_lists[skipsnaps].halos:

        if (halo.mvir / h0 > m_max_satellite.value) or \
           (halo.mvir / h0 < m_min_satellite.value):
            continue

        xyz = np.array([halo.pos[0] / h0, halo.pos[1] / h0, halo.pos[2] / h0],
                       dtype=np.float64)
        vz = np.array([halo.vel[2]], dtype=np.float64)

        D = xyz - cluster_xyzs
        D[D > lbox.value / 2.] -= lbox.value
        D[D < -lbox.value / 2.] += lbox.value
        dvz = np.abs(vz - cluster_vzs + 100.0 * h0 * D[:, 2]) / cluster_vrmss
        D *= 1.E3 / cluster_rvirs[:, np.newaxis]  #rvir in kpc
        D = np.power(D, 2)

        is_near = cluster_ids[np.logical_and(
            np.logical_and(
                D[:, 0] + D[:, 1] < np.power(interloper_dR,
                                             2),  #inside of circle
                np.sum(D, axis=1) > np.power(interloper_dR,
                                             2)  #outside of sphere
            ),
            dvz < interloper_dV  #inside velocity offset limit
        )]

        if len(is_near):
            out_arrays.append(_extract_interloper_arrays(halo, is_near, h0=h0))

    read_tree.delete_tree()

    return out_arrays
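To make the geometry of the cut above explicit: a halo is flagged as an interloper for a cluster if it lies inside the projected (x-y) circle of radius `interloper_dR` (in units of that cluster's rvir) but outside the sphere of the same radius, and its line-of-sight velocity offset is below `interloper_dV` (in units of vrms). A self-contained sketch with made-up numbers, mirroring the array shapes built above:

import numpy as np

# offsets already box-wrapped and scaled to each cluster's rvir (one row per cluster);
# dvz already scaled to each cluster's vrms -- all values here are placeholders
D = np.array([[0.5, 0.5, 3.0],   # in the projected circle, outside the sphere -> interloper
              [0.5, 0.5, 0.5],   # inside the sphere -> genuine member, not an interloper
              [3.0, 3.0, 0.1]])  # outside the projected circle -> ignored
dvz = np.array([1.0, 1.0, 1.0])

interloper_dR, interloper_dV = 2.5, 2.0
D2 = np.power(D, 2)
is_interloper = np.logical_and(
    np.logical_and(D2[:, 0] + D2[:, 1] < interloper_dR ** 2,   # inside projected circle
                   np.sum(D2, axis=1) > interloper_dR ** 2),   # outside sphere
    dvz < interloper_dV)                                       # velocity offset cut
print(is_interloper)  # [ True False False]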
Example #9
def _process_orbits(infile,
                    outfile=None,
                    scales=None,
                    skipsnaps=None,
                    skipmore_for_select=None,
                    h0=None,
                    lbox=None,
                    m_min_satellite=None,
                    m_max_satellite=None,
                    m_min_cluster=None,
                    m_max_cluster=None,
                    interloper_dR=None,
                    cluster_ids=None,
                    cluster_xyzs=None,
                    cluster_rvirs=None,
                    cluster_mvirs=None,
                    **kwargs):

    # because of parallel writing, putting reads of the 'outfile' here
    # causes errors
    _log('  processing file, reading', infile.split('/')[-1])
    read_tree.read_tree(infile)
    _log('  read complete', infile.split('/')[-1])

    # all_halos = read_tree.all_halos
    halo_tree = read_tree.halo_tree

    nsnaps = len(scales) - skipsnaps - skipmore_for_select

    out_arrays = list()

    halo_list = halo_tree.halo_lists[skipsnaps + skipmore_for_select].halos

    for progress, halo in enumerate(halo_list):
        if progress % 1000 == 0:
            _log('    process progress', progress, '/', len(halo_list), '|',
                 infile.split('/')[-1])

        xyz = np.array([halo.pos[0] / h0, halo.pos[1] / h0, halo.pos[2] / h0],
                       dtype=np.float64)
        D = xyz - cluster_xyzs
        D[D > lbox.value / 2.] -= lbox.value
        D[D < -lbox.value / 2.] += lbox.value
        D *= 1.E3 / cluster_rvirs[:, np.newaxis]  # rvir in kpc
        D = np.power(D, 2)
        is_near = np.sum(D, axis=1) < np.power(interloper_dR, 2)
        if np.sum(is_near) == 0:
            continue
        # if multiple possible hosts pick most massive
        host_id = cluster_ids[is_near][np.argmax(cluster_mvirs[is_near])]

        halo_branch = list()
        superparent_branch = list()

        for level in range(nsnaps):

            if len(halo_branch) == 0:
                halo_branch.append(halo)
                superparent_branch.append(_get_superparent(halo))

            elif halo_branch[-1] is None:
                halo_branch.append(None)
                superparent_branch.append(None)

            else:
                halo_branch.append(halo_branch[-1].prog)
                superparent_branch.append(_get_superparent(halo_branch[-1]))

        halo_branch = halo_branch[::-1]
        superparent_branch = superparent_branch[::-1]

        mvir_max = np.nanmax([
            h.mvir / h0 if h is not None else np.nan
            for h in halo_branch
        ])
        # mvir_max is already in h0-corrected units, so compare to the limits directly
        if (mvir_max > m_max_satellite.value) or \
           (mvir_max < m_min_satellite.value):
            continue

        for level in range(skipmore_for_select):
            if halo_branch[-1] is None:
                halo_branch.append(None)
                superparent_branch.append(None)
            else:
                halo_branch.append(halo_branch[-1].desc)
                superparent_branch.append(_get_superparent(halo_branch[-1]))

        out_arrays.append(
            _extract_orbit_arrays(host_id,
                                  halo_branch,
                                  superparent_branch,
                                  h0=h0,
                                  skipmore_for_select=skipmore_for_select))

    read_tree.delete_tree()
    _log('  processing complete', infile.split('/')[-1])

    return out_arrays
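Example #8 reads per-cluster datasets ('ids', 'xyz', 'vxyz', 'rvir', 'vrms') from a 'clusters' group of the HDF5 `outfile`. A minimal sketch of how such a group could be written with h5py, assuming each cluster's arrays arrive as a dict keyed by those names; the dict layout and the choice of group keys are assumptions, not the original writer:

import h5py
import numpy as np

def write_clusters(outfile, cluster_arrays):
    # cluster_arrays: iterable of dicts, one per cluster, holding per-snapshot arrays
    with h5py.File(outfile, 'w') as f:
        grp = f.create_group('clusters')
        for arrays in cluster_arrays:
            # key each cluster group by its halo id at the final snapshot
            cg = grp.create_group(str(arrays['ids'][-1]))
            for name in ('ids', 'xyz', 'vxyz', 'rvir', 'vrms'):
                cg.create_dataset(name, data=np.asarray(arrays[name]))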