Example #1
0
def test_getpy_dump_load():
    """Round-trip a ``gp.Dict`` through ``dump``/``load`` and compare both copies.

    The dump file lives in a temporary directory so the test does not leave a
    ``test.bin`` artifact behind in the current working directory.
    """
    import os
    import tempfile

    key_type = np.dtype('u8')
    value_type = np.dtype('u8')

    keys = np.random.randint(1, 1000, size=10**1, dtype=key_type)
    values = np.random.randint(1, 1000, size=10**1, dtype=value_type)

    gp_dict_1 = gp.Dict(key_type, value_type)
    gp_dict_1[keys] = values

    with tempfile.TemporaryDirectory() as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'test.bin')
        gp_dict_1.dump(dump_path)

        gp_dict_2 = gp.Dict(key_type, value_type)
        gp_dict_2.load(dump_path)

    assert len(gp_dict_1) == len(gp_dict_2)
    # Equal sizes alone do not prove a faithful round trip; the reloaded dict
    # must also return the same value for every inserted key.
    assert np.array_equal(gp_dict_1[keys], gp_dict_2[keys])
Example #2
0
def test_getpy_vectorized_methods_with_bytearray_dtype():
    """Smoke-test the vectorized ``gp.Dict`` API with ``bytearray50`` values.

    Exercises bulk assignment, iteration, ``items()``, bulk lookup,
    ``contains`` and the in-place ``iadd``/``isub``/``ior``/``iand`` calls.
    Nothing is asserted; the test passes when none of the calls raise.
    """
    key_type = np.dtype('u8')
    value_type = gp.types['bytearray50']

    gp_dict = gp.Dict(key_type, value_type)

    keys = np.random.randint(1, 1000, size=200, dtype=key_type)
    # 16 bits * 25 = 400 bits per row, which packbits turns into 50 bytes.
    # ``np.bool_`` is used because the bare ``np.bool`` alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    bit_row = np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1] * 25,
                       dtype=np.bool_)
    values = np.packbits([bit_row] * 200, axis=1).view(value_type)
    gp_dict[keys] = values

    # The results below are intentionally unused: only the calls are tested.
    iterated_keys = [key for key in gp_dict]
    iterated_keys_and_values = [(key, value) for key, value in gp_dict.items()]

    select_keys = np.random.choice(keys, size=100)
    select_values = gp_dict[select_keys]

    random_keys = np.random.randint(1, 1000, size=500, dtype=key_type)
    random_keys_mask = gp_dict.contains(random_keys)

    # Only look up keys that are known to be present.
    mask_keys = random_keys[random_keys_mask]
    mask_values = gp_dict[mask_keys]

    gp_dict.iadd(keys, values)
    gp_dict.isub(keys, values)
    gp_dict.ior(keys, values)
    gp_dict.iand(keys, values)
Example #3
0
def test_getpy_vectorized_methods_with_default():
    """Check bulk lookups on a ``gp.Dict`` built with ``default_value=0``.

    Keys that ``contains`` reports as missing must look up as 0; present keys
    must look up as non-zero (inserted values are drawn from [1, 1000)).
    The in-place vectorized operations are then called as a smoke test.
    """
    u8 = np.dtype('u8')
    gp_dict = gp.Dict(u8, u8, default_value=0)

    keys = np.random.randint(1, 1000, size=200, dtype=u8)
    values = np.random.randint(1, 1000, size=200, dtype=u8)
    gp_dict[keys] = values

    # Iteration results are unused; the calls themselves are what is tested.
    iterated_keys = [k for k in gp_dict]
    iterated_keys_and_values = [(k, v) for k, v in gp_dict.items()]

    select_keys = np.random.choice(keys, size=100)
    select_values = gp_dict[select_keys]

    random_keys = np.random.randint(1, 1000, size=500, dtype=u8)
    present_mask = gp_dict.contains(random_keys)
    looked_up = gp_dict[random_keys]

    for is_present, found in zip(present_mask, looked_up):
        if is_present:
            assert found != 0
        else:
            assert found == 0

    ones = np.ones(500, dtype=u8)
    for in_place_op in (gp_dict.iadd, gp_dict.isub, gp_dict.ior, gp_dict.iand):
        in_place_op(random_keys, ones)
Example #4
0
def test_gp_dict():
    """Overwrite half of a ``gp.Dict`` and compare it with a plain dict.

    Keys 0..9 are first set to 0, then keys 0..4 are set to 1; the result,
    converted with ``as_dict``, must equal ``{i: i < 5}`` for i in 0..9.
    """
    import getpy as gp  # type: ignore

    table = gp.Dict(HASH_TYPE, np.uint8)
    table[np.arange(10, dtype=HASH_TYPE)] = np.zeros(10, dtype=np.uint8)
    table[np.arange(5, dtype=HASH_TYPE)] = np.ones(5, dtype=np.uint8)

    expected = {}
    for i in range(10):
        expected[i] = i < 5

    assert expected == as_dict(table)
Example #5
0
    def __init__(self, data_format):
        """Initialise the social-score bookkeeping.

        For each proximity threshold, two ``gp.Dict`` instances (``i8`` keys,
        ``f8`` values, default 0.0) are created: one in ``social_score_dict``
        and one in ``update_iteration_count_dict``.
        """
        super().__init__(data_format)
        self.nominal_speed = .5
        self.social_score_proximity_threshold_list = [0.2, 0.3, 0.5]

        self.social_score_dict = {}
        self.update_iteration_count_dict = {}

        id_dtype = np.dtype('i8')
        score_dtype = np.dtype('f8')
        for thresh in self.social_score_proximity_threshold_list:
            self.social_score_dict[thresh] = gp.Dict(
                id_dtype,
                score_dtype,
                default_value=np.asarray(0.0).astype('f8'),
            )
            self.update_iteration_count_dict[thresh] = gp.Dict(
                id_dtype,
                score_dtype,
                default_value=np.asarray(0.0).astype('f8'),
            )

        self.time_step_done = False
        self.min_social_score_agent = 0
        # One running minimum per threshold, starting at +infinity.
        self.min_social_score = (
            [float('Inf')] * len(self.social_score_proximity_threshold_list)
        )
        self.social_score_collision_threshold = 0.0
        self.agent_radius = 0.2  # needs to be the same as in collision
Example #6
0
def test_getpy_types():
    """Insert and read back 256 items for every pair in ``gp.dict_types``.

    Nothing is asserted; the test passes when construction, bulk assignment
    and bulk lookup succeed for every (key dtype, value dtype) pair.
    """
    for key_type, value_type in gp.dict_types:
        gp_dict = gp.Dict(key_type, value_type)

        keys = np.array(range(256), dtype=key_type)
        values = np.array(range(256), dtype=value_type)

        gp_dict[keys] = values

        # Read back inside the loop so every dtype pair is exercised, not
        # just the last one. This also avoids a NameError when
        # ``gp.dict_types`` is empty.
        looked_up = gp_dict[keys]
Example #7
0
def test_getpy_big_dict_u8_u8():
    """Bulk-insert 100 batches of 10**4 random ``u8`` keys into one dict.

    The same value array is reused for every batch. Nothing is asserted.
    """
    u8 = np.dtype('u8')
    upper_bound = 10**15
    batch_size = 10**4

    gp_dict = gp.Dict(u8, u8)
    values = np.random.randint(upper_bound, size=batch_size, dtype=u8)

    for _ in range(100):
        batch_keys = np.random.randint(upper_bound, size=batch_size, dtype=u8)
        gp_dict[batch_keys] = values
Example #8
0
def test_getpy_very_big_dict_u4_u4():
    """Bulk-insert 100 batches of 10**5 random ``u4`` keys into one dict.

    The same value array is reused for every batch. Nothing is asserted.
    """
    u4 = np.dtype('u4')
    upper_bound = 10**9
    batch_size = 10**5

    gp_dict = gp.Dict(u4, u4)
    values = np.random.randint(upper_bound, size=batch_size, dtype=u4)

    for _ in range(100):
        batch_keys = np.random.randint(upper_bound, size=batch_size, dtype=u4)
        gp_dict[batch_keys] = values
Example #9
0
def test_getpy_very_big_dict_u8_S16():
    """Bulk-insert 100 batches of random ``u8`` keys with ``S16`` values.

    The values are 10**5 random 16-byte strings generated once and reused for
    every batch. Nothing is asserted.
    """
    key_type = np.dtype('u8')
    value_type = np.dtype('S16')
    batch_size = 10**5

    gp_dict = gp.Dict(key_type, value_type)

    random_blobs = [np.random.bytes(16) for _ in range(batch_size)]
    values = np.array(random_blobs, dtype=value_type)

    for _ in range(100):
        batch_keys = np.random.randint(10**15, size=batch_size, dtype=key_type)
        gp_dict[batch_keys] = values
Example #10
0
 def load_gp(self, filename):
     """Load a dumped ``gp.Dict`` from ``filename`` and merge it into ``self``.

     The file is read into a temporary dict and its entries are handed to
     ``self.merge`` as two arrays, rather than being loaded directly.
     """
     loaded = gp.Dict(HASH_TYPE, np.uint8, default_value=False)
     loaded.load(str(filename))
     size = len(loaded)

     # Two passes over items(): first collect keys, then values.
     key_stream = (k for k, _ in loaded.items())
     keys = np.fromiter(key_stream, dtype=HASH_TYPE, count=size)

     value_stream = (v for _, v in loaded.items())
     values = np.fromiter(value_stream, dtype=np.uint8, count=size)

     self.merge(keys, values)
 def __init__(self, data_format):
     """Initialise the collision counters and per-agent bookkeeping.

     ``collisions_per_agent`` is a ``gp.Dict`` with ``i8`` keys and values
     whose missing entries read as 0.
     """
     super().__init__(data_format)
     self.collision_radius = 0.0
     int64 = np.dtype('i8')
     self.collisions_per_agent = gp.Dict(int64, int64, default_value=0)
     self.agent_radius = 0.2  # 0.2 normal  0.1 for UNIV crowded
     self.total_collisions = 0
     self.agents = {}
     self.num_agents_buffer = []
     self.time_step_done = False
     self.time_step = 0
Example #12
0
def test_getpy_big_dict_uint64_lookup():
    """Insert 10**5 random ``u8`` items once, then bulk-look them up 100 times.

    Nothing is asserted; the test passes when no lookup raises.
    """
    u8 = np.dtype('u8')
    item_count = 10**5

    gp_dict = gp.Dict(u8, u8)

    keys = np.random.randint(10**15, size=item_count, dtype=u8)
    values = np.random.randint(10**15, size=item_count, dtype=u8)
    gp_dict[keys] = values

    for _ in range(100):
        values = gp_dict[keys]
Example #13
0
def test_getpy_methods_with_multidim_and_strings():
    """Compare a ``gp.Dict`` filled from 10x10 ``S8`` arrays with a plain dict.

    Checks that the number of stored items equals the number of unique keys
    and that every key looks up to the same value as in the reference dict.
    """
    s8 = np.dtype('S8')

    raw_keys = [np.random.bytes(4) for _ in range(10**2)]
    keys = np.array(raw_keys, dtype=s8).reshape(10, 10)
    raw_values = [np.random.bytes(4) for _ in range(10**2)]
    values = np.array(raw_values, dtype=s8).reshape(10, 10)

    gp_dict = gp.Dict(s8, s8)
    gp_dict[keys] = values

    # Reference mapping; for repeated keys the last value wins.
    p_dict = dict(zip(keys.flat, values.flat))

    assert len(gp_dict) == len(np.unique(keys.flat))
    matches = [gp_dict[k] == p_dict[k] for k in keys.flat]
    assert all(matches)
Example #14
0
def test_getpy_methods_with_strings():
    """Compare a ``gp.Dict`` filled from 1-D ``S8`` arrays with a plain dict.

    Checks that the number of stored items equals the number of unique keys
    and that every key looks up to the same value as in the reference dict.
    """
    s8 = np.dtype('S8')

    raw_keys = [np.random.bytes(8) for _ in range(10**2)]
    keys = np.array(raw_keys, dtype=s8)
    raw_values = [np.random.bytes(8) for _ in range(10**2)]
    values = np.array(raw_values, dtype=s8)

    gp_dict = gp.Dict(s8, s8)
    gp_dict[keys] = values

    # Reference mapping; for repeated keys the last value wins.
    p_dict = dict(zip(keys, values))

    assert len(gp_dict) == len(np.unique(keys))
    matches = [gp_dict[k] == p_dict[k] for k in keys]
    assert all(matches)
Example #15
0
def test_getpy_methods_with_multidim():
    """Compare a ``gp.Dict`` filled from 10x10 ``u8`` arrays with a plain dict.

    Checks that the number of stored items equals the number of unique keys
    and that every key looks up to the same value as in the reference dict.
    """
    u8 = np.dtype('u8')

    keys = np.random.randint(1, 1000, size=10**2, dtype=u8).reshape(10, 10)
    values = np.random.randint(1, 1000, size=10**2, dtype=u8).reshape(10, 10)

    gp_dict = gp.Dict(u8, u8)
    gp_dict[keys] = values

    # Reference mapping; for repeated keys the last value wins.
    p_dict = dict(zip(keys.flat, values.flat))

    assert len(gp_dict) == len(np.unique(keys))
    matches = [gp_dict[k] == p_dict[k] for k in keys.flat]
    assert all(matches)
Example #16
0
def test_getpy_methods():
    """Compare a ``gp.Dict`` filled from 1-D ``u8`` arrays with a plain dict.

    Checks that the number of stored items equals the number of unique keys
    and that every key looks up to the same value as in the reference dict.
    """
    u8 = np.dtype('u8')

    keys = np.random.randint(1, 1000, size=10**2, dtype=u8)
    values = np.random.randint(1, 1000, size=10**2, dtype=u8)

    gp_dict = gp.Dict(u8, u8)
    gp_dict[keys] = values

    # Reference mapping; for repeated keys the last value wins.
    p_dict = dict(zip(keys, values))

    assert len(gp_dict) == len(np.unique(keys))
    matches = [gp_dict[k] == p_dict[k] for k in keys]
    assert all(matches)
Example #17
0
def build_gp_dict(data_set, key_type, value_type):
    """
    Build a gp.Dict from a 2-D dataset.

    Column 0 is cast to int64 and used as the keys. The remaining columns are
    cast to int32, flattened, and reinterpreted as int64 before being stored
    as the values (presumably two int32 columns per row, packed into one
    int64 value - confirm against the caller).
    """
    gp_dict = gp.Dict(key_type, value_type)

    key_column = np.array(data_set[:, 0])
    value_columns = np.array(data_set[:, 1:])

    # Go through int64 first, exactly as before, then narrow to int32 so the
    # flattened buffer can be viewed as int64 words.
    packed_values = (
        value_columns.astype(np.int64)
        .astype(np.int32)
        .reshape(-1)
        .view(np.int64)
    )

    gp_dict[key_column.astype(np.int64)] = packed_values
    return gp_dict
Example #18
0
def test_getpy_very_big_dict_uint64_bytearray32():
    """Bulk-insert 100 batches of random ``u8`` keys with ``bytearray32`` values.

    The value array is built once and reused for every batch. Nothing is
    asserted; the test passes when none of the assignments raise.
    """
    key_type = np.dtype('u8')
    value_type = gp.types['bytearray32']

    gp_dict = gp.Dict(key_type, value_type)

    # 16 bits * 16 = 256 bits per row, which packbits turns into 32 bytes.
    # ``np.bool_`` is used because the bare ``np.bool`` alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    bit_row = np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1] * 16,
                       dtype=np.bool_)
    values = np.packbits([bit_row] * 10**5, axis=1).view(value_type)

    for _ in range(10**2):
        keys = np.random.randint(10**15, size=10**5, dtype=key_type)
        gp_dict[keys] = values
Example #19
0
def test_getpy_methods_with_default():
    """Check that missing ``u8`` keys look up as the configured default.

    Inserted values are drawn from [1, 1000), so none of them can equal the
    default of 4242: present keys must differ from it, absent keys must
    equal it.
    """
    u8 = np.dtype('u8')

    keys = np.random.randint(1, 1000, size=10**2, dtype=u8)
    values = np.random.randint(1, 1000, size=10**2, dtype=u8)

    default_value = 4242
    gp_dict = gp.Dict(u8, u8, default_value=default_value)
    gp_dict[keys] = values

    random_keys = np.random.randint(1, 1000, size=500, dtype=u8)
    random_values = gp_dict[random_keys]

    present_idx = np.where(gp_dict.contains(random_keys))
    assert np.all(random_values[present_idx] != default_value)

    absent_idx = np.where(np.logical_not(gp_dict.contains(random_keys)))
    assert np.all(random_values[absent_idx] == default_value)
Example #20
0
def test_getpy_methods_with_default_and_strings():
    """Check that missing ``S8`` keys look up as the configured default.

    The default is a random 8-byte string: values looked up for present keys
    must differ from it, values for absent keys must equal it.
    """
    s8 = np.dtype('S8')

    raw_keys = [np.random.bytes(8) for _ in range(10**2)]
    keys = np.array(raw_keys, dtype=s8)
    raw_values = [np.random.bytes(8) for _ in range(10**2)]
    values = np.array(raw_values, dtype=s8)

    default_value = np.random.bytes(8)
    gp_dict = gp.Dict(s8, s8, default_value=default_value)
    gp_dict[keys] = values

    raw_probe_keys = [np.random.bytes(8) for _ in range(10**3)]
    random_keys = np.array(raw_probe_keys, dtype=s8)
    random_values = gp_dict[random_keys]

    present_idx = np.where(gp_dict.contains(random_keys))
    assert np.all(random_values[present_idx] != default_value)

    absent_idx = np.where(np.logical_not(gp_dict.contains(random_keys)))
    assert np.all(random_values[absent_idx] == default_value)
Example #21
0
def preload(gp_dict_file, loop_archive, loop_struct_silent,
            rosetta_flags_file):
    """
    Load everything a run needs from the user-supplied paths.

    Loads an int64/int64 gp.Dict from ``gp_dict_file``, reads the lines of
    ``loop_archive``, calls ``run_pyrosetta_with_flags`` with the flags file,
    and calls ``silent_preload`` on ``loop_struct_silent``.

    Returns the tuple (gp_dict, loop_list, sfd, silent_index, silent_out).
    """
    gp_dict = gp.Dict(np.int64, np.int64)
    gp_dict.load(gp_dict_file)

    with open(loop_archive, "r") as archive:
        loop_list = archive.read().splitlines()

    # Call order is kept: flags are applied before the silent file is touched.
    run_pyrosetta_with_flags(rosetta_flags_file)
    sfd, silent_index, silent_out = silent_preload(loop_struct_silent)

    return gp_dict, loop_list, sfd, silent_index, silent_out
Example #22
0
def test_getpy_types():
    """Insert and read back 10 items for every pair in ``gp.dict_types``.

    Unicode dtypes (``kind == 'U'``) get 100-character strings; every other
    dtype gets the integers 0..9. Nothing is asserted; the test passes when
    construction, bulk assignment and bulk lookup succeed for every pair.
    """
    for key_type, value_type in gp.dict_types:
        gp_dict = gp.Dict(key_type, value_type)

        if key_type.kind == 'U':
            keys = np.array(['0123456789' * 10 for i in range(10)],
                            dtype=key_type)
        else:
            keys = np.array(range(10), dtype=key_type)

        if value_type.kind == 'U':
            values = np.array(['0123456789' * 10 for i in range(10)],
                              dtype=value_type)
        else:
            values = np.array(range(10), dtype=value_type)

        gp_dict[keys] = values

        # Read back inside the loop so every dtype pair is exercised, not
        # just the last one. This also avoids a NameError when
        # ``gp.dict_types`` is empty.
        looked_up = gp_dict[keys]
Example #23
0
def retrieve_gp_dict_from_cache(ori, cart, key_type, value_type):
    """
    Look up a cached gp_dict for (ori, cart); returns None on a cache miss.

    The cache directory ``cache/gp_dicts/`` next to this file is created if
    needed. ``hashmaps.json`` inside it maps the string form of the
    ``(ori, cart)`` tuple to the file name of a dumped dict. A missing or
    unparsable index, or a key that is not in it, counts as a miss.
    """
    cache_dir = os.path.join(os.path.dirname(__file__), "cache/gp_dicts/")
    os.makedirs(cache_dir, exist_ok=True)

    lookup_key = f"{(ori, cart)}"
    try:
        with open(cache_dir + "/hashmaps.json", "r") as index_file:
            index = json.load(index_file)
        cached_name = index.get(lookup_key)
    except (FileNotFoundError, json.decoder.JSONDecodeError):
        return None

    if cached_name is None:
        return None

    cached_dict = gp.Dict(key_type, value_type)
    cached_dict.load(cache_dir + "/" + cached_name)
    return cached_dict
Example #24
0
def test_getpy_vectorized_methods():
    """Check the vectorized ``gp.Dict`` API against a plain dict (``u8``/``u8``).

    Verifies the item count and the sorted item list against a reference
    dict, then calls bulk lookup, ``contains`` and the in-place operations
    as a smoke test.
    """
    u8 = np.dtype('u8')
    gp_dict = gp.Dict(u8, u8)

    keys = np.random.randint(1, 1000, size=200, dtype=u8)
    values = np.random.randint(1, 1000, size=200, dtype=u8)
    gp_dict[keys] = values

    # Iteration results are unused; the calls themselves are what is tested.
    iterated_keys = [k for k in gp_dict]
    iterated_keys_and_values = [(k, v) for k, v in gp_dict.items()]

    assert len(gp_dict) == len(np.unique(keys))

    # Reference mapping; for repeated keys the last value wins.
    p_dict = dict(zip(keys, values))

    assert len(gp_dict) == len(p_dict)
    gp_items = sorted([(k, v) for k, v in gp_dict.items()])
    assert gp_items == sorted(p_dict.items())

    select_keys = np.random.choice(keys, size=100).astype(u8)
    select_values = gp_dict[select_keys]

    random_keys = np.random.randint(1, 1000, size=500).astype(u8)
    random_keys_mask = gp_dict.contains(random_keys)

    # Only look up keys that are known to be present.
    mask_keys = random_keys[random_keys_mask]
    mask_values = gp_dict[mask_keys]

    for in_place_op in (gp_dict.iadd, gp_dict.isub, gp_dict.ior, gp_dict.iand):
        in_place_op(keys, values)
Example #25
0
def main(dict_list, frag_list, silent_list):
    """Merge several dumped getpy dicts and their fragment lists into one set.

    Args:
        dict_list: Path to a text file with one dumped getpy dict path per line.
        frag_list: Path to a text file with one fragment-list path per line,
            paired line by line with ``dict_list``.
        silent_list: Passed unchanged to ``combine_silents``.

    Files written to the current working directory:
        ``getpy_dict.bin``: merged int64 -> int64 dict; each value packs two
            int32 numbers (start offset, number of strings).
        ``loop_tag_index.txt``: all fragment strings, one per line, in the
            order addressed by the dict values.
        ``key_val_data.npz``: int64 array with columns (key, offset, count).
    ``combine_silents`` is then called with ``"loop_archive.silent"``.
    """
    keys_unique = {}
    with open(dict_list, "r") as f:
        dict_paths = f.read().splitlines()
    with open(frag_list, "r") as f:
        frag_paths = f.read().splitlines()

    key_type = np.dtype("i8")
    value_type = np.dtype("i8")
    for dict_path, frag_path in zip(dict_paths, frag_paths):
        dict_temp = gp.Dict(key_type, value_type)
        dict_temp.load(dict_path)
        with open(frag_path, "r") as f:
            frags = f.read().splitlines()

        keys = dict_temp.keys()
        # Unpack each int64 value into its two int32 halves: (offset, count).
        vals = dict_temp[keys].view(np.int32).reshape(-1, 2)

        keys_loops_iter = (
            (key, frags[val[0] : val[0] + val[1]])
            for key, val in zip(keys, vals)
        )

        update_unique_key_dict(keys_unique, keys_loops_iter)

    logging.debug("starting hashmap population")
    offset = 0
    strings_master = []
    gp_vals_list = []
    gp_keys_list = []
    for key, strings in keys_unique.items():
        logging.debug(f"key string pair: {key}, {strings}")
        num_strings = len(strings)
        logging.debug(f"num_strings: {num_strings}")
        strings_master.extend(strings)
        gp_keys_list.append(key)
        gp_vals_list.append([offset, num_strings])
        offset += num_strings
        logging.debug(f"new offset: {offset}")

    # Explicit dtype and shape keep the arrays well-formed when there are no
    # keys at all; a bare np.array([]) would be a 1-D float64 array.
    gp_keys = np.array(gp_keys_list, dtype=np.int64)
    gp_vals = np.array(gp_vals_list, dtype=np.int64).reshape(-1, 2)

    logging.debug(f"gp_vals: {gp_vals}")

    gp_vals_i32_flat = gp_vals.astype(np.int32).reshape(-1)

    logging.debug(f"gp_vals_i32_flat: {gp_vals_i32_flat}")

    # Reinterpret each (offset, count) int32 pair as a single int64 value.
    gp_vals_i64 = gp_vals_i32_flat.view(np.int64)

    gp_dict = gp.Dict(np.int64, np.int64)
    gp_dict[gp_keys] = gp_vals_i64

    gp_dump = "getpy_dict.bin"
    gp_dict.dump(gp_dump)

    # int64, not the float64 default of np.empty: float64 cannot represent
    # every int64 key exactly, so large keys would be silently altered.
    key_val_data = np.empty((gp_keys.shape[0], 3), dtype=np.int64)
    key_val_data[:, 0] = gp_keys
    key_val_data[:, 1:] = gp_vals

    string_master_file = "loop_tag_index.txt"

    with open(string_master_file, mode="wt", encoding="utf-8") as f:
        f.write("\n".join(strings_master))
        f.write("\n")

    npz_out = "key_val_data.npz"
    np.savez(npz_out, key_val_data)

    combine_silents(silent_list, "loop_archive.silent")
Example #26
0
def main(
    silent_file,
    rosetta_flags_file="",
    xbin_cart_list=[],
    xbin_ori_list=[],
    max_len=20,
    scan_file="",
):

    """
    Generates a loop e2e xbin table referencing a silentfile with metadata

    e2e hashtable holds just xbin keys and table entry keys. The table entry
    keys are just arbitrary indices that go to a table referencing a silentfile

    The table is just a list of tags and positions in a text list. The values
    deposited in the getpy dict are a two number coordinate: start index and num entries.
    To find all the appropriate values, just load the txt list into a python list
    and slice it: list[start:start + num_entries].

    Args:
        silent_file: Silent file to read tags and fragments from.
        rosetta_flags_file: Passed to ``run_pyrosetta_with_flags``.
        xbin_cart_list: Cartesian resolutions, passed to ``setup_xbin_vars``.
        xbin_ori_list: Orientation resolutions, passed to ``setup_xbin_vars``.
        max_len: Passed as ``max_n_mer`` to ``parse_xforms_from_poselets``.
        scan_file: Passed to ``setup_xbin_vars``.

    For every (cart, ori) pair returned by ``setup_xbin_vars`` this writes
    ``gp_c{cart}_o{ori}.bin`` and adds a key/value dataset and a string
    archive dataset to ``fragment_data.hf5``.

    NOTE: the list defaults are never mutated in this function (the names are
    rebound to the result of ``setup_xbin_vars``); whether that helper mutates
    its arguments cannot be seen from here.
    """
    run_pyrosetta_with_flags(rosetta_flags_file)

    key_type = np.dtype("i8")
    value_type = np.dtype("i8")

    xforms = []
    loop_data_string_list = []

    sfd = SilentFileData(
        silent_file, False, False, "binary", SilentFileOptions()
    )

    sfd.read_file(silent_file)
    for tag in sfd.tags():
        logging.debug(f"working on tag: {tag}")
        try:
            poses = silent_tag_to_poselets(silent_file, tag, 1, 2)
        except AssertionError:
            logging.debug("assertion in hackload failed, skipping")
            continue
        except RuntimeError:
            logging.debug("Unable to load this fragment in hackload, skipping")
            continue
        tag_loop_data_list, tag_xforms_list = parse_xforms_from_poselets(
            poses, tag, max_n_mer=max_len
        )
        loop_data_string_list.extend(tag_loop_data_list)
        xforms.extend(tag_xforms_list)

        logging.debug("loop data loaded")
    xbin_cart_list, xbin_ori_list = setup_xbin_vars(
        xbin_cart_list, xbin_ori_list, scan_file
    )
    logging.debug("fragments extracted, building tables")
    logging.debug(
        f"""xbin params:
    c:{xbin_cart_list}
    o:{xbin_ori_list}"""
    )
    for xbin_cart, xbin_ori in product(xbin_cart_list, xbin_ori_list):

        binner = xb(cart_resl=xbin_cart, ori_resl=xbin_ori)
        all_keys_non_unique = binner.get_bin_index(np.array(xforms))

        # Group the fragment strings by bin key, keeping first-seen key order.
        keys_unique = {}
        for key, loop_data_string in zip(
            all_keys_non_unique, loop_data_string_list
        ):
            keys_unique.setdefault(key, []).append(loop_data_string)

        offset = 0
        strings_master = []
        gp_vals_list = []
        gp_keys_list = []
        for key, strings in keys_unique.items():
            num_strings = len(strings)
            strings_master.extend(strings)
            gp_keys_list.append(key)
            gp_vals_list.append([offset, num_strings])
            offset += num_strings
        gp_keys = np.array(gp_keys_list, dtype=np.int64)
        # reshape keeps the (n, 2) layout even when there are no keys.
        general_vals = np.array(gp_vals_list, dtype=np.int64).reshape(-1, 2)
        # squash data to fit into getpy_dict: each (offset, count) pair of
        # int32s is reinterpreted as one int64 value
        gp_vals = general_vals.astype(np.int32).reshape(-1).view(np.int64)

        # A fresh dict per resolution pair: reusing one dict across
        # iterations would leave keys from earlier (cart, ori) binnings in
        # every later dump.
        gp_dict = gp.Dict(key_type, value_type)
        gp_dict[gp_keys] = gp_vals

        gp_dump = f"gp_c{xbin_cart}_o{xbin_ori}.bin"
        gp_dict.dump(gp_dump)

        key_val_data = np.empty((gp_keys.shape[0], 3), dtype=np.int64)
        key_val_data[:, 0] = gp_keys
        key_val_data[:, 1:] = general_vals

        # np.bytes_ is the supported spelling; the np.string_ alias was
        # removed in NumPy 2.0.
        np_strings_master = np.array(strings_master, dtype=np.bytes_)

        # The context manager closes the file even if a write below raises.
        with h5py.File("fragment_data.hf5", "a") as hdf5:
            kv_group = hdf5.require_group("key_value_data")
            key_val_ds = kv_group.require_dataset(
                f"key_val_index_cart_{xbin_cart}_ori_{xbin_ori}_nmer_{max_len}",
                key_val_data.shape,
                dtype=key_val_data.dtype,
            )
            key_val_ds[:] = key_val_data
            key_val_ds.attrs.create("cart_resl", data=xbin_cart)
            key_val_ds.attrs.create("ori_resl", data=xbin_ori)
            key_val_ds.attrs.create("max_len", data=max_len)
            key_val_ds.attrs.create(
                "description",
                data="this is a 2XN np array where col1 are the int64 xbin keys and col2/3 are two upcast int32s (row,n_strings) addressing a sequence of strings in a corresponding archive",
            )
            str_group = hdf5.require_group("string_archive")
            string_archive = str_group.require_dataset(
                f"string_archive_cart_{xbin_cart}_ori_{xbin_ori}_nmer_{max_len}",
                np_strings_master.shape,
                dtype=np_strings_master.dtype,
            )
            string_archive[:] = np_strings_master
            string_archive.attrs.create("cart_resl", data=xbin_cart)
            string_archive.attrs.create("ori_resl", data=xbin_ori)
            string_archive.attrs.create("max_len", data=max_len)
            string_archive.attrs.create(
                "description",
                data="Ordered archive of strings describing pose fragments by tag:start:end. The order is addressed by the corresponding key_val dataset",
            )