def test_Zipdist2():
    """Round-trip a small object through Zipdist2 and compare the attributes.

    A throwaway object holding a scalar, a numpy array and a pandas
    DataFrame is saved with ``_save``; a fresh object is then populated
    with ``_build`` from the same archive and checked against the source.
    """

    class X():
        def __init__(self, name):
            self.name = name

    # Source object with one attribute of each flavour under test.
    original = X(name='examplezip2')
    original.year = 2020
    original.exnp = np.zeros(3)
    original.expd = pd.DataFrame({"A": [1, 2, 3], "B": [2, 4, 6]})

    zipper = Zipdist2(name="examplezip2", target=original)
    assert zipper.target is original

    zipper._save(dest="xxx", dest_tar="xxx.tar.gz")
    assert os.path.isfile("xxx.tar.gz")

    # A fresh instance must not carry the extra attributes before the rebuild.
    restored = X(name='examplezip2')
    assert 'year' not in vars(restored)

    zipper._build(target=restored, dest="xxx", dest_tar="xxx.tar.gz")
    print(vars(original).keys())

    assert isinstance(restored.exnp, np.ndarray)
    print(restored.exnp)
    print(original.exnp)
    assert np.array_equal(restored.exnp, original.exnp)
    assert np.all(restored.expd.equals(original.expd))
    assert np.all(restored.name == original.name)
    assert np.all(restored.year == original.year)
def test_Zipdist2_example():
    """Walk through the documented Zipdist2 usage example end to end.

    Saves an object to ``archive.tar.gz``, then restores it twice: once in
    bulk via ``_build`` and once attribute by attribute via ``_ready``
    followed by ``_reload_complex`` / ``_reload_simple``.
    """
    from zipdist.zip2 import Zipdist2
    import pandas as pd
    import numpy as np
    import os

    class X():
        def __init__(self, name):
            self.name = name

    # Object to archive: a plain list, a 2D numpy array and a DataFrame.
    source_obj = X(name='example_target_object')
    source_obj.example_simple_attr = [1989, 2020]
    source_obj.example_np_attr = np.array([[0, 1, 2, 3], [4, 5, 6, 7]])
    source_obj.example_pd_attr = pd.DataFrame({"A": [1, 2, 3], "B": [2, 4, 6]})

    saver = Zipdist2(name="zipper", target=source_obj)
    saver._save(dest="archive", dest_tar="archive.tar.gz")
    assert os.path.isfile("archive.tar.gz")

    # Option 1: _build() reloads every archived attribute in one call.
    x_new = X(name='example_target_object')
    loader = Zipdist2(name="zipper", target=x_new)
    loader._build(target=x_new, dest="archive", dest_tar="archive.tar.gz")
    print(f"For instance:")
    print(f"\tx_new.example_simple_attr:\n{x_new.example_simple_attr}")
    print(f"\tx_new.example_np_attr:\n{x_new.example_np_attr}")
    print(f"\tx_new.example_pd_attr:\n{x_new.example_pd_attr}")

    # Option 2: _ready() prepares the archive, then attributes are reloaded
    # one at a time by name.
    x_new = X(name='example_target_object')
    loader = Zipdist2(name="zipper", target=x_new)
    loader._ready(target=x_new, dest="archive", dest_tar="archive.tar.gz")
    loader._reload_complex(k='example_np_attr')
    loader._reload_simple(k='example_simple_attr')
def test_TCRrep_func_tcrdist2_save_manual_rebuild(chains=None, metric="nw"):
    """Save a TCRrep via ``tcrdist2(save=True)`` and rebuild it with Zipdist2.

    Parameters
    ----------
    chains : list of str, optional
        Chains handed to ``TCRrep``. Defaults to ``['beta', 'alpha']``; a
        ``None`` sentinel is used so no list object is shared between calls.
    metric : str
        Metric name forwarded to ``TCRrep.tcrdist2``.
    """
    # Local import: this block cannot assume shutil is imported at file level.
    import shutil

    if chains is None:
        chains = ['beta', 'alpha']

    cpu = multiprocessing.cpu_count()

    # Really basic example: restrict the dash dataset to the 'NP' epitope.
    df = pd.read_csv(opj('tcrdist', 'datasets', 'dash.csv'))
    df = df[df.epitope.isin(['NP'])]

    tr = TCRrep(cell_df=df, chains=chains, organism='mouse')
    tr.tcrdist2(processes=cpu,
                metric=metric,
                dump=True,
                reduce=True,
                save=True,
                dest="default_archive",
                dest_tar_name="default_archive.tar.gz")

    # Portable replacement for `rm -rf`; a missing directory is not an error.
    # NOTE(review): the archive above is written with dest="default_archive"
    # but the folder removed here is "myTCRrep_archive" -- confirm which
    # folder this cleanup is meant to target.
    shutil.rmtree("myTCRrep_archive", ignore_errors=True)

    # Rebuild into a TCRrep created from an empty slice of the same frame.
    tr = TCRrep(cell_df=df.iloc[0:0, :], chains=chains, organism='mouse')
    z = Zipdist2(name="test_only", target=tr)
    z._build(dest_tar="default_archive.tar.gz", target=tr)

    assert isinstance(tr.paired_tcrdist, np.ndarray)
    assert isinstance(tr.pw_tcrdist, np.ndarray)
    assert np.array_equal(tr.pw_tcrdist, tr.paired_tcrdist)
def archive(self, dest=None, dest_tar_name=None, verbose=True, use_csv=False):
    """
    Write this TCRrep to a .tar.gz archive using Zipdist2.

    Parameters
    ----------
    dest : str
        e.g., 'default_archive'. When None, ``self.archive_name`` is used.
    dest_tar_name : str
        e.g., 'default_archive.tar.gz'. When None, it is derived from
        `dest` by appending '.tar.gz'.
    verbose : bool
        if True, report steps in archive process (forwarded to Zipdist2)
    use_csv : bool
        if True, archive will include .csv file. Useful for porting files
        to other applications, but creates large files.

    Example
    -------
    .. code-block:: python

        tr = TCRrep(cell_df = pd.DataFrame(), organism = "mouse")
        tr.archive(dest = "default_archive", dest_tar_name = "default_archive.tar.gz")

    Notes
    -----
    Side effects on the instance: the current ``cell_df`` index is copied to
    ``self.cell_df_index`` and ``self.cell_df`` is replaced by its
    ``reset_index()`` result before saving. A one-line message naming the
    tar file is always written to stdout, independent of `verbose`.

    See :py:meth:`tcrdist.repertoire.rebuild`: for rebuilding a TCRrep
    instance from a TCRrep archive .tar.gz file.
    """
    # Resolve defaults: directory name first, tar name derived from it.
    dest = self.archive_name if dest is None else dest
    dest_tar_name = f"{dest}.tar.gz" if dest_tar_name is None else dest_tar_name

    # Keep a copy of the original index, then flatten it into columns;
    # both happen before the save so they are part of the archived state.
    self.cell_df_index = self.cell_df.index.copy()
    self.cell_df = self.cell_df.reset_index()

    Zipdist2(name=dest_tar_name, target=self)._save(
        dest=dest,
        dest_tar=dest_tar_name,
        verbose=verbose,
        use_csv=use_csv,
    )

    message = f"\tArchiving your TCRrep using Zipdist2 in [{dest_tar_name}]\n"
    sys.stdout.write(message)
def make_tar_gz():
    """Create a sample object, archive it to ``xxx.tar.gz`` and return it.

    The object carries a scalar, several numpy arrays of different shapes
    and a pandas DataFrame. After ``Zipdist2._save`` runs, the ``xxx``
    directory is removed; ``xxx.tar.gz`` is not touched by this function's
    cleanup.

    Returns
    -------
    X
        The populated source object that was archived.
    """
    # Local import: this block cannot assume shutil is imported at file level.
    import shutil

    class X():
        def __init__(self, name):
            self.name = name

    x = X(name='xxx')
    x.year = 2020
    x.exnp = np.zeros(3)
    x.exnp1 = np.zeros(10)                                # 1D zeros
    x.exnp2 = np.ones(10)                                 # 1D ones
    x.exnp3 = np.array([[0, 1, 2, 3], [4, 5, 6, 7]])      # 2D array
    x.exnp4 = np.random.randint(0, 100, size=(30, 10, 2))  # 3D random ints
    x.expd = pd.DataFrame({"A": [1, 2, 3], "B": [2, 4, 6]})

    z = Zipdist2(name="xxx", target=x)
    z._save(dest="xxx", dest_tar="xxx.tar.gz")

    # Portable replacement for `rm -rf xxx`; a missing directory is ignored.
    shutil.rmtree('xxx', ignore_errors=True)
    return x
def rebuild(self, dest=None, dest_tar_name=None, verbose=True):
    """
    Repopulate this TCRrep instance from a Zipdist2 .tar.gz archive.

    Parameters
    ----------
    dest : str
        e.g., 'default_archive'. When None, ``self.archive_name`` is used.
        Passed to Zipdist2 as its name and used to derive `dest_tar_name`
        when that is not given.
    dest_tar_name : str
        e.g., 'default_archive.tar.gz'. When None, it is derived from
        `dest` by appending '.tar.gz'.
    verbose : bool
        If True, report rebuilding process steps.

    Example
    -------
    Shows :py:meth:`tcrdist.repertoire.archive` and
    :py:meth:`tcrdist.repertoire.rebuild` used together.

    .. code-block:: python

        tr = TCRrep(cell_df = pd.DataFrame(), organism = "mouse")
        tr.archive(dest = "default_archive", dest_tar_name = "default_archive.tar.gz")
        tr_new = TCRrep(cell_df = pd.DataFrame(), organism = "mouse")
        tr_new.rebuild(dest_tar_name = "default_archive.tar.gz")

    Notes
    -----
    See :py:meth:`tcrdist.repertoire.archive` for creating TCRrep archive file.
    """
    # Resolve defaults: base name first, tar name derived from it.
    dest = self.archive_name if dest is None else dest
    dest_tar_name = f"{dest}.tar.gz" if dest_tar_name is None else dest_tar_name

    Zipdist2(name=dest, target=self)._build(
        dest_tar=dest_tar_name,
        target=self,
        verbose=verbose,
    )

    # Validate the reloaded state: organism and chains first, then check
    # that cell_df is a pd.DataFrame.
    self._validate_organism()
    self._validate_chains()
    self._validate_cell_df()

    # Re-initialise the reference gene set from db_file (see repertoire_db.py).
    self.all_genes = repertoire_db.RefGeneSet(self.db_file).all_genes