Exemple #1
0
 def describe(self) -> str:
     """Build a multi-line text report covering every OMIC in ``self.omics``.

     The report starts with the dataset name. Each OMIC then adds a header
     line (its name plus a binary/continuous flag taken from
     ``self.is_binary``) and one line per statistic: sparsity, the nonzero
     values, and the results of ``counts_per_cell``, ``counts_per_gene``,
     ``log_counts``, ``local_mean`` and ``local_var``. Everything except the
     sparsity is summarised by the module-level ``describe`` helper.

     Returns:
         The complete report as a single string.
     """
     omic_prefix = "\n    "
     stat_prefix = "\n     "
     pieces = [f"SingleCellOMICs: {self.name}"]
     for omic in self.omics:
         matrix = self.numpy(omic)
         # Collect the nonzero entries one batch of rows at a time
         # (presumably to bound peak memory on large matrices).
         nonzero_chunks = []
         for start, end in batching(n=self.n_obs, batch_size=BATCH_SIZE):
             batch = matrix[start:end]
             positions = np.nonzero(batch)
             nonzero_chunks.append(batch[positions[0], positions[1]])
         nonzero_values = np.concatenate(nonzero_chunks)
         pieces.append(omic_prefix + "OMIC: '%s' - dtype: '%s'" % (
             omic.name, "binary" if self.is_binary(omic) else "continuous"))
         pieces.append(stat_prefix + 'Sparsity  : %.2f' % self.sparsity(omic))
         pieces.append(stat_prefix + 'Nonzeros  : %s' % describe(
             nonzero_values, shorten=True, float_precision=2))
         # The remaining lines share one shape: a label template filled with
         # the summary of a per-OMIC statistic.
         for template, statistic in (
                 ('Cell      : %s', self.counts_per_cell),
                 ('Gene      : %s', self.counts_per_gene),
                 ('LogCount  : %s', self.log_counts),
                 ('LocalMean : %s', self.local_mean),
                 ('LocalVar  : %s', self.local_var),
         ):
             pieces.append(stat_prefix + template % describe(
                 statistic(omic), shorten=True, float_precision=2))
     return "".join(pieces)
    X, y = X[perm], y[perm]
    X_train, y_train = X[:int(0.8 * n)], y[:int(0.8 * n)]
    X_test, y_test = X[int(0.8 * n):], y[int(0.8 * n):]
elif 'X_train' in ds and 'X_test' in ds:
    X_train, y_train = ds['X_train'], ds['y_train']
    X_test, y_test = ds['X_test'], ds['y_test']
else:
    raise RuntimeError('No support for dataset: "%s"' % args.ds)
# ====== post processing ====== #
# Labels with more than one dimension are collapsed to a single integer per
# sample by taking the argmax over the last axis (presumably they are one-hot
# or per-class scores -- TODO confirm).
if y_train.ndim > 1:
    y_train = np.argmax(y_train, axis=-1)
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=-1)
# Shape of one input batch: the leading (sample) dimension is left as None.
input_shape = (None, ) + X_train.shape[1:]
# Number of distinct label values found in the training labels.
n_classes = len(np.unique(y_train))
print("Train:", ctext(X_train.shape, 'cyan'), describe(X_train, shorten=True))
print("Test :", ctext(X_test.shape, 'cyan'), describe(X_test, shorten=True))
# ====== create basic tensor ====== #
# NOTE: from here on the names `X` and `y` refer to placeholders, not to the
# data arrays they may have been bound to earlier in the script.
X = K.placeholder(shape=(None, ) + input_shape[1:], name='X')
# `W` has the same shape as `X`; its role is not visible in this excerpt.
W = K.placeholder(shape=(None, ) + input_shape[1:], name='W')
y = K.placeholder(shape=(None, ), name='y')
# Scalar int32 placeholder.
nsample = K.placeholder(shape=(), dtype='int32', name='nsample')
# ===========================================================================
# Create the network
# ===========================================================================
# One-element list; presumably a mutable counter shared with the creator
# functions defined below -- TODO confirm against its users.
index = [0]


def dense_creator():
    net = N.Sequence([
        N.Dense(int(args.hdim),
Exemple #3
0
                                        override=True,
                                        identifier='name',
                                        log_path=os.path.join(
                                            EXP_DIR, 'processor.log'),
                                        stop_on_failure=True)
        processor.run()
        # pp.validate_features(processor,
        #                      nb_samples=12,
        #                      path=os.path.join(EXP_DIR, 'feature_validation'),
        #                      override=True)
# Open the extracted acoustic features read-only and print the dataset.
ds = F.Dataset(PATH_ACOUSTIC_FEATURES, read_only=True)
print(ds)
# List of (name, (start, end)) pairs for the selected feature type.
indices = list(ds['indices_%s' % args.feat].items())
print("Utterances length:")
# Each utterance length is computed as `end - start`.
print("   ",
      describe([end - start for name, (start, end) in indices], shorten=True))
# ===========================================================================
# Basic path for GMM, T-matrix and I-vector
# ===========================================================================
# EXP_DIR is rebound to a sub-directory named from FEAT, NMIX and TV_DIM;
# everything below uses the new value.
EXP_DIR = os.path.join(EXP_DIR, '%s_%d_%d' % (FEAT, NMIX, TV_DIM))
LOG_PATH = get_logpath(name='log.txt',
                       override=False,
                       root=EXP_DIR,
                       odin_base=False)
# NOTE(review): presumably routes standard output to LOG_PATH -- confirm
# against the definition of `stdio`.
stdio(LOG_PATH)
print("Exp-dir:", ctext(EXP_DIR, 'cyan'))
print("Log path:", ctext(LOG_PATH, 'cyan'))
# ====== ivec path ====== #
GMM_PATH = os.path.join(EXP_DIR, 'gmm')
TMAT_PATH = os.path.join(EXP_DIR, 'tmat')
# zero order statistics
Exemple #4
0
print(protein.shape)
print(protein_name)

# ===========================================================================
# Probabilistic Embedding
# ===========================================================================
# Build the embedding with fixed settings, then fit it on the protein matrix.
pb = ProbabilisticEmbedding(
    n_components_per_class=2,
    positive_component=1,
    log_norm=True,
    clip_quartile=0.0,
    remove_zeros=True,
    ci_threshold=-0.68,
    random_state=5218,
    verbose=True,
)
pb.fit(protein)

# Hard (binarized) predictions for the protein matrix.
y_bin = pb.predict(protein)
print(describe(y_bin))

# Probabilistic predictions for the protein matrix.
y_prob = pb.predict_proba(protein)
print(describe(y_prob))

# ====== save the analysis and diagnosis ====== #
# Fluent chain: each plotting call returns an object exposing the next call,
# and the final call writes the figures to FIGURE_PATH.
(
    pb.boxplot(protein, protein_name)
    .plot_diagnosis(protein, protein_name)
    .plot_distribution(protein, protein_name)
    .save_figures(FIGURE_PATH, verbose=True)
)
Exemple #5
0
        dict(ds['path'].items()),
    ]
# ====== print log ====== #
# Print a per-dataset summary of the loaded acoustic features, visiting the
# datasets in sorted order of their names.
print("Acoustic features:")
for dsname, (X, indices, y, path) in sorted(acoustic_features.items(),
                                            key=lambda x: x[0]):
    # Utterance name -> length, computed as `end - start`.
    all_utt_length = {name: end - start
                      for name, (start, end) in indices.items()}
    print("  %s" % ctext(dsname, 'yellow'))
    print("   #Files         :", ctext(len(indices), 'cyan'))
    # Entries whose name contains '/' are the ones reported as "#Noise".
    print("   #Noise         : %s/%s" %
          (ctext(sum('/' in i for i in indices),
                 'lightcyan'), ctext(len(indices), 'cyan')))
    print("   Loaded features:", ctext(X.path, 'cyan'))
    print("   Utt length     :",
          describe(list(all_utt_length.values()), shorten=True))
    print("   Min length(+8) :")
    # List every utterance whose length is within 8 of the shortest one,
    # showing the part of its name before the first '/' and its path.
    min_length = min(all_utt_length.values())
    for name, length in all_utt_length.items():
        if length <= min_length + 8:
            print('    %s | %s' % (name.split('/')[0], path[name]))
# ===========================================================================
# All system must extract following information
# ===========================================================================
# mapping from
# dataset_name -> 'name': 1-D array [n_samples],
#                 # (path to original audio)
#                 'path': 1-D array [n_samples],
#                 # Extracted latent vectors
#                 'X': 2-D array [n_samples, n_latent_dim]}
#                 # speaker label or meta-data (e.g. 'test', 'enroll', 'unlabeled')
Exemple #6
0
def main(y_prot,
         y_prot_names,
         n_components=2,
         index=1,
         log_norm=True,
         clip_quartile=0.,
         remove_zeros=True,
         ci_threshold=-0.68,
         outpath=None,
         figpath=None,
         verbose=False):
    """Threshold a protein matrix with ``ProbabilisticEmbedding``.

    Fits the embedding on ``y_prot``, computes binarized labels
    (``predict``) and probabilities (``predict_proba``), then optionally
    pickles both results and optionally saves diagnostic figures.

    Args:
        y_prot: Protein matrix; passed to ``np.unique`` and to the embedding.
            Its ``.shape`` is read when ``verbose`` is true.
        y_prot_names: Iterable of label strings; joined with ``', '`` for
            logging and forwarded to the plotting calls.
        n_components: Forwarded as ``n_components_per_class``.
        index: Forwarded as ``positive_component``.
        log_norm: Forwarded unchanged to ``ProbabilisticEmbedding``.
        clip_quartile: Forwarded unchanged to ``ProbabilisticEmbedding``.
        remove_zeros: Forwarded unchanged to ``ProbabilisticEmbedding``.
        ci_threshold: Forwarded unchanged to ``ProbabilisticEmbedding``.
        outpath: If not None, directory that receives the pickled files
            ``y_bin`` and ``y_prob``. The directory is not created here.
        figpath: If not None, passed as ``path`` to ``save_figures``.
        verbose: Print progress information; also forwarded to the embedding
            and to ``save_figures``.

    Raises:
        SystemExit: If ``y_prot`` holds exactly two unique values, i.e. it
            looks already binarized. A warning is emitted first.
    """
    # Local import keeps this block self-contained.
    import sys

    if outpath is not None:
        bin_path = os.path.join(outpath, 'y_bin')
        prob_path = os.path.join(outpath, 'y_prob')
    if verbose:
        print("Start label thresholding:")
        print("  Output path:", ctext(outpath, 'yellow'))
        print("  Figure path:", ctext(figpath, 'yellow'))
    # ====== protein.count ====== #
    if verbose:
        print("  Protein labels:", ctext(', '.join(y_prot_names), 'cyan'))
        print("  Protein matrix:", ctext(y_prot.shape, 'cyan'))
    # ====== already binarized ====== #
    if len(np.unique(y_prot)) == 2:
        warnings.warn("y is already binarized!")
        # sys.exit() raises the same SystemExit as the site-provided exit(),
        # but is always available (exit() is missing under `python -S` and in
        # frozen builds) and does not close sys.stdin as a side effect.
        sys.exit()
    # ====== PB ====== #
    pb = ProbabilisticEmbedding(n_components_per_class=n_components,
                                positive_component=index,
                                log_norm=log_norm,
                                clip_quartile=clip_quartile,
                                remove_zeros=remove_zeros,
                                ci_threshold=ci_threshold,
                                verbose=verbose)
    pb.fit(y_prot)
    y_bin = pb.predict(y_prot)
    y_prob = pb.predict_proba(y_prot)
    if verbose:
        print("  Thresholded values:")
        print("   Original     :",
              ctext(describe(y_prot, shorten=True), 'lightcyan'))
        print("   Binarized    :",
              ctext(describe(y_bin, shorten=True), 'lightcyan'))
        print("   Probabilities:",
              ctext(describe(y_prob, shorten=True), 'lightcyan'))
    # ====== save the results ====== #
    # bin_path / prob_path were defined above under the same condition.
    if outpath is not None:
        with open(bin_path, 'wb') as f:
            pickle.dump(y_bin, f)
            if verbose:
                print("  Save binarized data to:", ctext(bin_path, 'yellow'))
        with open(prob_path, 'wb') as f:
            pickle.dump(y_prob, f)
            if verbose:
                print("  Save probabilized data to:",
                      ctext(prob_path, 'yellow'))
    # ====== save figure ====== #
    if figpath is not None:
        # Fluent chain: each plotting call returns an object exposing the
        # next one; the last call writes the figures to `figpath`.
        pb.boxplot(y_prot, y_prot_names).plot_diagnosis(
            y_prot, y_prot_names).plot_distribution(
                y_prot, y_prot_names).save_figures(path=figpath,
                                                   verbose=verbose)
Exemple #7
0
    nb = NegativeBinomialDisp(loc=mean, disp=disp_row)
    llk1 = tf.reduce_sum(nb.log_prob(x), axis=1).numpy()
    llk2 = log_nb_positive(x=torch.Tensor(x),
                           mu=torch.Tensor(mean),
                           theta=torch.Tensor(disp_row)).numpy()
    print(np.all(np.isclose(llk1, llk2)))
except:
    print("NOT POSSIBLE TO BROADCAST the first dimension")

# all disp available
# Negative binomial built from `mean` and the full `disp`: compare the
# TensorFlow log-likelihood (summed over axis 1) against the value returned by
# `log_nb_positive` on torch tensors, and print whether all entries are close.
nb = NegativeBinomialDisp(loc=mean, disp=disp)
llk1 = tf.reduce_sum(nb.log_prob(x), axis=1).numpy()
llk2 = log_nb_positive(x=torch.Tensor(x),
                       mu=torch.Tensor(mean),
                       theta=torch.Tensor(disp)).numpy()
print(np.all(np.isclose(llk1, llk2)))

# Draw one sample from each implementation and print a summary of both.
s1 = nb.sample().numpy()
s2 = torch_nb(mean, disp).numpy()
print(describe(s1))
print(describe(s2))

# Zero-inflated variant wrapping the same `nb` with probabilities `pi`.
# `llk1` / `llk2` are rebound here; the two results are printed in full
# rather than compared with `np.isclose`.
zinb = ZeroInflated(nb, probs=pi)
llk1 = tf.reduce_sum(zinb.log_prob(x), axis=1).numpy()
llk2 = log_zinb_positive(x=torch.Tensor(x),
                         mu=torch.Tensor(mean),
                         theta=torch.Tensor(disp),
                         pi=torch.Tensor(pi)).numpy()
print(llk1)
print(llk2)
Exemple #8
0
        n_cache=120,
        ncpu=None,
        override=True,
        identifier='name',
        log_path=os.path.join(EXP_DIR, 'processor.log'),
        stop_on_failure=True)
    processor.run()
    # pp.validate_features(processor,
    #                      nb_samples=12,
    #                      path=os.path.join(EXP_DIR, 'feature_validation'),
    #                      override=True)
# Open the extracted acoustic features read-only and print the dataset.
ds = F.Dataset(PATH_ACOUSTIC_FEATURES, read_only=True)
print(ds)
# List of (name, (start, end)) pairs for the selected feature type.
indices = list(ds['indices_%s' % args.feat].items())
print("Utterances length:")
# Each utterance length is computed as `end - start`.
print("   ", describe([end - start for name, (start, end) in indices], shorten=True))
# ===========================================================================
# Basic path for GMM, T-matrix and I-vector
# ===========================================================================
# EXP_DIR is rebound to a sub-directory named from FEAT, NMIX and TV_DIM;
# every path below is derived from the new value.
EXP_DIR = os.path.join(EXP_DIR, '%s_%d_%d' % (FEAT, NMIX, TV_DIM))
LOG_PATH = get_logpath(name='log.txt', override=False, root=EXP_DIR, odin_base=False)
# NOTE(review): presumably routes standard output to LOG_PATH -- confirm
# against the definition of `stdio`.
stdio(LOG_PATH)
print("Exp-dir:", ctext(EXP_DIR, 'cyan'))
print("Log path:", ctext(LOG_PATH, 'cyan'))
# ====== ivec path ====== #
GMM_PATH = os.path.join(EXP_DIR, 'gmm')
TMAT_PATH = os.path.join(EXP_DIR, 'tmat')
# zero order statistics
# Pair of paths under EXP_DIR, ordered (train, test).
Z_PATH = (
    os.path.join(EXP_DIR, 'Z_train'),
    os.path.join(EXP_DIR, 'Z_test'))