Example #1
def stream_data_store(args):
    """Yield one evaluation row per (graph, model, threshold point)."""
    for n, G in tqdm(enumerate(
        gen_random_graphs(args.graphs, *args.nodes,
                          read_in=args.readin)
    )):
        A = nx.to_numpy_array(G)  # ground-truth adjacency matrix
        N = G.number_of_nodes()
        # Mcore = sample_censored(G, n_walks=0,
        #                     n_obsv=args.length, steps=100, demo=False)
        for nwalks in args.nwalks:
            # simulate censored random walks over the graph
            M = sample_censored(G, n_walks=nwalks,
                                n_obsv=args.length,
                                steps=100, demo=False, cuda=False)
            model = CensoredRW(N, sym=True)  # symmetric INVITE model

            learning_rate = 0.1

            train(model, np.array(M), batch_size=None, compare_true=A,
                  epochs=50, callback=False, lr=learning_rate)

            res = model.P.detach().cpu().numpy()

            def prep(A): return minmax(symmetrize(A))

            mkv1 = markov_model(M, k=1)  # first-order Markov baseline
            mkv2 = markov_model(M, k=2)  # second-order Markov baseline
            cos = cosine_model(M)        # cosine-similarity baseline

            models = dict(
                INVITE=prep(res),
                Cosine=prep(cos),
                MC1=prep(mkv1),
                MC2=prep(mkv2),
            )

            logs = test_multi(A, **models)

            for name, log in logs.items():
                for pos, t in enumerate(log['t']):
                    d = dict(
                        graph=f'G{n}',
                        model=name,
                        aps=log['aps'],
                        f_opt=log['f'][log['opt_pos']],
                        t_opt=log['opt_pos'],
                        t=t, r=log['r'][pos], p=log['p'][pos],
                        f=log['f'][pos],
                        nwalks=nwalks,
                        nodes=N,
                    )
                    yield d
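
A hypothetical driver for this generator (the argument values and the output
path below are placeholders, not part of the original) collects the yielded
rows into a pandas DataFrame:

import argparse
import pandas as pd

# Placeholder namespace; the field names mirror the attributes the
# generator reads (graphs, nodes, readin, nwalks, length).
args = argparse.Namespace(graphs=10, nodes=(20, 50), readin=None,
                          nwalks=[100, 500], length=10)

df = pd.DataFrame(list(stream_data_store(args)))  # one row per yielded dict
df.to_csv('results.csv', index=False)             # assumed output path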
Example #2
def main(seqpath, epochs, threshold, cuda_id, use_cuda, save_dir, fwd, rev,
         min_count, multi, reg_epochs):
    logger = logging.getLogger(__name__)

    logger.info(f"saving to {save_dir}")
    save_dir = Path(save_dir)
    save_dir.mkdir(exist_ok=True, parents=True)

    experiment = SingleRound(path=seqpath,
                             forward_adapter=fwd,
                             reverse_adapter=rev)

    # training
    train_loader, test_loader = experiment.get_dataloader(min_count=min_count,
                                                          use_cuda=use_cuda)
    device = torch.device(f"cuda:{cuda_id}" if (
        use_cuda and torch.cuda.is_available()) else "cpu")

    train_kwargs = {
        "epochs": epochs,
        "threshold": threshold,
        "device": device,
        "train_loader": train_loader,
        "test_loader": test_loader,
        "save_dir": save_dir,
        "beta_schedule": True,
        "force_matching": True,
        "force_epochs": reg_epochs,
    }

    # build and train the model, once per run
    target_len = experiment.random_region_length
    for i in range(multi):
        model = CNN_PHMM_VAE(motif_len=target_len, embed_size=2)
        model_str = type(model).__name__.lower()  # e.g. 'cnn_phmm_vae'
        if multi > 1:
            model_str += f"_{i}"
        model_str += ".mdl"
        logger.info(f"training {model_str}")
        optimizer = optim.Adam(model.parameters())
        model = model.to(device)

        train_kwargs.update({
            "model": model,
            "model_str": model_str,
            "optimizer": optimizer
        })
        models.train(**train_kwargs)

        torch.cuda.empty_cache()
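
For reference, a direct invocation might look like the following; every value
is a placeholder (the original script presumably parses these from the
command line):

main(seqpath="data/round1.fastq",  # placeholder input path
     epochs=1000, threshold=50, cuda_id=0, use_cuda=True,
     save_dir="out/run1", fwd="AAAAA", rev="GGGGG",
     min_count=1, multi=1, reg_epochs=50)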
Example #3
    def run(self):
        filepath = f'{self.data_path}/train/train1.npz'
        X, y, self.fs, self.ch_names = load_session(filepath,
                                                    start=self.start,
                                                    end=self.end)
        logging.info(f'Shape: X_full {X.shape} - y {y.shape}')

        # Pre-processing - filtering
        if self.model_name == 'CSP':
            X = filtering(X,
                          self.fs,
                          f_order=5,
                          f_low=7,
                          f_high=35,
                          f_type='cheby')

        # Cropping
        if self.n_crops > 1:
            X, y = cropping(X,
                            y,
                            self.fs,
                            n_crops=self.n_crops,
                            crop_len=self.crop_len)

        # Preprocessing
        X = preprocessing(X,
                          self.fs,
                          rereference=self.should_reref,
                          filt=self.should_filter,
                          standardize=self.should_standardize)

        # Train with cross-validation
        try:
            trained_model, cv_mean, cv_std, train_time = train(
                self.model_name, X, y, self.train_mode, n_iters=self.n_iters)
        except Exception:
            logging.info(f'Training failed - {traceback.format_exc()}')
            return  # the results logged below would be undefined

        logging.info(f'Trained successfully in {train_time:.0f}s \n'
                     f'Accuracy: {cv_mean:.2f}+-{cv_std:.2f} \n'
                     f'{trained_model}')
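
The cropping helper is not shown in this example; a minimal sketch of a
sliding-window crop augmentation like the one called above, assuming X has
shape (trials, channels, samples), could look like this:

import numpy as np

def cropping_sketch(X, y, fs, n_crops, crop_len):
    # Cut each trial into n_crops windows of crop_len seconds and
    # replicate the label for every window (assumed behavior).
    win = int(crop_len * fs)
    starts = np.linspace(0, X.shape[-1] - win, n_crops).astype(int)
    X_out = np.concatenate([X[:, :, s:s + win] for s in starts])
    y_out = np.tile(y, n_crops)
    return X_out, y_out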
Example #4
    else:
        plot_ivt(G, pos, M[:4], model, txtpos=.5, withlabels=lab)

    ps('inviteRW')

    N = struct.shape[0]
    cuda = False  # GPU disabled here (previously: False if N > 50 else True)
    approx = CensoredRW(N, sym=True, cuda=cuda)

    learning_rate = 0.1

    train(approx,
          np.array(M),
          compare_true=A,
          batch_size=min(500, len(M)),
          epochs=50,
          callback=True,
          lr=learning_rate)

    res = fill_diagonal(approx.P.detach().cpu().numpy(), 0)

    def prep(A):
        return labeled_adj(struct.columns, minmax(symmetrize(A)))

    mkv1 = markov_model(M, k=1)
    mkv2 = markov_model(M, k=2)
    # mkv2 = hidden_markov(M, max(map(len, M)), N, n_jobs=4)
    cos = cosine_model(M)

    models = dict(
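
(The snippet above is cut off mid-statement at models = dict(.) None of these
examples define the minmax and symmetrize helpers used by prep; plausible
definitions, assuming the conventional meanings, are:

import numpy as np

def symmetrize(A):
    # Assumed definition: average a matrix with its transpose.
    return (A + A.T) / 2

def minmax(A):
    # Assumed definition: rescale entries to the [0, 1] range.
    return (A - A.min()) / (A.max() - A.min())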
Example #5
        sys = subsys[lab.get_text()]
        lab.set_color(color_dict[sys])
    plt.tight_layout()
    sns.despine()
    ps('tag_multinomial')

    # ################## Calculate INVITE Distances ################ #
    logging.info('training...')
    N = voc_nodes.shape[0]
    # uses pytorch + ADAM
    model = CensoredRW(N, cuda=False, sym=False)
    learning_rate = 0.1

    train(model,
          np.array(m),
          batch_size=min(500, len(m)),
          epochs=50,
          callback=True,
          lr=learning_rate)

    logging.info('Done!')

    # ########## COMPARISONS ############# #


    def prep(A):
        return labeled_adj(voc_nodes, tools.minmax(tools.symmetrize(A)))

    res = tools.fill_diagonal(model.P.detach().cpu().numpy(), 0)

    mkv1 = tools.markov_model(m, k=1)  # order 1, via `pomegranate`
    mkv2 = tools.markov_model(m, k=2)  # order 2, via `pomegranate`
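
Per the comments, markov_model is backed by pomegranate. As a rough stand-in
for illustration only (not the library call), a first-order (k=1) transition
estimate over the walks m can be built by plain counting:

import numpy as np

def markov1_sketch(walks, n_nodes):
    # Count i -> j transitions across all walks, then row-normalize.
    T = np.zeros((n_nodes, n_nodes))
    for walk in walks:
        for i, j in zip(walk[:-1], walk[1:]):
            T[i, j] += 1
    row_sums = T.sum(axis=1, keepdims=True)
    return np.divide(T, row_sums, out=np.zeros_like(T), where=row_sums > 0)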
Example #6
def main(n_seq, seed, epochs, threshold, cuda_id, use_cuda, save_dir,
         reg_epochs, multi, only_cnn):
    logger = logging.getLogger(__name__)

    logger.info(f"saving to {save_dir}")
    save_dir = Path(save_dir)
    save_dir.mkdir(exist_ok=True, parents=True)

    # generate sequences
    fwd_adapter = "AAAAA"
    rev_adapter = "GGGGG"

    generator = SequenceGenerator(num_motifs=1,
                                  seed=seed,
                                  fix_random_region_length=True,
                                  error_rate=0,
                                  generate_motifs=True,
                                  add_primer=True,
                                  forward_primer=fwd_adapter,
                                  reverse_primer=rev_adapter,
                                  middle_insert_range=[2, 6],
                                  one_side_proba=0.5,
                                  paired=True)

    reads, motif_indices, paired_indices = generator.sample(n_seq)
    with open(save_dir / "seqences.txt", "w") as f:
        for index, read in zip(motif_indices, reads):
            f.write(f"{index}, {read}\n")
    with open(save_dir / "motifs.txt", "w") as f:
        for motif in generator.motifs:
            f.write(f"{motif}\n")

    experiment = SingleRound(reads,
                             forward_adapter=fwd_adapter,
                             reverse_adapter=rev_adapter)

    # training
    train_loader, test_loader = experiment.get_dataloader(use_cuda=use_cuda)
    device = torch.device(f"cuda:{cuda_id}" if (
        use_cuda and torch.cuda.is_available()) else "cpu")

    train_kwargs = {
        "epochs": epochs,
        "threshold": threshold,
        "device": device,
        "train_loader": train_loader,
        "test_loader": test_loader,
        "save_dir": save_dir,
        "beta_schedule": True,
        "force_matching": True,
        "force_epochs": reg_epochs,
    }

    # train each model variant and collect its results
    target_len = experiment.random_region_length

    results = dict()
    for i in range(multi):
        eval_models = [
            CNN_Mul_VAE(target_len=target_len, embed_size=2),
            CNN_AR_VAE(embed_size=2),
            CNN_PHMM_VAE(motif_len=target_len, embed_size=2)
        ]
        if not only_cnn:
            eval_models.extend([
                LSTM_Mul_VAE(target_len=target_len, embed_size=2),
                LSTM_AR_VAE(embed_size=2),
                LSTM_PHMM_VAE(motif_len=target_len, embed_size=2),
                CNNLSTM_Mul_VAE(target_len=target_len, embed_size=2),
                CNNLSTM_AR_VAE(embed_size=2),
                CNNLSTM_PHMM_VAE(motif_len=target_len, embed_size=2)
            ])
        for model in eval_models:
            model_str = type(model).__name__.lower()  # e.g. 'cnn_mul_vae'
            if multi > 1:
                model_str += f"_{i}"
            model_str += ".mdl"
            print(f"training {model_str}")
            optimizer = optim.Adam(model.parameters())
            model = model.to(device)

            train_kwargs.update({
                "model": model,
                "model_str": model_str,
                "optimizer": optimizer
            })
            results[model_str] = models.train(**train_kwargs)

            torch.cuda.empty_cache()
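
The file-name logic above is duplicated between Examples #2 and #6; a small
helper (a refactoring sketch, not present in the original) would keep both
loop bodies shorter:

def model_filename(model, run_idx, multi):
    # Derive names such as 'cnn_phmm_vae_0.mdl' from the model class.
    name = type(model).__name__.lower()
    if multi > 1:
        name += f"_{run_idx}"
    return name + ".mdl"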