コード例 #1
0
ファイル: propagation_utils.py プロジェクト: he-org/CLEIT
def generate_precompute_kernel(network_file_path, output_file_path, alpha=0.7):
    """Compute a propagation kernel for a network file and write it to disk.

    The network is loaded with ``dit.load_network_file``; an identity matrix
    labelled by the network's nodes is pushed through
    ``prop.network_propagation`` (``symmetric_norm=False``). The result has
    its row and column labels converted to strings, is sorted on both axes,
    transposed, and saved with the former index as an ordinary column.

    Args:
        network_file_path: Path handed to ``dit.load_network_file``.
        output_file_path: Destination file. Feather is attempted first; if
            that raises ``ValueError`` the table is written as CSV to the
            same path instead.
        alpha: Propagation coefficient forwarded to
            ``prop.network_propagation``.

    Returns:
        ``output_file_path``, unchanged.
    """
    graph = dit.load_network_file(network_file_path)
    node_labels = graph.nodes()

    # Identity "signal": one unit of weight on every node.
    identity_df = pd.DataFrame(
        np.identity(len(node_labels)),
        index=node_labels,
        columns=node_labels,
    )

    propagated = prop.network_propagation(
        graph, identity_df, alpha=alpha, symmetric_norm=False)

    # Stringify labels before sorting so both axes sort the same way
    # (presumably also because feather needs string column names -- confirm).
    propagated.columns = list(map(str, propagated.columns))
    propagated.index = list(map(str, propagated.index))
    for axis in (0, 1):
        propagated.sort_index(axis=axis, inplace=True)

    # Transpose, then turn the index into a regular column for serialisation.
    table = propagated.transpose().reset_index()

    try:
        table.to_feather(output_file_path)
    except ValueError:
        table.to_csv(output_file_path)
    return output_file_path
コード例 #2
0
def _kwarg_cast(kwargs, key, default, cast):
    """Return ``cast(kwargs[key])`` if ``key`` is present, else ``default`` as-is."""
    if key in kwargs:
        return cast(kwargs[key])
    return default


def _kwarg_flag(kwargs, key, default):
    """Return the boolean option ``key`` from ``kwargs``, or ``default`` if absent.

    A value is treated as true when it equals the string ``'True'`` or
    compares equal to ``True``; every other value is false.
    """
    if key not in kwargs:
        return default
    value = kwargs[key]
    # `== True` (not `is True`) is intentional: it matches the original test.
    return value == 'True' or value == True  # noqa: E712


def NBS_single(sm_mat,
               regNet_glap,
               propNet=None,
               propNet_kernel=None,
               k=3,
               verbose=False,
               **kwargs):
    """Run one pyNBS iteration: subsample, propagate, normalise, netNMF.

    Steps, in order:
      1. Subsample ``sm_mat`` with ``core.subsample_sm_mat``.
      2. If ``propNet`` is given, propagate the subsample over it (using
         ``propNet_kernel`` when supplied, otherwise
         ``prop.network_propagation``).
      3. Quantile-normalise with ``core.qnorm`` unless disabled.
      4. Align the data and ``regNet_glap`` to a common node order and run
         ``core.mixed_netNMF``.
      5. Optionally write the resulting H matrix to CSV.

    Args:
        sm_mat: Somatic mutation data as a pandas DataFrame.
        regNet_glap: Regularization network laplacian as a pandas DataFrame.
        propNet: Optional ``networkx.Graph`` to propagate over.
        propNet_kernel: Optional pre-computed propagation kernel.
        k: Number of components passed to ``core.mixed_netNMF``.
        verbose: Print progress messages when true.
        **kwargs: Optional settings. Recognised keys:
            ``pats_subsample_p``, ``gene_subsample_p``, ``min_muts``,
            ``prop_alpha``, ``prop_symmetric_norm``, ``save_prop``,
            ``qnorm_data``, ``netNMF_lambda``, ``netNMF_maxiter``,
            ``netNMF_eps``, ``netNMF_err_tol``, ``netNMF_err_delta_tol``,
            ``outdir``, ``job_name``, ``iteration_label``.
            When ``save_prop`` is true the whole ``kwargs`` dict is also
            forwarded to the propagation function.

    Returns:
        pandas DataFrame built from ``H.T``, indexed like the normalised
        propagated data.

    Raises:
        TypeError: If ``sm_mat``/``regNet_glap`` are not DataFrames or
            ``propNet`` is not exactly a ``networkx.Graph``.
        ValueError: If subsampling leaves no rows.
    """
    # --- Input validation -------------------------------------------------
    if not isinstance(sm_mat, pd.DataFrame):
        raise TypeError(
            'Somatic mutation data must be given as Pandas DataFrame')
    # Exact type comparison kept from the original check: Graph subclasses
    # (e.g. DiGraph) are still rejected.
    if propNet is not None and type(propNet) is not nx.Graph:
        raise TypeError('Networkx graph object required for propNet')
    if regNet_glap is not None and not isinstance(regNet_glap, pd.DataFrame):
        raise TypeError(
            'netNMF regularization network laplacian (regNet_glap) must be given as Pandas DataFrame'
        )
    # NOTE(review): regNet_glap=None passes validation but is dereferenced
    # below when aligning data -- confirm whether None is a supported input.

    # --- Subsampling ------------------------------------------------------
    pats_subsample_p = _kwarg_cast(kwargs, 'pats_subsample_p', 0.8, float)
    gene_subsample_p = _kwarg_cast(kwargs, 'gene_subsample_p', 0.8, float)
    min_muts = _kwarg_cast(kwargs, 'min_muts', 10, int)

    sm_mat_subsample = core.subsample_sm_mat(sm_mat,
                                             propNet=propNet,
                                             pats_subsample_p=pats_subsample_p,
                                             gene_subsample_p=gene_subsample_p,
                                             min_muts=min_muts)
    if verbose:
        print('Somatic mutation data sub-sampling complete')

    if sm_mat_subsample.shape[0] == 0:
        raise ValueError(
            'Subsampled somatic mutation matrix contains no patients.')

    # --- Propagation ------------------------------------------------------
    if propNet is None:
        prop_sm_data = sm_mat_subsample
        if verbose:
            print('Somatic mutation data not propagated')
    else:
        # kwargs are only forwarded to the propagation call when the caller
        # asked for the propagation result to be saved.
        save_prop = _kwarg_flag(kwargs, 'save_prop', False)
        forwarded = kwargs if save_prop else {}
        if propNet_kernel is None:
            # No pre-computed kernel: propagate directly over the network.
            alpha = _kwarg_cast(kwargs, 'prop_alpha', 0.7, float)
            symmetric_norm = _kwarg_flag(kwargs, 'prop_symmetric_norm', False)
            prop_sm_data = prop.network_propagation(
                propNet,
                sm_mat_subsample,
                alpha=alpha,
                symmetric_norm=symmetric_norm,
                **forwarded)
        else:
            prop_sm_data = prop.network_kernel_propagation(
                propNet, propNet_kernel, sm_mat_subsample, **forwarded)
        if verbose:
            print('Somatic mutation data propagated')

    # --- Quantile normalisation --------------------------------------------
    if _kwarg_flag(kwargs, 'qnorm_data', True):
        prop_data_qnorm = core.qnorm(prop_sm_data)
        if verbose:
            print('Somatic mutation data quantile normalized')
    else:
        prop_data_qnorm = prop_sm_data
        if verbose:
            print('Somatic mutation data not quantile normalized')

    # --- Align data columns with the laplacian rows ------------------------
    # `.reindex` replaces the removed `DataFrame.ix` label lookup; labels
    # missing from the frame yield NaN rows.
    if propNet is not None:
        propNet_nodes = list(propNet.nodes)
        data_arr = np.array(prop_data_qnorm.T.reindex(propNet_nodes))
        regNet_glap_arr = np.array(
            regNet_glap.reindex(propNet_nodes)[propNet_nodes])
    else:
        propNet_nodes = list(regNet_glap.index)
        data_arr = np.array(
            prop_data_qnorm.T.reindex(propNet_nodes).fillna(0))
        regNet_glap_arr = np.array(regNet_glap)

    # --- netNMF parameters -------------------------------------------------
    netNMF_lambda = _kwarg_cast(kwargs, 'netNMF_lambda', 200, float)
    netNMF_maxiter = _kwarg_cast(kwargs, 'netNMF_maxiter', 250, int)
    netNMF_eps = _kwarg_cast(kwargs, 'netNMF_eps', 1e-15, float)
    netNMF_err_tol = _kwarg_cast(kwargs, 'netNMF_err_tol', 1e-4, float)
    netNMF_err_delta_tol = _kwarg_cast(
        kwargs, 'netNMF_err_delta_tol', 1e-8, float)

    W, H, numIter, finalResid = core.mixed_netNMF(
        data_arr,
        regNet_glap_arr,
        k=k,
        l=netNMF_lambda,
        maxiter=netNMF_maxiter,
        eps=netNMF_eps,
        err_tol=netNMF_err_tol,
        err_delta_tol=netNMF_err_delta_tol,
        verbose=False)

    H_df = pd.DataFrame(H.T, index=prop_data_qnorm.index)

    # --- Optional save -----------------------------------------------------
    # File name is "[<job_name>_]H[_<iteration_label>].csv", appended
    # directly to `outdir` (no path separator is inserted).
    if 'outdir' in kwargs:
        name_parts = []
        if 'job_name' in kwargs:
            name_parts.append(str(kwargs['job_name']))
        name_parts.append('H')
        if 'iteration_label' in kwargs:
            name_parts.append(str(kwargs['iteration_label']))
        save_path = kwargs['outdir'] + '_'.join(name_parts) + '.csv'
        H_df.to_csv(save_path)
        if verbose:
            print('H matrix saved: ' + str(save_path))
    if verbose:
        print('pyNBS iteration complete')
    return H_df
コード例 #3
0
ファイル: run_pyNBS.py プロジェクト: gregpoore/pyNBS
    
    # Get network propagation kernel
    if args.propagation_kernel_file is not None:
        # Load propagation kernel
        if args.propagation_kernel_file.endswith('.hdf'):
            kernel = pd.read_hdf(args.propagation_kernel_file)
        else:
            kernel = pd.read_csv(args.propagation_kernel_file)
        if args.verbose:
            print 'Pre-calculated network kernel loaded'
    else:
        if args.calculate_propagation_kernel:
            # Calculate propagation kernel by propagating identity matrix of network
            network_nodes = network.nodes()
            network_I = pd.DataFrame(np.identity(len(network_nodes)), index=network_nodes, columns=network_nodes)
            kernel = prop.network_propagation(network, network_I, args.alpha, verbose=True)  
            if args.verbose:
                print 'Network kernel calculated'
        else:
            kernel = None
            if args.verbose:
                print 'No network kernel established'

    # Construct options dictionary for decomposition
    NBS_options = {'pats_subsample_p' : args.pats_subsample_p, 
                   'gene_subsample_p' : args.gene_subsample_q, 
                   'min_muts' : args.min_muts,
                   'prop_data' : args.propagate_data, 
                   'prop_alpha' : args.alpha, 
                   'prop_symmetric_norm' : args.symmetric_network_normalization, 
                   'qnorm_data' : args.quantile_normalize_data,
コード例 #4
0
ファイル: pyNBS_wrapper.py プロジェクト: gitter-lab/prmf
    nodelist = parse_nodelist(open(args.nodelist))
    arr_sp = parse_gene_lists(nodelist, args.gene_lists)
    arr_df = pd.DataFrame(arr_sp.todense())
    arr_df.columns = nodelist

    # graph regularizer
    network = nx.read_graphml(args.network)
    knnGlap = core.network_inf_KNN_glap(network)

    # diffusion
    alpha = 0.7
    network_nodes = network.nodes()
    network_I = pd.DataFrame(np.identity(len(network_nodes)),
                             index=network_nodes,
                             columns=network_nodes)
    kernel = prop.network_propagation(network,
                                      network_I,
                                      alpha=alpha,
                                      symmetric_norm=True)

    # Run pyNBS core steps
    H_df, W, H = pyNBS_single.NBS_single(arr_df,
                                         knnGlap,
                                         propNet=network,
                                         propNet_kernel=kernel,
                                         k=args.k_latent,
                                         **save_args)
    np.savetxt(os.path.join(args.outdir, "W.csv"), W, delimiter=",")
    np.savetxt(os.path.join(args.outdir, "H.csv"), H, delimiter=",")
コード例 #5
0
def NBS_single(sm_mat,
               options,
               propNet=None,
               propNet_kernel=None,
               regNet_glap=None,
               verbose=True,
               save_path=None):
    """Run one NBS iteration: subsample, propagate, normalise, netNMF.

    Args:
        sm_mat: Somatic mutation data passed to ``core.subsample_sm_mat``.
        options: Mapping of option overrides merged over the defaults listed
            in ``NBS_options`` below.
        propNet: ``networkx.Graph`` used for propagation and for ordering
            nodes; required when the ``prop_data`` option is true.
        propNet_kernel: Optional pre-computed propagation kernel; when given
            it is used instead of ``prop.network_propagation``.
        regNet_glap: Regularization network laplacian (pandas DataFrame);
            required when the ``netNMF_gamma`` option is non-zero.
        verbose: Print progress messages when true.
        save_path: If not ``None``, the H matrix is written there as CSV.

    Returns:
        pandas DataFrame built from ``H.T``, indexed like the normalised
        propagated data.

    Raises:
        TypeError: If a required ``propNet`` / ``regNet_glap`` has the wrong
            type.
    """
    # Defaults; anything in `options` overrides these.
    NBS_options = {
        'pats_subsample_p': 0.8,
        'gene_subsample_p': 0.8,
        'min_muts': 10,
        'prop_data': True,
        'prop_alpha': 0.7,
        'prop_symmetric_norm': False,
        'qnorm_data': True,
        'netNMF_k': 4,
        'netNMF_gamma': 200,
        'netNMF_update_gamma': False,
        'netNMF_gamma_factor': 1,
        'netNMF_niter': 250,
        'netNMF_eps': 1e-15,
        'netNMF_err_tol': 1e-4,
        'netNMF_err_delta_tol': 1e-4
    }
    NBS_options.update(options)
    if verbose:
        print('NBS options set:')
        for option in NBS_options:
            print('\t' + option + ': ' + str(NBS_options[option]))

    # Input validation. The exact type comparison for the graph is kept from
    # the original check, so Graph subclasses are still rejected.
    if NBS_options['prop_data'] and type(propNet) is not nx.Graph:
        raise TypeError('Networkx graph object required for propNet')
    if NBS_options['netNMF_gamma'] != 0 and not isinstance(
            regNet_glap, pd.DataFrame):
        raise TypeError(
            'netNMF regularization network laplacian (regNet_glap) must be given as Pandas DataFrame'
        )

    # Subsample data
    sm_mat_subsample = core.subsample_sm_mat(
        sm_mat,
        propNet=propNet,
        pats_subsample_p=NBS_options['pats_subsample_p'],
        gene_subsample_p=NBS_options['gene_subsample_p'],
        min_muts=NBS_options['min_muts'])
    if verbose:
        print('Somatic mutation data sub-sampling complete')

    # Propagate data
    if NBS_options['prop_data']:
        if propNet_kernel is None:
            prop_sm_data = prop.network_propagation(
                propNet,
                sm_mat_subsample,
                symmetric_norm=NBS_options['prop_symmetric_norm'],
                alpha=NBS_options['prop_alpha'],
                verbose=verbose)
        else:
            prop_sm_data = prop.network_kernel_propagation(propNet,
                                                           propNet_kernel,
                                                           sm_mat_subsample,
                                                           verbose=verbose,
                                                           save_path=None)
        if verbose:
            print('Somatic mutation data propagated')
    else:
        prop_sm_data = sm_mat_subsample
        # Gated on `verbose` like every other progress message.
        if verbose:
            print('Somatic mutation data not propagated')

    # Quantile normalize data
    if NBS_options['qnorm_data']:
        prop_data_qnorm = core.qnorm(prop_sm_data)
        if verbose:
            print('Somatic mutation data quantile normalized')
    else:
        prop_data_qnorm = prop_sm_data
        if verbose:
            print('Somatic mutation data not quantile normalized')

    # Align data and laplacian to the network's node order.
    # `.reindex` replaces the removed `DataFrame.ix` label lookup; labels
    # missing from the frame yield NaN rows.
    # NOTE(review): propNet and regNet_glap are dereferenced here even when
    # the options above made them optional -- confirm intended inputs.
    propNet_nodes = list(propNet.nodes())
    data_arr = np.array(prop_data_qnorm.T.reindex(propNet_nodes))
    regNet_glap_arr = np.array(
        regNet_glap.reindex(propNet_nodes)[propNet_nodes])

    # Mixed netNMF
    W, H, numIter, finalResid = core.mixed_netNMF(
        data_arr,
        regNet_glap_arr,
        NBS_options['netNMF_k'],
        W_init=None,
        H_init=None,
        gamma=NBS_options['netNMF_gamma'],
        update_gamma=NBS_options['netNMF_update_gamma'],
        gamma_factor=NBS_options['netNMF_gamma_factor'],
        niter=NBS_options['netNMF_niter'],
        eps=NBS_options['netNMF_eps'],
        err_tol=NBS_options['netNMF_err_tol'],
        err_delta_tol=NBS_options['netNMF_err_delta_tol'],
        verbose=verbose,
        debug_mode=False)

    # Save netNMF result
    H_df = pd.DataFrame(H.T, index=prop_data_qnorm.index)
    if save_path is not None:
        H_df.to_csv(save_path)
        if verbose:
            print('netNMF result saved: ' + str(save_path))
    return H_df
コード例 #6
0
ファイル: run_pyNBS.py プロジェクト: decarlin/pyNBS_3
           params['prop_symmetric_norm'])
     print('Save network propagation kernel:', params['save_kernel'])
     if params['save_kernel']:
         print(save_args['outdir'] + str(save_args['job_name']) +
               '_prop_kernel.csv')
 # Calculate propagation kernel by propagating identity matrix of network
 network_nodes = network.nodes()
 network_I = pd.DataFrame(np.identity(len(network_nodes)),
                          index=network_nodes,
                          columns=network_nodes)
 if params['save_kernel']:
     save_args['iteration_label'] = 'kernel'
     kernel = prop.network_propagation(
         network,
         network_I,
         alpha=params['prop_alpha'],
         symmetric_norm=params['prop_symmetric_norm'],
         verbose=verbose,
         **save_args)
 else:
     kernel = prop.network_propagation(
         network,
         network_I,
         alpha=params['prop_alpha'],
         symmetric_norm=params['prop_symmetric_norm'],
         verbose=verbose)
 print()
 print(
     '##################################################################################'
 )
 print('# Performing', params['niter'], 'iterations of pyNBS')