Esempio n. 1
0
def map_process(obj, reduce_dict, axes, map_name):
    """
    Run one dimensionality-reduction command: project, plot, then save.

    Parameters
    ----------
    obj: dict-like state; this function reads the keys 'design_matrix',
         'design_matrix_atomic' (only when per-atom output is on),
         'map_options', 'fig_options', 'asapxyz' and, optionally,
         'cluster_labels'.
    reduce_dict: reducer specification. When its 'type' entry is 'RAW' the
         design matrices are used as-is; otherwise it is handed to
         Dimension_Reducers.
    axes: forwarded unchanged to map_plot.
    map_name: forwarded unchanged to map_save.
    """
    # --- project ---------------------------------------------------------
    use_raw = reduce_dict.get('type') == 'RAW'
    if use_raw:
        reducer = None
        proj = obj['design_matrix']
    else:
        from asaplib.reducedim import Dimension_Reducers
        reducer = Dimension_Reducers(reduce_dict)
        proj = reducer.fit_transform(obj['design_matrix'])

    # Per-atom coordinates are only produced on request.
    proj_atomic = None
    if obj['map_options']['peratom']:
        if use_raw:
            proj_atomic = obj['design_matrix_atomic']
        else:
            print("Project atomic design matrix with No. of samples:",
                  len(obj['design_matrix_atomic']))
            # Reuse the reducer fitted on the global design matrix.
            proj_atomic = reducer.transform(obj['design_matrix_atomic'])

    # --- plot ------------------------------------------------------------
    fig_spec = obj['fig_options']
    map_opts = obj['map_options']
    color = map_opts['color']
    color_atomic = map_opts['color_atomic']
    annotate = map_opts['annotate']
    labels = obj['cluster_labels'] if 'cluster_labels' in obj else []
    map_plot(fig_spec, proj, proj_atomic, color, color_atomic, labels,
             annotate, axes)

    # --- output ----------------------------------------------------------
    out_name = obj['fig_options']['outfile']
    out_mode = obj['map_options']['outmode']
    species_name = obj['map_options']['only_use_species']
    if obj['map_options']['project_atomic']:
        # The projection itself is per-atom, so there is no global matrix.
        global_coords, atomic_coords = None, proj
    else:
        global_coords, atomic_coords = proj, proj_atomic
    map_save(out_name, out_mode, obj['asapxyz'], global_coords, atomic_coords,
             map_name, species_name)
Esempio n. 2
0
def main():
    """
    Test if dimensionality reduction is working.

    Takes no arguments: the input is the 'small_molecules-SOAP.xyz' file
    sitting next to this script. The descriptors stored under the
    'SOAP-n4-l3-c1.9-g0.23' tag are scaled and reduced with sparse kernel
    PCA, the projection is written back through the ASAPXYZ object, and the
    first two components are plotted coloured by
    'dft_formation_energy_per_atom_in_eV'.
    """
    here = os.path.dirname(__file__)
    fxyz = os.path.join(here, 'small_molecules-SOAP.xyz')
    fmat = ['SOAP-n4-l3-c1.9-g0.23']
    fcolor = 'dft_formation_energy_per_atom_in_eV'
    pca_d = 10
    prefix = "test-dimensionality-reduction"
    foutput = f"{prefix}-pca-d{pca_d}"

    # try to read the xyz file
    asapxyz = ASAPXYZ(fxyz)
    desc, _ = asapxyz.get_descriptors(fmat, False)

    print(desc)

    # Alternative reducer specifications that can be swapped in:
    #
    # reduce_dict = { "pca":
    #                {"type": 'PCA', 'parameter':{"n_components": pca_d, "scalecenter": scale}}
    #               }
    #
    # reduce_dict = {
    #                "preprocessing": {"type": 'SCALE', 'parameter': None},
    #                "umap":
    #                {"type": 'UMAP', 'parameter':{"n_components": pca_d, "n_neighbors": 10}}
    #               }
    #
    # reduce_dict = {
    #     "reduce1_pca": {"type": 'PCA', 'parameter':{"n_components": 20, "scalecenter":True}},
    #     "reduce2_tsne": {"type": 'TSNE', 'parameter': {"n_components": 2, "perplexity":20}}
    #     }

    linear_kernel = {"first_kernel": {"type": 'linear', "normalize": True}}
    reduce_dict = {
        "preprocessing": {"type": 'SCALE', 'parameter': None},
        "skpca": {
            "type": 'SPARSE_KPCA',
            'parameter': {"n_components": pca_d, "kernel": linear_kernel},
        },
    }

    reducer = Dimension_Reducers(reduce_dict)
    proj = reducer.fit_transform(desc)

    # save
    asapxyz.set_descriptors(proj, 'pca_coord')
    asapxyz.write(foutput)

    # color scheme (the per-atom colours and the colour scale are not used here)
    plotcolor, _, colorlabel, _ = set_color_function(fcolor, asapxyz)

    components = {
        "first_p": {"type": 'scatter', 'clabel': colorlabel},
        "second_p": {"type": 'annotate', 'adtext': False},
    }
    fig_spec_dict = {
        'outfile': f"PCA_4_{prefix}-c-{fcolor}.png",
        'show': False,
        'title': None,
        'xlabel': 'Principal Axis 1',
        'ylabel': 'Principal Axis 2',
        'xaxis': True,
        'yaxis': True,
        'remove_tick': False,
        'rasterized': True,
        'fontsize': 16,
        'components': components,
    }
    asap_plot = Plotters(fig_spec_dict)
    # Samples are passed in reversed order, restricted to the first two axes.
    asap_plot.plot(proj[::-1, [0, 1]], plotcolor[::-1], [], [])
    plt.show()
Esempio n. 3
0
def main(fmat, fxyz, ftags, fcolor, colorscol, prefix, output, peratom, keepraw, scale, pca_d, pc1, pc2, projectatomic, plotatomic,
         adtext):
    """
    Project a descriptor matrix with PCA, save the coordinates and plot a map.

    Parameters
    ----------
    fmat: Location of descriptor matrix file or name of the tags in ase xyz file. You can use gen_descriptors.py to compute it.
    fxyz: Location of xyz file for reading the properties ('none' to skip it).
    ftags: Location of tags for the first M samples ('none' to skip). Plot the tags on the PCA map.
    fcolor: Location of a file or name of the tags in ase xyz file. It should contain properties for all samples (N floats) used to color the scatterplot.
    colorscol: The column number of the properties used for the coloring. Starts from 0.
    prefix: Filename prefix, default is ASAP
    output: The format for output files ([xyz], [matrix]). Default is xyz. Forced to 'matrix' when no xyz file is given.
    peratom: Whether to output per atom pca coordinates (True/False)
    keepraw: Whether to keep the high dimensional descriptor when output is an xyz file (True/False)
    scale: Scale the coordinates (True/False). Scaling highly recommended.
    pca_d: Number of the principal components to keep
    pc1: Index of the principal axis plotted on the x axis
    pc2: Index of the principal axis plotted on the y axis
    projectatomic: build the projection using the (big) atomic descriptor matrix
    plotatomic: Plot the PCA coordinates of all atomic environments (True/False)
    adtext: Whether to adjust the texts (True/False)

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the descriptor matrix file cannot be parsed, if no descriptors
        were obtained from either the xyz file or a matrix file, or if
        per-atom coordinates are requested without atomic descriptors.
    """

    foutput = prefix + "-pca-d" + str(pca_d)
    use_atomic_desc = (peratom or plotatomic or projectatomic)
    # `and` binds tighter than `or`; the parentheses only make the existing
    # precedence explicit. The per-atom projection is needed whenever it is
    # written out (peratom), or plotted separately from the main projection.
    need_atomic_proj = peratom or (plotatomic and not projectatomic)

    # Start from placeholders so that the sanity checks below raise a
    # ValueError instead of tripping over an unbound local when no xyz file
    # and no matrix file are supplied.
    desc = []
    desc_atomic = None

    # try to read the xyz file
    if fxyz != 'none':
        asapxyz = ASAPXYZ(fxyz)
        desc, desc_atomic = asapxyz.get_descriptors(fmat, use_atomic_desc)
        if projectatomic:
            desc = desc_atomic.copy()
    else:
        asapxyz = None
        print("Did not provide the xyz file. We can only output descriptor matrix.")
        output = 'matrix'
    # we can also load the descriptor matrix from a standalone file
    if os.path.isfile(fmat[0]):
        try:
            desc = np.genfromtxt(fmat[0], dtype=float)
        except Exception as err:
            # Keep the original cause attached; do not intercept
            # KeyboardInterrupt / SystemExit.
            raise ValueError('Cannot load the descriptor matrix from file') from err
        print("loaded the descriptor matrix from file: ", fmat)
    # sanity check (len() rather than truthiness: desc may be a numpy array)
    if len(desc) == 0:
        raise ValueError('Please supply descriptor in a xyz file or a standlone descriptor matrix')
    if need_atomic_proj and desc_atomic is None:
        raise ValueError('Per-atom coordinates require atomic descriptors read from a xyz file')
    print("shape of the descriptor matrix: ", np.shape(desc), "number of descriptors: ", np.shape(desc[0]))

    if ftags != 'none':
        tags = np.loadtxt(ftags, dtype="str")[:]
    else:
        tags = []

    reduce_dict = {
        "pca": {"type": 'PCA', 'parameter': {"n_components": pca_d, "scalecenter": scale}}
    }
    # Alternative reducer specifications that can be swapped in:
    #
    # reduce_dict = { "umap":
    #                {"type": 'UMAP', 'parameter':{"n_components": pca_d, "n_neighbors": 10}}
    #               }
    #
    # reduce_dict = {
    #     "reduce1_pca": {"type": 'PCA', 'parameter':{"n_components": 20, "scalecenter":True}},
    #     "reduce2_tsne": {"type": 'TSNE', 'parameter': {"n_components": 2, "perplexity":20}}
    #     }
    dreducer = Dimension_Reducers(reduce_dict)

    proj = dreducer.fit_transform(desc)
    if need_atomic_proj:
        # Reuse the reducer fitted above to place every atomic environment.
        proj_atomic_all = dreducer.transform(desc_atomic)

    # save
    if output == 'matrix':
        np.savetxt(foutput + ".coord", proj, fmt='%4.8f', header='low D coordinates of samples')
        if peratom:
            np.savetxt(foutput + "-atomic.coord", proj_atomic_all, fmt='%4.8f', header='low D coordinates of samples')
    if output == 'xyz':
        asapxyz.set_descriptors(proj, 'pca_coord')
        if peratom:
            asapxyz.set_atomic_descriptors(proj_atomic_all, 'pca_coord')
        # remove the raw descriptors
        if not keepraw:
            asapxyz.remove_descriptors(fmat)
            asapxyz.remove_atomic_descriptors(fmat)
        asapxyz.write(foutput)

    # color scheme
    plotcolor, plotcolor_peratom, colorlabel, colorscale = set_color_function(fcolor, asapxyz, colorscol, 0, (peratom or plotatomic), projectatomic)

    if plotatomic:
        outfile = 'PCA_4_' + prefix + '-c-' + fcolor + '-plotatomic.png'
    else:
        outfile = 'PCA_4_' + prefix + '-c-' + fcolor + '.png'

    fig_spec_dict = {
        'outfile': outfile,
        'show': False,
        'title': None,
        'xlabel': 'Principal Axis 1',
        'ylabel': 'Principal Axis 2',
        'xaxis': True,  'yaxis': True,
        'remove_tick': False,
        'rasterized': True,
        'fontsize': 16,
        'components': {
            "first_p": {"type": 'scatter', 'clabel': colorlabel},
            "second_p": {"type": 'annotate', 'adtext': adtext}
        }
    }
    asap_plot = Plotters(fig_spec_dict)
    # Samples are passed in reversed order, restricted to the two chosen axes.
    asap_plot.plot(proj[::-1, [pc1, pc2]], plotcolor[::-1], [], tags)
    if need_atomic_proj:
        asap_plot.plot(proj_atomic_all[::-1, [pc1, pc2]], plotcolor_peratom[::-1], [], [])
    plt.show()
Esempio n. 4
0
        "n_sparse": -1,  # no sparsification
        #                                     "scale":True,
        "kernel": {
            "first_kernel": {
                "type": 'linear'
            }
        }
    }
}

#reduce_dict['skpca'] = {"type": 'PCA',
#                        'parameter':{"n_components": 10,
#                         }}
from asaplib.reducedim import Dimension_Reducers

# Instantiate the dimensionality reducer from the reduce_dict specification
# (the start of that dict lies before the visible part of this script).
dreducer = Dimension_Reducers(reduce_dict)

# Fetch the atomic descriptors stored under the given SOAP tag and fit the
# reducer on them, keeping the resulting projection.
# NOTE(review): the second argument (14) presumably selects a species by
# atomic number - confirm against ASAPXYZ.get_atomic_descriptors.
dm = asapxyz.get_atomic_descriptors(['SOAP-n6-l6-c6.0-g0.44'], 14)
proj = dreducer.fit_transform(dm)

from asaplib.plot import Plotters

fig_spec = {
    'outfile': 'test.png',
    'show': False,
    'title': None,
    'size': [8 * 1.1, 8],
    'cmap': 'gnuplot',
    'components': {
        'first_p': {
            'type': 'scatter',