def map_process(obj, reduce_dict, axes, map_name):
    """
    Project the design matrix, draw the map and save the coordinates.

    Parameters
    ----------
    obj: dict-like run state. Keys read here: 'design_matrix', 'map_options',
         'fig_options', 'asapxyz', 'design_matrix_atomic' (only when
         map_options['peratom'] is set) and, optionally, 'cluster_labels'.
    reduce_dict: specification handed to Dimension_Reducers. If it has a
         top-level 'type' equal to 'RAW', no reduction is performed and the
         design matrices are used as they are.
    axes: forwarded unchanged to map_plot
    map_name: forwarded unchanged to map_save
    """
    map_opts = obj['map_options']
    use_raw = 'type' in reduce_dict and reduce_dict['type'] == 'RAW'

    # project
    if use_raw:
        proj = obj['design_matrix']
        proj_atomic = obj['design_matrix_atomic'] if map_opts['peratom'] else None
    else:
        from asaplib.reducedim import Dimension_Reducers
        reducer = Dimension_Reducers(reduce_dict)
        proj = reducer.fit_transform(obj['design_matrix'])
        proj_atomic = None
        if map_opts['peratom']:
            atomic_matrix = obj['design_matrix_atomic']
            print("Project atomic design matrix with No. of samples:", len(atomic_matrix))
            # reuse the reducer fitted on the global design matrix
            proj_atomic = reducer.transform(atomic_matrix)

    # plot
    cluster_labels = obj['cluster_labels'] if 'cluster_labels' in obj else []
    map_plot(obj['fig_options'], proj, proj_atomic,
             map_opts['color'], map_opts['color_atomic'],
             cluster_labels, map_opts['annotate'], axes)

    # output
    out_name = obj['fig_options']['outfile']
    out_mode = map_opts['outmode']
    species = map_opts['only_use_species']
    if map_opts['project_atomic']:
        # the projection itself goes in the second (atomic) slot
        global_coords, atomic_coords = None, proj
    else:
        global_coords, atomic_coords = proj, proj_atomic
    map_save(out_name, out_mode, obj['asapxyz'], global_coords, atomic_coords, map_name, species)
def main():
    """
    Test if dimensionality reduction is working.

    Takes no arguments: every input is hard-coded below. The xyz file
    'small_molecules-SOAP.xyz' is read from the directory of this script,
    its descriptors are reduced with a scaling step followed by sparse
    kernel PCA (linear kernel), the coordinates are written back to disk
    and the first two components are plotted.
    """
    here = os.path.dirname(__file__)
    fxyz = os.path.join(here, 'small_molecules-SOAP.xyz')
    fmat = ['SOAP-n4-l3-c1.9-g0.23']
    fcolor = 'dft_formation_energy_per_atom_in_eV'
    pca_d = 10
    prefix = "test-dimensionality-reduction"
    foutput = "{}-pca-d{}".format(prefix, pca_d)

    # try to read the xyz file
    asapxyz = ASAPXYZ(fxyz)
    desc, _ = asapxyz.get_descriptors(fmat, False)
    print(desc)

    # Alternative reducer specifications, kept for reference:
    #   {"pca": {"type": 'PCA',
    #            'parameter': {"n_components": pca_d, "scalecenter": scale}}}
    #   {"preprocessing": {"type": 'SCALE', 'parameter': None},
    #    "umap": {"type": 'UMAP',
    #             'parameter': {"n_components": pca_d, "n_neighbors": 10}}}
    #   {"reduce1_pca": {"type": 'PCA',
    #                    'parameter': {"n_components": 20, "scalecenter": True}},
    #    "reduce2_tsne": {"type": 'TSNE',
    #                     'parameter': {"n_components": 2, "perplexity": 20}}}
    linear_kernel = {"first_kernel": {"type": 'linear', "normalize": True}}
    reduce_dict = {
        "preprocessing": {"type": 'SCALE', 'parameter': None},
        "skpca": {"type": 'SPARSE_KPCA',
                  'parameter': {"n_components": pca_d, "kernel": linear_kernel}},
    }
    reducer = Dimension_Reducers(reduce_dict)
    proj = reducer.fit_transform(desc)

    # save
    asapxyz.set_descriptors(proj, 'pca_coord')
    asapxyz.write(foutput)

    # color scheme
    plotcolor, _plotcolor_peratom, colorlabel, _colorscale = set_color_function(fcolor, asapxyz)

    components = {
        "first_p": {"type": 'scatter', 'clabel': colorlabel},
        "second_p": {"type": 'annotate', 'adtext': False},
    }
    fig_spec_dict = {
        'outfile': 'PCA_4_' + prefix + '-c-' + fcolor + '.png',
        'show': False,
        'title': None,
        'xlabel': 'Principal Axis 1',
        'ylabel': 'Principal Axis 2',
        'xaxis': True,
        'yaxis': True,
        'remove_tick': False,
        'rasterized': True,
        'fontsize': 16,
        'components': components,
    }
    plotter = Plotters(fig_spec_dict)
    # samples are passed in reversed order, first two components only
    plotter.plot(proj[::-1, [0, 1]], plotcolor[::-1], [], [])
    plt.show()
def main(fmat, fxyz, ftags, fcolor, colorscol, prefix, output, peratom, keepraw, scale, pca_d, pc1, pc2, projectatomic,
         plotatomic, adtext):
    """
    Run a PCA on a descriptor matrix, save the coordinates and plot the map.

    Parameters
    ----------
    fmat: Location of descriptor matrix file or name of the tags in ase xyz file. You can use gen_descriptors.py to compute it.
    fxyz: Location of xyz file for reading the properties ('none' to skip it).
    ftags: Location of tags for the first M samples ('none' to skip it). Plot the tags on the PCA map.
    fcolor: Location of a file or name of the tags in ase xyz file.
            It should contain properties for all samples (N floats) used to color the scatterplot.
    colorscol: The column number of the properties used for the coloring. Starts from 0.
    prefix: Filename prefix, default is ASAP
    output: The format for output files ([xyz], [matrix]). Default is xyz.
            Forced to 'matrix' when no xyz file is given.
    peratom: Whether to output per atom pca coordinates (True/False)
    keepraw: Whether to keep the high dimensional descriptor when output is an xyz file (True/False)
    scale: Scale the coordinates (True/False). Scaling highly recommended.
    pca_d: Number of the principal components to keep
    pc1: Index of the principal axis plotted along x
    pc2: Index of the principal axis plotted along y
    projectatomic: build the projection using the (big) atomic descriptor matrix
    plotatomic: Plot the PCA coordinates of all atomic environments (True/False)
    adtext: Whether to adjust the texts (True/False)

    Returns
    -------
    None

    Raises
    ------
    ValueError: if the descriptor file cannot be parsed, if no descriptors
                could be obtained from either the xyz file or a matrix file,
                or if per-atom coordinates are requested without an xyz file.
    """

    foutput = prefix + "-pca-d" + str(pca_d)
    use_atomic_desc = (peratom or plotatomic or projectatomic)
    # `and` binds tighter than `or`; the parentheses only spell out the
    # precedence the original expression already had.
    need_atomic_proj = peratom or (plotatomic and not projectatomic)

    # Start empty so the sanity checks below raise a clear ValueError
    # (instead of an unbound-name error) when nothing gets loaded.
    desc = []
    desc_atomic = None

    # try to read the xyz file
    if fxyz != 'none':
        asapxyz = ASAPXYZ(fxyz)
        desc, desc_atomic = asapxyz.get_descriptors(fmat, use_atomic_desc)
        if projectatomic:
            desc = desc_atomic.copy()
    else:
        asapxyz = None
        print("Did not provide the xyz file. We can only output descriptor matrix.")
        output = 'matrix'

    # we can also load the descriptor matrix from a standalone file
    if os.path.isfile(fmat[0]):
        try:
            desc = np.genfromtxt(fmat[0], dtype=float)
        except Exception as err:
            # keep the ValueError contract, but preserve the real cause
            raise ValueError('Cannot load the descriptor matrix from file') from err
        print("loaded the descriptor matrix from file: ", fmat)

    # sanity check
    if len(desc) == 0:
        raise ValueError('Please supply descriptor in a xyz file or a standlone descriptor matrix')
    if need_atomic_proj and desc_atomic is None:
        raise ValueError('Per-atom coordinates need atomic descriptors, which are only read from an xyz file')
    print("shape of the descriptor matrix: ", np.shape(desc), "number of descriptors: ", np.shape(desc[0]))

    if ftags != 'none':
        tags = np.loadtxt(ftags, dtype="str")[:]
    else:
        tags = []

    # Other reducers can be chained the same way, e.g. a 'UMAP' entry, or a
    # 'PCA' entry followed by a 'TSNE' entry.
    reduce_dict = {
        "pca": {"type": 'PCA', 'parameter': {"n_components": pca_d, "scalecenter": scale}}
    }
    dreducer = Dimension_Reducers(reduce_dict)

    proj = dreducer.fit_transform(desc)
    if need_atomic_proj:
        proj_atomic_all = dreducer.transform(desc_atomic)

    # save
    if output == 'matrix':
        np.savetxt(foutput + ".coord", proj, fmt='%4.8f', header='low D coordinates of samples')
        if peratom:
            np.savetxt(foutput + "-atomic.coord", proj_atomic_all, fmt='%4.8f',
                       header='low D coordinates of samples')
    if output == 'xyz':
        asapxyz.set_descriptors(proj, 'pca_coord')
        if peratom:
            asapxyz.set_atomic_descriptors(proj_atomic_all, 'pca_coord')
        # remove the raw descriptors
        if not keepraw:
            asapxyz.remove_descriptors(fmat)
            asapxyz.remove_atomic_descriptors(fmat)
        asapxyz.write(foutput)

    # color scheme
    plotcolor, plotcolor_peratom, colorlabel, colorscale = set_color_function(fcolor, asapxyz, colorscol, 0,
                                                                              (peratom or plotatomic), projectatomic)
    if plotatomic:
        outfile = 'PCA_4_' + prefix + '-c-' + fcolor + '-plotatomic.png'
    else:
        outfile = 'PCA_4_' + prefix + '-c-' + fcolor + '.png'

    fig_spec_dict = {
        'outfile': outfile,
        'show': False,
        'title': None,
        'xlabel': 'Principal Axis 1',
        'ylabel': 'Principal Axis 2',
        'xaxis': True,
        'yaxis': True,
        'remove_tick': False,
        'rasterized': True,
        'fontsize': 16,
        'components': {
            "first_p": {"type": 'scatter', 'clabel': colorlabel},
            "second_p": {"type": 'annotate', 'adtext': adtext}
        }
    }
    asap_plot = Plotters(fig_spec_dict)
    # samples are passed in reversed order, restricted to the two chosen axes
    asap_plot.plot(proj[::-1, [pc1, pc2]], plotcolor[::-1], [], tags)
    if need_atomic_proj:
        asap_plot.plot(proj_atomic_all[::-1, [pc1, pc2]], plotcolor_peratom[::-1], [], [])

    plt.show()
"n_sparse": -1, # no sparsification # "scale":True, "kernel": { "first_kernel": { "type": 'linear' } } } } #reduce_dict['skpca'] = {"type": 'PCA', # 'parameter':{"n_components": 10, # }} from asaplib.reducedim import Dimension_Reducers dreducer = Dimension_Reducers(reduce_dict) dm = asapxyz.get_atomic_descriptors(['SOAP-n6-l6-c6.0-g0.44'], 14) proj = dreducer.fit_transform(dm) from asaplib.plot import Plotters fig_spec = { 'outfile': 'test.png', 'show': False, 'title': None, 'size': [8 * 1.1, 8], 'cmap': 'gnuplot', 'components': { 'first_p': { 'type': 'scatter',