def configure_default(typemap=None, types=None):
    """Configure the default 3d SOAP descriptor (with gradients).

    Silences the soap log, resets the global PowerSpectrum storage
    settings and registers particle types with the global type encoder,
    either directly via ``types`` or through an (alchemical) ``typemap``.

    :param typemap: dict with a "channels" list used for type embedding;
        mutually exclusive with ``types``.
    :param types: list of chemical elements to register with the type
        encoder; mutually exclusive with ``typemap``.
    :returns: dict of descriptor options.
    :raises ValueError: if both ``typemap`` and ``types`` are non-empty.
    """
    # FIX: None sentinels instead of mutable {}/[] defaults shared across calls
    if typemap is None:
        typemap = {}
    if types is None:
        types = []
    # Logging
    soap.silence()
    soap.soapy.wrap.PowerSpectrum.verbose = False
    # Descriptor options
    options_soap = {
        "spectrum.2d": False,
        "spectrum.gradients": True,
        "spectrum.global": False,
        "spectrum.2l1_norm": False, # NOTE "False" emphasizes coordination, "True" distances
        "radialbasis.type": "gaussian",
        "radialbasis.mode": "adaptive", # NOTE Alternatives: 'equispaced' or 'adaptive'
        "radialbasis.N": 9,
        "radialbasis.sigma": 0.5,
        "radialbasis.integration_steps": 15,
        "radialcutoff.Rc": 3.5, # NOTE Only used for 'equispaced' basis set
        "radialcutoff.Rc_width": 0.5,
        "radialcutoff.type": "heaviside",
        "radialcutoff.center_weight": 1.0,
        "angularbasis.type": "spherical-harmonic",
        "angularbasis.L": 6,
        "kernel.adaptor": "specific-unique-dmap",
        "exclude_centers": ["H"],
        "exclude_targets": [],
        "exclude_center_ids": [],
        "exclude_target_ids": []
    }
    # Storage
    soap.soapy.wrap.PowerSpectrum.settings = {
        'cxx_compute_power': True,
        'store_cxx_serial': False,
        'store_cmap': False,
        'store_gcmap': False,
        'store_sd': False,
        'store_gsd': False,
        'store_sdmap': False,
        'store_gsdmap': False,
        'dtype': 'float64' # NOTE Not (yet) used
    }
    # Use (alchemical) type embedding
    if len(types) and len(typemap):
        raise ValueError(
            "Both types and typemap non-zero, can only specify one.")
    elif len(types):
        soap.encoder.clear()
        for c in types:
            soap.encoder.add(c)
    elif len(typemap):
        log << "Using %d-dimensional type embedding" % len(
            typemap["channels"]) << log.endl
        soap.encoder.clear()
        for c in typemap["channels"]:
            soap.encoder.add(c)
        PowerSpectrum.struct_converter = soap.soapy.wrap.StructureConverter(
            typemap=typemap)
    log << "Using type encoder with %d types:" % (len(
        soap.encoder.types())) << ",".join(soap.encoder.types()) << log.endl
    return options_soap
def soap_configure_default(typemap=None, types=None):
    """Apply the default global SOAP configuration (verbose spectra).

    Silences the soap log, resets the global PowerSpectrum storage
    settings and registers particle types with the global type encoder.

    :param typemap: dict with a "channels" list for (alchemical) type
        embedding; mutually exclusive with ``types``.
    :param types: list of chemical elements for the type encoder;
        mutually exclusive with ``typemap``.
    :raises ValueError: if both ``typemap`` and ``types`` are non-empty.
    """
    # FIX: None sentinels instead of mutable {}/[] defaults shared across calls
    if typemap is None:
        typemap = {}
    if types is None:
        types = []
    # Logging
    soap.silence()
    soap.soapy.wrap.PowerSpectrum.verbose = True
    # Storage
    soap.soapy.wrap.PowerSpectrum.settings = {
        'cxx_compute_power': True,
        'store_cxx_serial': False,
        'store_cmap': False,
        'store_gcmap': False,
        'store_sd': False,
        'store_gsd': False,
        'store_sdmap': False,
        'store_gsdmap': False,
        'dtype': 'float64' # NOTE Not (yet) used
    }
    # Use (alchemical) type embedding
    if len(types) and len(typemap):
        raise ValueError("Both types and typemap non-zero, can only specify one.")
    elif len(types):
        soap.encoder.clear()
        for c in types:
            soap.encoder.add(c)
    elif len(typemap):
        log << "Using %d-dimensional type embedding" % len(typemap["channels"]) << log.endl
        soap.encoder.clear()
        for c in typemap["channels"]:
            soap.encoder.add(c)
        PowerSpectrum.struct_converter = soap.soapy.wrap.StructureConverter(typemap=typemap)
    log << "Using type encoder with %d types:" % (len(soap.encoder.types())) << ",".join(soap.encoder.types()) << log.endl
    return
def kernel_attribute(dset1, dset2, kernel_options, kweights, xi):
    # Distribute kernel-based predictions for the structures in dset1 onto
    # their individual atomic environments ("attribution").
    # :param dset1: soap.DMapMatrixSet with the structures to attribute
    # :param dset2: soap.DMapMatrixSet with the reference structures
    # :param kernel_options: dict forwarded to get_cxx_kernel; the key
    #     "topkernel_type" selects the top-level kernel contraction
    # :param kweights: regression weights, one per reference structure
    # :param xi: top-level kernel exponent
    # :returns: list (one per structure in dset1) of per-environment
    #     contribution lists
    delta_Y = []
    # For an 'average' top-kernel, contract the reference descriptors once
    # up front. NOTE: this modifies dset2 in place.
    if kernel_options["topkernel_type"] == "average":
        for i in range(len(dset2)):
            dset2[i].sum()
            dset2[i].normalize()
    kernel = get_cxx_kernel(kernel_options)
    soap.silence()
    for i in range(len(dset1)):
        log << log.back << "Attribute" << i << log.flush
        # Per-environment kernel rows of structure i against all of dset2
        Ki = kernel.attributeLeft(dset1[i], dset2, "float64")
        # Singleton set holding only structure i, used for self-similarity
        dset_i = soap.DMapMatrixSet()
        dset_i.append(dset1[i])
        # TO CHECK
        # >>> K = kernel.evaluate(dset_i, dset2, False, "float64")
        # >>> print Ki
        # >>> print np.sum(Ki, axis=0), "==", K
        # >>> raw_input('...')
        # Normalize by the square root of the structure's self-similarity
        Kii = kernel.evaluate(dset_i, dset_i, True, "float64")
        Ki = Ki/np.sum(Kii)**0.5
        # Account for top-level exponent xi
        Ki = Ki*np.sum(Ki, axis=0)**(xi-1)
        delta_Yi = Ki.dot(kweights)
        delta_Y.append(list(delta_Yi))
    log << log.endl
    return delta_Y
def soap_configure_default(typemap={}, types=[]):
    """Apply the default global SOAP configuration.

    Mutes the soap log (spectra stay verbose), resets the PowerSpectrum
    storage settings and registers particle types with the global type
    encoder -- either from an explicit type list or from an (alchemical)
    typemap; giving both is an error.
    """
    # Logging
    soap.silence()
    soap.soapy.wrap.PowerSpectrum.verbose = True
    # Storage
    soap.soapy.wrap.PowerSpectrum.settings = dict(
        cxx_compute_power=True,
        store_cxx_serial=False,
        store_cmap=False,
        store_gcmap=False,
        store_sd=False,
        store_gsd=False,
        store_sdmap=False,
        store_gsdmap=False,
        dtype='float64') # NOTE Not (yet) used
    # Type registration: explicit list XOR (alchemical) embedding
    n_types, n_map = len(types), len(typemap)
    if n_types and n_map:
        raise ValueError("Both types and typemap non-zero, can only specify one.")
    if n_types:
        soap.encoder.clear()
        for chem in types:
            soap.encoder.add(chem)
    elif n_map:
        log << "Using %d-dimensional type embedding" % len(typemap["channels"]) << log.endl
        soap.encoder.clear()
        for chem in typemap["channels"]:
            soap.encoder.add(chem)
        PowerSpectrum.struct_converter = soap.soapy.wrap.StructureConverter(typemap=typemap)
    log << "Using type encoder with %d types:" % (len(soap.encoder.types())) << ",".join(soap.encoder.types()) << log.endl
    return
def kernel_attribute(dset1, dset2, kernel_options, kweights, xi):
    """Attribute kernel predictions onto the atomic environments of dset1.

    For an 'average' top-kernel the reference set dset2 is contracted in
    place beforehand. Returns one list of per-environment contributions
    for each structure in dset1.
    """
    if kernel_options["topkernel_type"] == "average":
        # In-place contraction of the reference descriptors
        for j in range(len(dset2)):
            dset2[j].sum()
            dset2[j].normalize()
    kernel = get_cxx_kernel(kernel_options)
    soap.silence()
    attributions = []
    for idx in range(len(dset1)):
        log << log.back << "Attribute" << idx << log.flush
        # Per-environment rows against the full reference set
        rows = kernel.attributeLeft(dset1[idx], dset2, "float64")
        singleton = soap.DMapMatrixSet()
        singleton.append(dset1[idx])
        # Self-similarity normalization
        self_k = kernel.evaluate(singleton, singleton, True, "float64")
        rows = rows/np.sum(self_k)**0.5
        # Fold in the top-level kernel exponent xi
        rows = rows*np.sum(rows, axis=0)**(xi-1)
        attributions.append(list(rows.dot(kweights)))
    log << log.endl
    return attributions
def configure(types=None, silent=True, verbose=False):
    """Minimal SOAP setup: logging verbosity plus type-encoder registration.

    :param types: optional list of chemical elements to register; the
        encoder is always cleared first.
    :param silent: if True, silence the soap log.
    :param verbose: if True, enable verbose PowerSpectrum output.
    """
    # FIX: None sentinel instead of mutable [] default shared across calls
    if types is None:
        types = []
    if silent:
        soap.silence()
    if verbose:
        # BUG FIX: previously assigned False here, i.e. verbosity was
        # switched off exactly when it was requested (siblings set True).
        soap.soapy.wrap.PowerSpectrum.verbose = True
    soap.encoder.clear()
    if len(types) > 0:
        for c in types:
            soap.encoder.add(c)
    return
def configure_default_2d(laplace_cutoff=6, typemap=None, types=None):
    """Configure the default 2d (graph-Laplacian) SOAP descriptor.

    Uses a discrete radial basis whose size follows ``laplace_cutoff``
    and installs a structure converter that additionally computes the
    Laplacian.

    :param laplace_cutoff: graph-distance cutoff; sets radialbasis.N to
        laplace_cutoff+1 and radialcutoff.Rc to laplace_cutoff+0.5.
    :param typemap: dict with a "channels" list for type embedding;
        mutually exclusive with ``types``.
    :param types: list of chemical elements for the type encoder;
        mutually exclusive with ``typemap``.
    :returns: dict of descriptor options.
    :raises ValueError: if both ``typemap`` and ``types`` are non-empty.
    """
    # FIX: None sentinels instead of mutable {}/[] defaults shared across calls
    if typemap is None:
        typemap = {}
    if types is None:
        types = []
    # Logging
    soap.silence()
    soap.soapy.wrap.PowerSpectrum.verbose = True
    # Descriptor options
    options_soap = {
        "spectrum.2d": True,
        "spectrum.gradients": False,
        "spectrum.global": False,
        "spectrum.2l1_norm": True,
        "radialbasis.type": "discrete",
        "radialbasis.N": laplace_cutoff + 1,
        "radialcutoff.Rc": laplace_cutoff + 0.5,
        "radialcutoff.Rc_width": 0.5,
        "radialcutoff.type": "shifted-cosine",
        "radialcutoff.center_weight": 1.0,
        "angularbasis.type": "spherical-harmonic",
        "angularbasis.L": 2,
        "kernel.adaptor": "specific-unique-dmap",
        "exclude_centers": ["H"],
        "exclude_targets": [],
        "exclude_center_ids": [],
        "exclude_target_ids": []
    }
    # Storage
    soap.soapy.wrap.PowerSpectrum.settings = {
        'cxx_compute_power': True,
        'store_cxx_serial': False,
        'store_cmap': False,
        'store_gcmap': False,
        'store_sd': False,
        'store_gsd': False,
        'store_sdmap': False,
        'store_gsdmap': False,
        'dtype': 'float64' # NOTE Not (yet) used
    }
    # Structure converter needs to additionally calculate Laplacian
    PowerSpectrum.struct_converter = soap.soapy.wrap.StructureConverter(
        laplace_cutoff=laplace_cutoff)
    # Use (alchemical) type embedding
    if len(types) and len(typemap):
        raise ValueError(
            "Both types and typemap non-zero, can only specify one.")
    elif len(types):
        soap.encoder.clear()
        for c in types:
            soap.encoder.add(c)
    elif len(typemap):
        log << "Using %d-dimensional type embedding" % len(
            typemap["channels"]) << log.endl
        soap.encoder.clear()
        for c in typemap["channels"]:
            soap.encoder.add(c)
        PowerSpectrum.struct_converter = soap.soapy.wrap.StructureConverter(
            typemap=typemap, laplace_cutoff=laplace_cutoff)
    log << "Using type encoder with %d types:" % (len(
        soap.encoder.types())) << ",".join(soap.encoder.types()) << log.endl
    return options_soap
def configure_default_2d(laplace_cutoff=6, typemap={}, types=[]):
    """Set up the default 2d (Laplacian-based) SOAP descriptor.

    Returns the descriptor options; the structure converter is replaced
    by one that additionally computes the graph Laplacian up to the given
    cutoff.
    """
    # Logging
    soap.silence()
    soap.soapy.wrap.PowerSpectrum.verbose = True
    # Descriptor options; radial basis size follows the Laplace cutoff
    options_soap = {
        "spectrum.2d": True,
        "spectrum.gradients": False,
        "spectrum.global": False,
        "spectrum.2l1_norm": True,
        "radialbasis.type": "discrete",
        "radialbasis.N": laplace_cutoff + 1,
        "radialcutoff.Rc": laplace_cutoff + 0.5,
        "radialcutoff.Rc_width": 0.5,
        "radialcutoff.type": "shifted-cosine",
        "radialcutoff.center_weight": 1.0,
        "angularbasis.type": "spherical-harmonic",
        "angularbasis.L": 2,
        "kernel.adaptor": "specific-unique-dmap",
        "exclude_centers": ["H"],
        "exclude_targets": [],
        "exclude_center_ids": [],
        "exclude_target_ids": []
    }
    # Storage
    soap.soapy.wrap.PowerSpectrum.settings = dict(
        cxx_compute_power=True,
        store_cxx_serial=False,
        store_cmap=False,
        store_gcmap=False,
        store_sd=False,
        store_gsd=False,
        store_sdmap=False,
        store_gsdmap=False,
        dtype='float64') # NOTE Not (yet) used
    # Structure converter needs to additionally calculate Laplacian
    PowerSpectrum.struct_converter = soap.soapy.wrap.StructureConverter(
        laplace_cutoff=laplace_cutoff)
    # Type registration: explicit list XOR (alchemical) embedding
    n_types, n_map = len(types), len(typemap)
    if n_types and n_map:
        raise ValueError("Both types and typemap non-zero, can only specify one.")
    if n_types:
        soap.encoder.clear()
        for chem in types:
            soap.encoder.add(chem)
    elif n_map:
        log << "Using %d-dimensional type embedding" % len(typemap["channels"]) << log.endl
        soap.encoder.clear()
        for chem in typemap["channels"]:
            soap.encoder.add(chem)
        PowerSpectrum.struct_converter = soap.soapy.wrap.StructureConverter(
            typemap=typemap, laplace_cutoff=laplace_cutoff)
    log << "Using type encoder with %d types:" % (len(soap.encoder.types())) << ",".join(soap.encoder.types()) << log.endl
    return options_soap
def configure_default(typemap={}, types=[]):
    """Set up the default 3d SOAP descriptor and return its options.

    The soap log is silenced and the global PowerSpectrum storage
    settings are (re)set. Particle types are registered either directly
    via `types` or through an (alchemical) `typemap` -- never both.
    """
    # Logging
    soap.silence()
    soap.soapy.wrap.PowerSpectrum.verbose = False
    # Descriptor options, assembled section by section
    options_soap = {}
    options_soap.update({
        "spectrum.2d": False,
        "spectrum.gradients": True,
        "spectrum.global": False,
        "spectrum.2l1_norm": False}) # NOTE "False" emphasizes coordination, "True" distances
    options_soap.update({
        "radialbasis.type": "gaussian",
        "radialbasis.mode": "adaptive", # NOTE Alternatives: 'equispaced' or 'adaptive'
        "radialbasis.N": 9,
        "radialbasis.sigma": 0.5,
        "radialbasis.integration_steps": 15})
    options_soap.update({
        "radialcutoff.Rc": 3.5, # NOTE Only used for 'equispaced' basis set
        "radialcutoff.Rc_width": 0.5,
        "radialcutoff.type": "heaviside",
        "radialcutoff.center_weight": 1.0})
    options_soap.update({
        "angularbasis.type": "spherical-harmonic",
        "angularbasis.L": 6})
    options_soap.update({
        "kernel.adaptor": "specific-unique-dmap",
        "exclude_centers": ["H"],
        "exclude_targets": [],
        "exclude_center_ids": [],
        "exclude_target_ids": []})
    # Storage
    soap.soapy.wrap.PowerSpectrum.settings = dict(
        cxx_compute_power=True,
        store_cxx_serial=False,
        store_cmap=False,
        store_gcmap=False,
        store_sd=False,
        store_gsd=False,
        store_sdmap=False,
        store_gsdmap=False,
        dtype='float64') # NOTE Not (yet) used
    # Type registration: explicit list XOR (alchemical) embedding
    if len(types) and len(typemap):
        raise ValueError("Both types and typemap non-zero, can only specify one.")
    if len(types):
        soap.encoder.clear()
        for element in types:
            soap.encoder.add(element)
    elif len(typemap):
        log << "Using %d-dimensional type embedding" % len(typemap["channels"]) << log.endl
        soap.encoder.clear()
        for element in typemap["channels"]:
            soap.encoder.add(element)
        PowerSpectrum.struct_converter = soap.soapy.wrap.StructureConverter(typemap=typemap)
    log << "Using type encoder with %d types:" % (len(soap.encoder.types())) << ",".join(soap.encoder.types()) << log.endl
    return options_soap
# NOTE(review): Python-2 script fragment with several experiment variants
# (random displacement, hard-coded positions) kept as triple-quoted dead
# code. The quote tokens are unbalanced in this chunk, so which variant is
# active cannot be determined from here -- left verbatim; confirm against
# the full file before restructuring.
restore_positions(structure, positions_orig) positions = [ part.pos for part in structure ] print positions """ logging.basicConfig(format='[%(asctime)s] %(message)s', datefmt='%I:%M:%S', level=logging.ERROR) verbose = False positions_0 = [part.pos for part in structure] print "Positions at start" for p in positions_0: print p soap.silence() kernelpot = KernelPotential(options) kernelpot.acquire(structure, 1.) """ for p in structure: dpos = np.array([0.,0.,0.]) dpos[0] = np.random.uniform(-1.,1.) dpos[1] = np.random.uniform(-1.,1.) dpos[2] = np.random.uniform(-1.,1.) p.pos = p.pos + 0.2*dpos """ """ positions = [ np.array([1.,0.,0.]), np.array([1.5,0.1,0.]), np.array([-0.7,0.1,0.]) ] restore_positions(structure, positions) """
# Parallelization parameters # How many processors? n_procs # How many tasks per batch? batch_size # How many tasks read upon demand? chunk_size # Kernel matrix block size per task? mp_kernel_block_size x mp_kernel_block_size # Command-line options log = soap.soapy.momo.osio log.Connect() log.AddArg('folder', typ=str, help="Data folder as execution target") log.AddArg('config_file', typ=str, help="xyz-trajectory file") log.AddArg('types_compile', typ=bool, help="Whether or not to compile particle types from dataset or use those specified in options file") log.AddArg('label_key', typ=str, help="Key storing unique identifier in <config_file>") log.AddArg('options', typ=str, help="Options file (json)") log.AddArg('hdf5_out', typ=str, help="Output hdf5 file name") log.AddArg('select', typ=int, default=-1, help="Actives to select") log.AddArg('n_procs', typ=int, default=1, help="Number of processors") log.AddArg('mp_kernel_block_size', typ=int, default=-1, help="Linear block size for kernel computation") log.AddArg('graph', typ=bool, default=True, help="Whether or not to compute graph first. If not, load from hdf5_out.") log.AddArg('kernel', typ=bool, default=True, help="Whether or not to compute kernel") cmdline_options = log.Parse() json_options = soap.soapy.util.json_load_utf8(open(cmdline_options.options)) # Run log.cd(cmdline_options.folder) soap.silence() run(log=log, cmdline_options=cmdline_options, json_options=json_options) log.root()
# NOTE(review): script fragment -- the argument registration continues from
# an earlier chunk, and the generate_graph(...) call at the end is truncated
# at this chunk boundary.
log.AddArg("ranking", typ=str, default="cov",
    help="Ranking criterion: 'cov', 'cov*q', 'dcov*dq'")
log.AddArg("decompose", typ=str, default="",
    help="Covariance decomposition: 'global', 'top', 'global+top'")
options = log.Parse()
# PREPARATORY STEPS
state_base = rmt.State().unpickle(options.statefile)
cv_iterator = soap.soapy.npfga.cv_iterator[options.cv_mode](state_base, options)
np.random.seed(options.seed)
if not options.verbose: soap.silence()
if not os.path.exists(options.output_folder):
    log >> 'mkdir -p %s' % options.output_folder
# Generate random feature matrices
log << log.mg << "Sample random feature matrices" << log.endl
rand_IX_list_base = soap.soapy.npfga.RandomizeMatrix(
    method="perm_within_cols").sample(X=state_base["IX"],
    n_samples=options.sample, seed=None, log=log)
# Generate graph
fgraph = soap.soapy.npfga.generate_graph(
    state_base["features_with_props"],
    uop_list=options.uop,
def calc_soap_cross_similarity(ase_atoms_list_rows, ase_atoms_list_cols, tmp_folder, h5_filename, options):
    """Compute the rectangular SOAP kernel and distance matrix between two structure sets.

    Graphs (descriptors) are computed for both row and column structures,
    cached in an HDF5 archive together with their labels, and then
    combined pair-wise via the configured base/top kernel.

    :param ase_atoms_list_rows: list of ase.Atoms objects (kernel rows)
    :param ase_atoms_list_cols: list of ase.Atoms objects (kernel columns)
    :param tmp_folder: folder in which the HDF5 archive is created
    :param h5_filename: name of the HDF5 archive
    :param options: dict with 'descriptor', 'basekernel' and 'topkernel' sections
    :returns: (kmat, dmat) -- kernel and distance matrix of shape
        (len(rows), len(cols))
    """
    h5_file = os.path.abspath(os.path.normpath(os.path.join(tmp_folder, h5_filename)))
    log = soap.soapy.momo.osio
    # Setup HDF5 storage
    h5 = h5py.File(h5_file, 'w')
    # ==============
    # Compute graphs
    # ==============
    log << log.mg << "Computing graphs ..." << log.endl
    # Options
    descriptor_type = options['descriptor']['type']
    descriptor_options = options['descriptor'][descriptor_type]
    log << "Descriptor" << descriptor_type << json.dumps(
        descriptor_options, indent=2, sort_keys=True) << log.endl
    # Compute
    graphs_rows = [
        compute_graph(
            config=config,
            descriptor_options=descriptor_options,
            log=log
        ) for config in ase_atoms_list_rows
    ]
    graphs_cols = [
        compute_graph(
            config=config,
            descriptor_options=descriptor_options,
            log=log
        ) for config in ase_atoms_list_cols
    ]
    # Store
    h5_graphs_rows = h5.create_group('/graphs-rows')
    h5_graphs_cols = h5.create_group('/graphs-cols')
    h5.attrs['descriptor_options'] = json.dumps(descriptor_options)
    for g in graphs_rows:
        g.save_to_h5(h5_graphs_rows)
    for g in graphs_cols:
        g.save_to_h5(h5_graphs_cols)
    # Optional: Save labels
    labels_rows = np.zeros((len(h5_graphs_rows),), dtype=[('idx', 'i8'), ('tag', 'a32')])
    labels_cols = np.zeros((len(h5_graphs_cols),), dtype=[('idx', 'i8'), ('tag', 'a32')])
    # NOTE h5py py2-style iteration over (name, group) pairs
    for g in h5_graphs_rows.iteritems():
        idx = int(g[0])
        tag = g[1].attrs['label']
        g_info = json.loads(g[1].attrs['graph_info'])  # NOTE parsed but unused -- TODO confirm it is only a validity check
        labels_rows[idx] = (idx, tag)
    for g in h5_graphs_cols.iteritems():
        idx = int(g[0])
        tag = g[1].attrs['label']
        g_info = json.loads(g[1].attrs['graph_info'])
        labels_cols[idx] = (idx, tag)
    h5_labels_rows = h5.create_group('labels-rows')
    h5_labels_cols = h5.create_group('labels-cols')
    h5_labels_rows.create_dataset('label_mat', data=labels_rows)
    h5_labels_cols.create_dataset('label_mat', data=labels_cols)
    # ==============
    # COMPUTE KERNEL
    # ==============
    soap.silence()
    log << log.mg << "Computing kernel ..." << log.endl
    # Options
    basekernel_type = options["basekernel"]["type"]
    basekernel_options = options["basekernel"][basekernel_type]
    basekernel = BaseKernelFactory[basekernel_type](basekernel_options)
    topkernel_type = options["topkernel"]["type"]
    topkernel_options = options["topkernel"][topkernel_type]
    topkernel = TopKernelFactory[topkernel_type](topkernel_options, basekernel)
    log << "Base-kernel" << basekernel_type << json.dumps(
        basekernel_options, indent=2, sort_keys=True) << log.endl
    log << "Top-kernel" << topkernel_type << json.dumps(
        topkernel_options, indent=2, sort_keys=True) << log.endl
    # (Re-)load graphs
    graphs_rows = [
        Graph().load_from_h5(h5_graphs_rows['%06d' % i])
        for i in range(len(h5_graphs_rows))
    ]
    graphs_cols = [
        Graph().load_from_h5(h5_graphs_cols['%06d' % i])
        for i in range(len(h5_graphs_cols))
    ]
    # Compute pair-wise kernel entries
    kmat = np.zeros((len(graphs_rows), len(graphs_cols)), dtype='float32')
    for i in range(len(graphs_rows)):
        for j in range(len(graphs_cols)):
            kmat[i, j] = topkernel.compute(graphs_rows[i], graphs_cols[j], log)
    # Store
    h5_kernel = h5.create_group('kernel')
    h5_kernel.create_dataset('kernel_mat', data=kmat)
    h5.close()
    # FIX: removed unused local 'kmat_new = kmat.copy()' (dead code).
    # Distance conversion; 1e-10 guards the sqrt against small negative
    # round-off when kmat entries are ~1.
    dmat = (1. - kmat**2 + 1e-10)**0.5
    return kmat, dmat
def calc_soap_similarity(ase_atoms_list, tmp_folder, h5_filename, options):
    # Compute the symmetric SOAP kernel (and derived distance matrix) for a
    # single list of structures; graphs, labels and the kernel matrix are
    # cached in an HDF5 archive.
    # :param ase_atoms_list: list of ase.Atoms objects
    # :param tmp_folder: folder in which the HDF5 archive is created
    # :param h5_filename: name of the HDF5 archive
    # :param options: dict with 'descriptor', 'basekernel', 'topkernel' sections
    # :returns: (kmat, dmat) -- square kernel and distance matrices
    h5_file = os.path.abspath(os.path.normpath(os.path.join(tmp_folder, h5_filename)))
    log = soap.soapy.momo.osio
    # Setup HDF5 storage
    h5 = h5py.File(h5_file, 'w')
    # ================
    # Read ASE configs
    # ================
    # TODO Load structures here as <list(ase.atoms)>
    # configs = read_filter_configs(
    #     'configs.xyz',
    #     index=':',
    #     filter_types=False,
    #     types=[],
    #     do_remove_duplicates=False,
    #     key=lambda c: c.info['label'],
    #     log=log)
    # LOAD DATA, CONVERT TO ASE
    # ==============
    # Compute graphs
    # ==============
    log << log.mg << "Computing graphs ..." << log.endl
    # Options
    descriptor_type = options['descriptor']['type']
    descriptor_options = options['descriptor'][descriptor_type]
    log << "Descriptor" << descriptor_type << json.dumps(
        descriptor_options, indent=2, sort_keys=True) << log.endl
    # Compute
    graphs = [
        compute_graph(
            config=config,
            descriptor_options=descriptor_options,
            log=log
        ) for config in ase_atoms_list
    ]
    # Store
    h5_graphs = h5.create_group('/graphs')
    h5.attrs['descriptor_options'] = json.dumps(descriptor_options)
    for g in graphs:
        g.save_to_h5(h5_graphs)
    # Optional: Save labels
    labels = np.zeros((len(h5_graphs),), dtype=[('idx', 'i8'), ('tag', 'a32')])
    # NOTE h5py py2-style iteration over (name, group) pairs
    for g in h5_graphs.iteritems():
        idx = int(g[0])
        tag = g[1].attrs['label']
        g_info = json.loads(g[1].attrs['graph_info'])  # NOTE parsed but unused -- TODO confirm it is only a validity check
        labels[idx] = (idx, tag)
    h5_labels = h5.create_group('labels')
    h5_labels.create_dataset('label_mat', data=labels)
    # ==============
    # COMPUTE KERNEL
    # ==============
    soap.silence()
    log << log.mg << "Computing kernel ..." << log.endl
    # Options
    basekernel_type = options["basekernel"]["type"]
    basekernel_options = options["basekernel"][basekernel_type]
    basekernel = BaseKernelFactory[basekernel_type](basekernel_options)
    topkernel_type = options["topkernel"]["type"]
    topkernel_options = options["topkernel"][topkernel_type]
    topkernel = TopKernelFactory[topkernel_type](topkernel_options, basekernel)
    log << "Base-kernel" << basekernel_type << json.dumps(
        basekernel_options, indent=2, sort_keys=True) << log.endl
    log << "Top-kernel" << topkernel_type << json.dumps(
        topkernel_options, indent=2, sort_keys=True) << log.endl
    # (Re-)load graphs
    graphs = [
        Graph().load_from_h5(h5_graphs['%06d' % i])
        for i in range(len(h5_graphs))
    ]
    # Compute pair-wise kernel entries (upper triangle incl. diagonal only)
    kmat = np.zeros((len(graphs), len(graphs)), dtype='float32')
    for i in range(len(graphs)):
        for j in range(i, len(graphs)):
            kmat[i, j] = topkernel.compute(graphs[i], graphs[j], log)
    # Symmetrize; the diagonal was added twice, hence halve it afterwards
    kmat = kmat + kmat.T
    np.fill_diagonal(kmat, kmat.diagonal() * 0.5)
    # Store
    h5_kernel = h5.create_group('kernel')
    h5_kernel.create_dataset('kernel_mat', data=kmat)
    h5.close()
    ###
    # NOTE(review): kmat_new is never used below -- looks like a leftover;
    # verify before removing
    kmat_new = kmat.copy()
    np.fill_diagonal(kmat_new, 0.5 * kmat_new.diagonal())
    # logger.debug(kmat[0:7,0:7])
    # Distance conversion; 1e-10 guards the sqrt against negative round-off
    dmat = (1. - kmat**2 + 1e-10)**0.5
    return kmat, dmat
# NOTE(review): script fragment -- argument registration continues from an
# earlier chunk, and the generate_graph(...) call at the end is truncated
# at this chunk boundary.
# NOTE(review): the help text for 'rank_coeff' ("'moment', 'rank' or
# 'mixed'") looks copy-pasted from a different (string) option -- verify.
log.AddArg("rank_coeff", typ=float, default=0.2,
    help="'moment', 'rank' or 'mixed'")
log.AddArg("bootstrap", typ=int, default=0,
    help='Number of bootstrap samples used when calculating feature statistics')
log.AddArg("sample", typ=int, default=1000,
    help='Number of random samples for constructing null distributions')
log.AddArg("tail_fraction", typ=float, default=0.01,
    help='Tail percentile used for calculating exceedences')
log.AddArg("seed", typ=int, default=830292, help='RNG seed')
log.AddArg("verbose", typ=bool, default=False, help='Verbosity toggle')
# ANALYSIS
log.AddArg("ranking", typ=str, default="cov",
    help="Ranking criterion: 'cov', 'cov*q', 'dcov*dq'")
log.AddArg("decompose", typ=str, default="",
    help="Covariance decomposition: 'global', 'top', 'global+top'")
options = log.Parse()
# PREPARATORY STEPS
state_base = rmt.State().unpickle(options.statefile)
cv_iterator = soap.soapy.npfga.cv_iterator[options.cv_mode](state_base, options)
np.random.seed(options.seed)
if not options.verbose: soap.silence()
if not os.path.exists(options.output_folder):
    log >> 'mkdir -p %s' % options.output_folder
# Generate random feature matrices
log << log.mg << "Sample random feature matrices" << log.endl
rand_IX_list_base = soap.soapy.npfga.RandomizeMatrix(method="perm_within_cols").sample(
    X=state_base["IX"], n_samples=options.sample, seed=None, log=log)
# Generate graph
fgraph = soap.soapy.npfga.generate_graph(
    state_base["features_with_props"],
    uop_list=options.uop,