def initialize(self, comm, config, args):
    """Load the input coordinates on every rank and set up the object state.

    Stores ``config`` and ``args`` on the instance, opens
    ``args.struct_file`` through ``reader.open`` and records its filename
    and number of lines (``self.npoints``).  The coordinates end up in
    ``self.coords`` on every rank, after which the local scale, the weights
    and the metric are initialized and ``self.neigs`` is set to 10.

    :param comm: MPI communicator (``Get_rank``, ``allgather`` and ``bcast``
        are used).
    :param config: configuration object, stored as ``self.config``.
    :param args: parsed arguments; ``args.struct_file`` is read here.
    """
    my_rank = comm.Get_rank()
    self.config, self.args = config, args

    handle = reader.open(args.struct_file)
    self.struct_filename = handle.filename
    self.npoints = handle.nlines
    self.idxs_thread = p_index.get_idxs_thread(comm, self.npoints)

    if not hasattr(handle, '_skip'):
        # serial reading: only rank 0 touches the file, the result is
        # then broadcast to everybody
        self.coords = handle.readlines() if my_rank == 0 else None
        self.coords = comm.bcast(self.coords, root=0)
    else:
        # multi-thread reading: every rank reads its own lines and the
        # pieces are stacked after an allgather
        local_chunk = handle.readlines(self.idxs_thread)
        gathered = comm.allgather(local_chunk)
        self.coords = np.vstack(gathered)

    logging.info('input coordinates loaded')

    for setup_step in (self.initialize_local_scale,
                       self.initialize_weights,
                       self.initialize_metric):
        setup_step()

    self.neigs = 10
def get_weights(self, comm):
    """Compute the fit weights by pseudo-inverting the kernel matrix.

    If ``self.distance_matrix`` is ``None`` it is computed first: every rank
    builds the rows for the points assigned to it by
    ``p_index.get_idxs_thread`` and the row blocks are gathered on all
    ranks.  The kernel matrix
    ``self.fit_function(self.distance_matrix, self.sigma)`` is then inverted
    with a truncated SVD and applied to ``self.values``.

    Side effects: ``self.sigma`` is replaced by the result of
    ``self.initialize_sigma_values``; when the distance matrix has to be
    computed, ``self.idxs_thread``, ``self.npoints_per_thread``,
    ``self.offsets_per_thread`` and ``self.distance_matrix`` are set too.

    :param comm: MPI communicator (only used when the distance matrix has
        to be computed).
    :returns: ``np.dot(pseudo_inverse(kernel_matrix), self.values)``.
    """
    # compute the distance matrix if needed
    if self.distance_matrix is None:
        logging.info("distance matrix not provided, computing it...")
        (self.idxs_thread,
         self.npoints_per_thread,
         self.offsets_per_thread) = p_index.get_idxs_thread(comm, self.npoints)
        coords_thread = np.array([self.coords[idx] for idx in self.idxs_thread])
        DistanceMatrix = mt.DistanceMatrix(coords_thread, self.coords,
                                           metric=self.metric,
                                           metric_prms=self.metric_prms)
        # NOTE(review): stacking the gathered blocks assumes the per-rank
        # index sets are contiguous and ordered by rank -- confirm against
        # p_index.get_idxs_thread.
        self.distance_matrix = np.vstack(
            comm.allgather(DistanceMatrix.distance_matrix))
        logging.info("distance matrix computed")
    else:
        if any(self.distance_matrix.shape[idx] != self.npoints for idx in [0, 1]):
            # only reported, not raised: execution continues as before
            logging.error("distance matrix provided doesn't match the number of coordinates")

    self.sigma = self.initialize_sigma_values(self.sigma, self.ksigma)

    # invert kernel matrix
    kernel_matrix = self.fit_function(self.distance_matrix, self.sigma)

    # perform singular value decomposition (third output is V^T)
    U, s, Vh = np.linalg.svd(kernel_matrix)

    # filter noisy singular values: anything below 2% of the largest one is
    # dropped.  Only the retained values are inverted so that an exactly
    # singular kernel matrix neither divides by zero nor yields inf weights.
    keep = (s >= 0.02 * np.max(s)) & (s > 0)
    sinv = np.zeros_like(s)
    sinv[keep] = 1.0 / s[keep]

    # Vh.T * sinv scales the columns of V, i.e. V . diag(sinv), without
    # building the dense diagonal matrix
    inverse_kernel_matrix = np.dot(Vh.T * sinv, U.T)
    weights = np.dot(inverse_kernel_matrix, self.values)
    logging.info("kernel matrix inverted")

    return weights
def initialize(self, comm, config, args):
    """Load the input coordinates on every rank and set up the object state.

    Rank 0 builds the coordinates with ``x2h.Fasu`` / ``x2h.Cofasu`` from
    ``args.topfile``, ``args.struct_file`` and ``args.selection``; the array
    is scaled by 0.1, its last two axes are swapped, and it is broadcast to
    all ranks.  ``self.npoints`` and ``self.natoms`` are taken from axes 0
    and 2 of the broadcast array, the per-rank index partition is computed,
    and the local scale, weights and metric are initialized.

    :param comm: MPI communicator.
    :param config: configuration object, stored as ``self.config``.
    :param args: parsed arguments (``struct_file``, ``topfile`` and
        ``selection`` are read here).
    """
    rank = comm.Get_rank()

    self.config = config
    self.args = args

    filename = args.struct_file[0]
    self.struct_filename = filename

    if rank == 0:
        f = x2h.Fasu(args.topfile, args.struct_file, selection=args.selection)
        c = x2h.Cofasu(f)
        # NOTE(review): the 0.1 factor is presumably a unit conversion
        # (Angstrom -> nm) -- confirm.
        self.coords = np.array(c.x, dtype=np.double) * 0.1
        self.coords = np.swapaxes(self.coords, 1, 2)
    else:
        self.coords = None
    self.coords = comm.bcast(self.coords, root=0)

    # was a bare Python 2 ``print`` executed on every rank; routed through
    # logging so it is valid on Python 3 and silent unless debugging
    logging.debug('rank %d: coordinates shape %s', rank, self.coords.shape)

    self.npoints = self.coords.shape[0]
    self.natoms = self.coords.shape[2]

    (self.idxs_thread,
     self.npoints_per_thread,
     self.offsets_per_thread) = p_index.get_idxs_thread(comm, self.npoints)

    logging.info('input coordinates loaded')

    self.initialize_local_scale()
    self.initialize_weights()
    self.initialize_metric()
def initialize(self, comm, config, args):
    """Load the input coordinates on every rank and set up the object state.

    The number of frames and atoms of ``args.struct_file[0]`` is queried
    through ``coord_reader``.  When the file supports parallel reading each
    rank reads its own frames and the pieces are combined with
    ``Allgatherv``; otherwise rank 0 reads everything and broadcasts it.
    In both cases ``self.coords`` ends up with shape
    ``(self.npoints, 3, self.natoms)`` on every rank.  Finally the local
    scale, the weights and the metric are initialized.

    :param comm: MPI communicator.
    :param config: configuration object, stored as ``self.config``.
    :param args: parsed arguments; ``args.struct_file[0]`` is read here.
    """
    rank = comm.Get_rank()

    self.config = config
    self.args = args

    filename = args.struct_file[0]
    self.struct_filename = filename
    self.npoints, self.natoms = coord_reader.get_nframes_natoms(filename)

    if coord_reader.supports_parallel_reading(filename):
        # read coordinates in parallel
        (self.idxs_thread,
         self.npoints_per_thread,
         self.offsets_per_thread) = p_index.get_idxs_thread(comm, self.npoints)
        coords_thread = coord_reader.get_coordinates(filename, idxs=self.idxs_thread)

        # the receive side is declared as MPI.DOUBLE, so make sure the send
        # buffer really is contiguous float64 (no copy if it already is)
        coords_ravel = np.ascontiguousarray(coords_thread, dtype=np.double).ravel()
        ravel_lengths, ravel_offsets = p_index.get_ravel_offsets(
            self.npoints_per_thread, self.natoms)
        coordstemp = np.zeros(self.npoints * 3 * self.natoms, dtype=np.double)
        comm.Allgatherv(coords_ravel,
                        (coordstemp, ravel_lengths, ravel_offsets, MPI.DOUBLE))
        self.coords = coordstemp.reshape((self.npoints, 3, self.natoms))
    else:
        # serial reading: rank 0 reads, the other ranks allocate a buffer of
        # the same shape/dtype to receive the broadcast
        if rank == 0:
            self.coords = np.ascontiguousarray(
                coord_reader.get_coordinates(filename), dtype=np.double)
        else:
            self.coords = np.zeros((self.npoints, 3, self.natoms), dtype=np.double)
        comm.Bcast(self.coords, root=0)

    logging.info('input coordinates loaded')

    self.initialize_local_scale()
    self.initialize_weights()
    self.initialize_metric()
def get_weights(self, comm):
    """Compute the fit weights by pseudo-inverting the kernel matrix.

    If ``self.distance_matrix`` is ``None`` it is computed first: every rank
    builds the rows for the points assigned to it by
    ``p_index.get_idxs_thread`` and the row blocks are gathered on all
    ranks.  The kernel matrix
    ``self.fit_function(self.distance_matrix, self.sigma)`` is then inverted
    with a truncated SVD and applied to ``self.values``.

    Side effects: ``self.sigma`` is replaced by the result of
    ``self.initialize_sigma_values``; when the distance matrix has to be
    computed, ``self.idxs_thread``, ``self.npoints_per_thread``,
    ``self.offsets_per_thread`` and ``self.distance_matrix`` are set too.

    :param comm: MPI communicator (only used when the distance matrix has
        to be computed).
    :returns: ``np.dot(pseudo_inverse(kernel_matrix), self.values)``.
    """
    # compute the distance matrix if needed
    if self.distance_matrix is None:
        logging.info("distance matrix not provided, computing it...")
        (self.idxs_thread,
         self.npoints_per_thread,
         self.offsets_per_thread) = p_index.get_idxs_thread(comm, self.npoints)
        coords_thread = np.array(
            [self.coords[idx] for idx in self.idxs_thread])
        DistanceMatrix = mt.DistanceMatrix(coords_thread,
                                           self.coords,
                                           metric=self.metric,
                                           metric_prms=self.metric_prms)
        # NOTE(review): stacking the gathered blocks assumes the per-rank
        # index sets are contiguous and ordered by rank -- confirm against
        # p_index.get_idxs_thread.
        self.distance_matrix = np.vstack(
            comm.allgather(DistanceMatrix.distance_matrix))
        logging.info("distance matrix computed")
    else:
        if any(self.distance_matrix.shape[idx] != self.npoints
               for idx in [0, 1]):
            # only reported, not raised: execution continues as before
            logging.error(
                "distance matrix provided doesn't match the number of coordinates"
            )

    self.sigma = self.initialize_sigma_values(self.sigma, self.ksigma)

    # invert kernel matrix
    kernel_matrix = self.fit_function(self.distance_matrix, self.sigma)

    # perform singular value decomposition (third output is V^T)
    U, s, Vh = np.linalg.svd(kernel_matrix)

    # filter noisy singular values: anything below 2% of the largest one is
    # dropped.  Only the retained values are inverted so that an exactly
    # singular kernel matrix neither divides by zero nor yields inf weights.
    keep = (s >= 0.02 * np.max(s)) & (s > 0)
    sinv = np.zeros_like(s)
    sinv[keep] = 1.0 / s[keep]

    # Vh.T * sinv scales the columns of V, i.e. V . diag(sinv), without
    # building the dense diagonal matrix
    inverse_kernel_matrix = np.dot(Vh.T * sinv, U.T)
    weights = np.dot(inverse_kernel_matrix, self.values)
    logging.info("kernel matrix inverted")

    return weights
def initialize(self, comm, config, args):
    """Load the input coordinates on every rank and set up the object state.

    The number of frames and atoms of ``args.struct_file[0]`` is queried
    through ``coord_reader``.  When the file supports parallel reading each
    rank reads its own frames and the pieces are combined with
    ``Allgatherv``; otherwise rank 0 reads everything and broadcasts it.
    In both cases ``self.coords`` ends up with shape
    ``(self.npoints, 3, self.natoms)`` on every rank.  Finally the local
    scale, the weights and the metric are initialized.

    :param comm: MPI communicator.
    :param config: configuration object, stored as ``self.config``.
    :param args: parsed arguments; ``args.struct_file[0]`` is read here.
    """
    rank = comm.Get_rank()

    self.config = config
    self.args = args

    filename = args.struct_file[0]
    self.struct_filename = filename
    self.npoints, self.natoms = coord_reader.get_nframes_natoms(filename)

    if coord_reader.supports_parallel_reading(filename):
        # read coordinates in parallel
        (self.idxs_thread,
         self.npoints_per_thread,
         self.offsets_per_thread) = p_index.get_idxs_thread(comm, self.npoints)
        coords_thread = coord_reader.get_coordinates(filename,
                                                     idxs=self.idxs_thread)

        # the receive side is declared as MPI.DOUBLE, so make sure the send
        # buffer really is contiguous float64 (no copy if it already is)
        coords_ravel = np.ascontiguousarray(coords_thread,
                                            dtype=np.double).ravel()
        ravel_lengths, ravel_offsets = p_index.get_ravel_offsets(
            self.npoints_per_thread, self.natoms)
        coordstemp = np.zeros(self.npoints * 3 * self.natoms,
                              dtype=np.double)
        comm.Allgatherv(
            coords_ravel,
            (coordstemp, ravel_lengths, ravel_offsets, MPI.DOUBLE))
        self.coords = coordstemp.reshape((self.npoints, 3, self.natoms))
    else:
        # serial reading: rank 0 reads, the other ranks allocate a buffer of
        # the same shape/dtype to receive the broadcast
        if rank == 0:
            self.coords = np.ascontiguousarray(
                coord_reader.get_coordinates(filename), dtype=np.double)
        else:
            self.coords = np.zeros((self.npoints, 3, self.natoms),
                                   dtype=np.double)
        comm.Bcast(self.coords, root=0)

    logging.info('input coordinates loaded')

    self.initialize_local_scale()
    self.initialize_weights()
    self.initialize_metric()
def initialize(self, comm, config, args):
    """Load coordinates, values and fit settings; optionally load embed points.

    Steps, in order:

    1. read the coordinates of ``args.struct_file[0]`` into ``self.coords``
       (shape ``(self.npoints, 3, self.natoms)``) on every rank, in parallel
       when ``coord_reader`` supports it and via rank 0 + broadcast otherwise;
    2. read ``args.valfile`` into ``self.values`` and set ``self.fitdcs`` to
       True when the file extension is ``.ev``;
    3. initialize the metric and read ``function``, ``status`` and the sigma
       settings from section ``args.section`` of ``config``;
    4. if ``args.embed_file`` is not None, read its coordinates into
       ``self.coords_embed`` the same way as in step 1.

    :param comm: MPI communicator.
    :param config: configuration object providing ``get``, ``getfloat`` and
        ``getint``; stored as ``self.config``.
    :param args: parsed arguments (``struct_file``, ``valfile``, ``section``
        and ``embed_file`` are read here).
    :raises ValueError: if the array of values has more than two dimensions.
    """
    rank = comm.Get_rank()

    self.config = config
    self.args = args

    filename = args.struct_file[0]
    self.struct_filename = filename
    self.npoints, self.natoms = coord_reader.get_nframes_natoms(filename)

    if coord_reader.supports_parallel_reading(filename):
        # read coordinates in parallel
        (self.idxs_thread,
         self.npoints_per_thread,
         self.offsets_per_thread) = p_index.get_idxs_thread(comm, self.npoints)
        coords_thread = coord_reader.get_coordinates(filename, idxs=self.idxs_thread)

        # the receive side is declared as MPI.DOUBLE, so make sure the send
        # buffer really is contiguous float64 (no copy if it already is)
        coords_ravel = np.ascontiguousarray(coords_thread, dtype=np.double).ravel()
        ravel_lengths, ravel_offsets = p_index.get_ravel_offsets(
            self.npoints_per_thread, self.natoms)
        coordstemp = np.zeros(self.npoints * 3 * self.natoms, dtype=np.double)
        comm.Allgatherv(coords_ravel,
                        (coordstemp, ravel_lengths, ravel_offsets, MPI.DOUBLE))
        self.coords = coordstemp.reshape((self.npoints, 3, self.natoms))
    else:
        # serial reading
        if rank == 0:
            self.coords = np.ascontiguousarray(
                coord_reader.get_coordinates(filename), dtype=np.double)
        else:
            self.coords = np.zeros((self.npoints, 3, self.natoms), dtype=np.double)
        comm.Bcast(self.coords, root=0)

    logging.info('input coordinates loaded')

    # load file of values
    valfile = reader.open(args.valfile)
    self.values = valfile.readlines()

    extension = os.path.splitext(args.valfile)[1]
    if extension == '.ev':
        self.fitdcs = True
    else:
        self.fitdcs = False
        if len(self.values.shape) > 2:
            raise ValueError('file of values should contain a single column')

    self.initialize_metric()

    section = self.args.section
    self.function = config.get(section, 'function')

    self.status_sigma = config.get(section, 'status')
    if self.status_sigma == 'constant':
        self.sigma = config.getfloat(section, 'sigma')
        self.ksigma = None
    elif self.status_sigma == 'kneighbor':
        self.sigma = None
        self.ksigma = config.getint(section, 'ksigma')
    # NOTE(review): any other status leaves self.sigma / self.ksigma unset
    # here -- confirm whether that should be an error.

    if args.embed_file is not None:
        filename_embed = args.embed_file
        self.embed_filename = filename_embed
        self.npoints_embed, self.natoms_embed = coord_reader.get_nframes_natoms(
            filename_embed)

        if coord_reader.supports_parallel_reading(filename_embed):
            # read coordinates in parallel
            (self.idxs_thread_embed,
             self.npoints_per_thread_embed,
             self.offsets_per_thread_embed) = p_index.get_idxs_thread(
                 comm, self.npoints_embed)
            coords_thread_embed = coord_reader.get_coordinates(
                filename_embed, idxs=self.idxs_thread_embed)
            coords_ravel_embed = np.ascontiguousarray(
                coords_thread_embed, dtype=np.double).ravel()
            ravel_lengths, ravel_offsets = p_index.get_ravel_offsets(
                self.npoints_per_thread_embed, self.natoms_embed)
            coordstemp_embed = np.zeros(
                self.npoints_embed * 3 * self.natoms_embed, dtype=np.double)
            # send the *embed* buffer: the structure-file buffer used here
            # previously holds the wrong data and does not even exist when
            # the structure file was read serially
            comm.Allgatherv(coords_ravel_embed,
                            (coordstemp_embed, ravel_lengths, ravel_offsets,
                             MPI.DOUBLE))
            self.coords_embed = coordstemp_embed.reshape(
                (self.npoints_embed, 3, self.natoms_embed))
        else:
            # serial reading
            if rank == 0:
                self.coords_embed = np.ascontiguousarray(
                    coord_reader.get_coordinates(filename_embed),
                    dtype=np.double)
            else:
                self.coords_embed = np.zeros(
                    (self.npoints_embed, 3, self.natoms_embed),
                    dtype=np.double)
            comm.Bcast(self.coords_embed, root=0)
def initialize(self, comm, config, args):
    """Load coordinates, values and fit settings; optionally load embed points.

    Steps, in order:

    1. read the coordinates of ``args.struct_file[0]`` into ``self.coords``
       (shape ``(self.npoints, 3, self.natoms)``) on every rank, in parallel
       when ``coord_reader`` supports it and via rank 0 + broadcast otherwise;
    2. read ``args.valfile`` into ``self.values`` and set ``self.fitdcs`` to
       True when the file extension is ``.ev``;
    3. initialize the metric and read ``function``, ``status`` and the sigma
       settings from section ``args.section`` of ``config``;
    4. if ``args.embed_file`` is not None, read its coordinates into
       ``self.coords_embed`` the same way as in step 1.

    :param comm: MPI communicator.
    :param config: configuration object providing ``get``, ``getfloat`` and
        ``getint``; stored as ``self.config``.
    :param args: parsed arguments (``struct_file``, ``valfile``, ``section``
        and ``embed_file`` are read here).
    :raises ValueError: if the array of values has more than two dimensions.
    """
    rank = comm.Get_rank()

    self.config = config
    self.args = args

    filename = args.struct_file[0]
    self.struct_filename = filename
    self.npoints, self.natoms = coord_reader.get_nframes_natoms(filename)

    if coord_reader.supports_parallel_reading(filename):
        # read coordinates in parallel
        (self.idxs_thread,
         self.npoints_per_thread,
         self.offsets_per_thread) = p_index.get_idxs_thread(comm, self.npoints)
        coords_thread = coord_reader.get_coordinates(filename,
                                                     idxs=self.idxs_thread)

        # the receive side is declared as MPI.DOUBLE, so make sure the send
        # buffer really is contiguous float64 (no copy if it already is)
        coords_ravel = np.ascontiguousarray(coords_thread,
                                            dtype=np.double).ravel()
        ravel_lengths, ravel_offsets = p_index.get_ravel_offsets(
            self.npoints_per_thread, self.natoms)
        coordstemp = np.zeros(self.npoints * 3 * self.natoms,
                              dtype=np.double)
        comm.Allgatherv(
            coords_ravel,
            (coordstemp, ravel_lengths, ravel_offsets, MPI.DOUBLE))
        self.coords = coordstemp.reshape((self.npoints, 3, self.natoms))
    else:
        # serial reading
        if rank == 0:
            self.coords = np.ascontiguousarray(
                coord_reader.get_coordinates(filename), dtype=np.double)
        else:
            self.coords = np.zeros((self.npoints, 3, self.natoms),
                                   dtype=np.double)
        comm.Bcast(self.coords, root=0)

    logging.info('input coordinates loaded')

    # load file of values
    valfile = reader.open(args.valfile)
    self.values = valfile.readlines()

    extension = os.path.splitext(args.valfile)[1]
    if extension == '.ev':
        self.fitdcs = True
    else:
        self.fitdcs = False
        if len(self.values.shape) > 2:
            raise ValueError(
                'file of values should contain a single column')

    self.initialize_metric()

    section = self.args.section
    self.function = config.get(section, 'function')

    self.status_sigma = config.get(section, 'status')
    if self.status_sigma == 'constant':
        self.sigma = config.getfloat(section, 'sigma')
        self.ksigma = None
    elif self.status_sigma == 'kneighbor':
        self.sigma = None
        self.ksigma = config.getint(section, 'ksigma')
    # NOTE(review): any other status leaves self.sigma / self.ksigma unset
    # here -- confirm whether that should be an error.

    if args.embed_file is not None:
        filename_embed = args.embed_file
        self.embed_filename = filename_embed
        self.npoints_embed, self.natoms_embed = coord_reader.get_nframes_natoms(
            filename_embed)

        if coord_reader.supports_parallel_reading(filename_embed):
            # read coordinates in parallel
            (self.idxs_thread_embed,
             self.npoints_per_thread_embed,
             self.offsets_per_thread_embed) = p_index.get_idxs_thread(
                 comm, self.npoints_embed)
            coords_thread_embed = coord_reader.get_coordinates(
                filename_embed, idxs=self.idxs_thread_embed)
            coords_ravel_embed = np.ascontiguousarray(
                coords_thread_embed, dtype=np.double).ravel()
            ravel_lengths, ravel_offsets = p_index.get_ravel_offsets(
                self.npoints_per_thread_embed, self.natoms_embed)
            coordstemp_embed = np.zeros(
                self.npoints_embed * 3 * self.natoms_embed, dtype=np.double)
            # send the *embed* buffer: the structure-file buffer used here
            # previously holds the wrong data and does not even exist when
            # the structure file was read serially
            comm.Allgatherv(coords_ravel_embed,
                            (coordstemp_embed, ravel_lengths, ravel_offsets,
                             MPI.DOUBLE))
            self.coords_embed = coordstemp_embed.reshape(
                (self.npoints_embed, 3, self.natoms_embed))
        else:
            # serial reading
            if rank == 0:
                self.coords_embed = np.ascontiguousarray(
                    coord_reader.get_coordinates(filename_embed),
                    dtype=np.double)
            else:
                self.coords_embed = np.zeros(
                    (self.npoints_embed, 3, self.natoms_embed),
                    dtype=np.double)
            comm.Bcast(self.coords_embed, root=0)