def _compute_all_needed_parametrization_ids(self, fixtureobj):
    """Collect every parametrization id a fixture needs, directly or transitively.

    The fixture's keyword-argument dependencies are walked depth-first with
    an explicit stack. Each stack entry carries the fixture id, the path of
    ids that led to it and the set of ids on that path, so that a dependency
    which re-enters its own path is reported through
    ``_raise_cyclic_dependency_error``.

    Fixtures whose result is already stored in
    ``_all_needed_parametrization_ids_by_fixture_id`` are not expanded again.

    Returns:
        An ``OrderedSet`` of parametrization ids.
    """
    root_id = fixtureobj.info.id
    pending = [(root_id, [root_id], {root_id})]
    collected = OrderedSet()

    while pending:
        current_id, trail, on_trail = pending.pop()

        cache = self._all_needed_parametrization_ids_by_fixture_id
        if current_id in cache:
            collected.update(cache[current_id])
            continue

        current = self._fixtures_by_id[current_id]

        own_ids = current.parametrization_ids
        if own_ids:
            assert isinstance(own_ids, OrderedSet)
            collected.update(own_ids)

        dependencies = current.keyword_arguments
        if not dependencies:
            continue

        for dependency in itervalues(dependencies):
            # Plain parameters contribute nothing beyond the ids gathered above.
            if dependency.is_parameter():
                continue
            dependency_id = dependency.info.id
            if dependency_id in on_trail:
                self._raise_cyclic_dependency_error(
                    fixtureobj, trail, dependency_id)
            pending.append((
                dependency_id,
                trail + [dependency_id],
                on_trail | {dependency_id},
            ))

    return collected
class Taggable(ABC):
    """Base class for objects carrying user tags on top of required tags.

    Subclasses supply the always-present tags through
    ``get_required_tags``; the user-editable tags are kept in insertion
    order in ``self._tags``.
    """

    def __init__(self):
        self._tags = OrderedSet()

    @abstractmethod
    def get_required_tags(self):
        """Return the tags that are always reported by ``get_tags``.

        The result is combined with the user tags using ``|`` and must
        therefore support that operator with an ``OrderedSet``.
        """
        raise NotImplementedError

    def get_tags(self):
        """Gets the comma-separated list of tags.

        Returns:
            A comma separated list of tags, or an empty string if there are
            no tags.
        """
        return ','.join(self.get_required_tags() | self._tags)

    def set_tags(self, tags):
        """Sets from a comma-separated list of tags.

        Any previously set user tags are discarded. Empty entries (as
        produced by an empty string, or by leading, trailing or doubled
        commas) are ignored so that ``set_tags(get_tags())`` never stores
        an empty tag.

        Args:
            tags: Comma-separated tag string, or ``None`` to leave the
                current tags untouched.
        """
        if tags is None:
            # Weird, but for compatibility with the Java version
            return
        self._tags.clear()
        self._tags.update(tag for tag in tags.split(',') if tag)

    def add_tags(self, *tags):
        """Add each of the given tags to the user tags."""
        self._tags.update(tags)

    def remove_tag(self, tag):
        """Remove ``tag`` from the user tags; a falsy ``tag`` is ignored."""
        if tag:
            self._tags.remove(tag)

    def has_tag(self, tag):
        """Return whether ``tag`` is one of the user tags.

        Required tags are not consulted.
        """
        return tag in self._tags
def ndim_pareto_ranking(scores):
    """Partition ``scores`` into successive Pareto fronts.

    Every element must provide ``dominates(other)``, and ``scores`` must
    support ``scores - {element}``.

    Returns:
        A ``defaultdict`` mapping front number (starting at 1) to an
        ``OrderedSet`` of its members. The highest-numbered entry is the
        empty front that ended the search.
    """
    beaten_by = defaultdict(OrderedSet)  # beaten_by[p]: individuals p dominates
    pending_dominators = defaultdict(int)  # how many individuals dominate p
    fronts = defaultdict(OrderedSet)

    # Build the domination map; undominated individuals form front 1.
    for candidate in scores:
        for rival in scores - {candidate}:
            if candidate.dominates(rival):
                beaten_by[candidate].add(rival)
            elif rival.dominates(candidate):
                pending_dominators[candidate] += 1
        if pending_dominators[candidate] == 0:
            fronts[1].add(candidate)

    # Peel off the current front; whoever loses its last dominator moves up.
    rank = 1
    while fronts[rank]:
        following = OrderedSet()
        for member in fronts[rank]:
            for rival in beaten_by[member]:
                pending_dominators[rival] -= 1
                if pending_dominators[rival] == 0:
                    following.add(rival)
        rank += 1
        fronts[rank] = following

    return fronts
def iter_variations(self):
    """Yield one variation per combination of parametrization values.

    The parametrization ids needed by all of ``self._needed_fixtures`` are
    gathered in first-seen order. With no parametrizations, a single
    ``Variation`` built from a copy of the name bindings is yielded.
    Otherwise the cartesian product of value indices is enumerated and each
    combination is handed to ``_build_variation``.
    """
    param_ids = OrderedSet()
    for needed in self._needed_fixtures:
        param_ids.update(self._store.get_all_needed_fixture_ids(needed))

    if not param_ids:
        yield Variation(self._store, {}, self._name_bindings.copy())
        return

    params = [self._store.get_fixture_by_id(pid) for pid in param_ids]
    index_ranges = [xrange(len(param.values)) for param in params]
    for combination in itertools.product(*index_ranges):
        yield self._build_variation(params, combination)
def get_default_trackers():
    """Return the default tracker URLs, de-duplicated and in priority order.

    The configured ``MAIN_ANNOUNCE_URL`` (when set) comes first, followed
    by the entries of ``default_trackers()``.
    """
    announce_url = app.config.get('MAIN_ANNOUNCE_URL')
    ordered = OrderedSet([announce_url] if announce_url else [])
    ordered.update(default_trackers())
    return list(ordered)
def _merge_node(self, node_list):
    """Group nodes into clusters, merging clusters that touch the previous one.

    For every node not yet covered, its cluster is the node itself plus
    whatever ``_get_pe_cluster`` returns for it. A cluster that intersects
    the most recently produced group is folded into that group; otherwise
    it starts a new group.

    Returns:
        A list of ``OrderedSet`` groups in the order they were opened.
    """
    covered = set()
    groups = []
    for node in node_list:
        if node in covered:
            continue

        cluster = OrderedSet([node])
        cluster.update(self._get_pe_cluster(node))
        covered.update(cluster)

        # Only the last group is considered for merging.
        if groups and cluster.intersection(groups[-1]):
            groups[-1].update(cluster)
        else:
            groups.append(cluster)
    return groups
def iter_variations(self):
    """Yield one variation per combination of parametrization values.

    ``self._needed_fixtures`` is first sorted in place by descending
    ``info.scope``; the parametrization ids needed by those fixtures are
    then gathered in first-seen order. With no parametrizations, a single
    empty ``Variation`` is yielded. Otherwise the cartesian product of value
    indices is enumerated and each combination is handed to
    ``_build_variation``.
    """
    self._needed_fixtures.sort(key=lambda x: x.info.scope, reverse=True)

    param_ids = OrderedSet()
    for needed in self._needed_fixtures:
        param_ids.update(self._store.get_all_needed_fixture_ids(needed))

    if not param_ids:
        yield Variation(self._store, {}, {})
        return

    params = [self._store.get_fixture_by_id(pid) for pid in param_ids]
    index_ranges = [xrange(len(param.values)) for param in params]
    for combination in itertools.product(*index_ranges):
        yield self._build_variation(params, combination)
def find_recvs(fro):
    """Find all the Receivers ``fro`` depends on.

    Starting at ``fro``, the traversal follows ``args`` of ordinary nodes
    and, for every ``RecvOp``, continues through its ``send_node()``.

    Returns:
        An ``OrderedSet`` of the ``RecvOp`` instances encountered.
    """
    pending = OrderedSet([fro])
    found = OrderedSet()
    while pending:
        node = pending.pop()
        if isinstance(node, RecvOp):
            found.add(node)
            # Cross the send/recv junction and keep searching on the sender side.
            pending |= get_iterable(node.send_node())
            continue
        if hasattr(node, 'args'):
            pending.update(node.args)
    return found
def get_trackers(torrent):
    """Return the tracker URLs for ``torrent``, de-duplicated, in priority order.

    Order: the configured ``MAIN_ANNOUNCE_URL`` (when set), then the URIs of
    the torrent's own trackers, then the entries of ``default_trackers()``.
    """
    uris = OrderedSet()

    announce_url = app.config.get('MAIN_ANNOUNCE_URL')
    if announce_url:
        uris.add(announce_url)

    for entry in torrent.trackers:
        uris.add(entry.tracker.uri)

    uris.update(default_trackers())
    return list(uris)
def build_opgraph(self):
    """
    Build Ngraph opgraph from Neon's computation graph.

    Resets the transformer's graph passes to a single
    ``PybindWrapperGenerator``, resets ``self.custom_passes`` to a single
    ``PybindScopePass``, collects the ops returned by the computation, runs
    the custom passes over them and finally runs the transformer's
    registered graph passes.
    """
    # ``collections.Container`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Fall back to the old location on Python 2.
    try:
        from collections.abc import Container
    except ImportError:
        from collections import Container

    computation = self.computation_op

    self.transformer.graph_passes = []
    self.transformer.graph_passes += [PybindWrapperGenerator(self.transformer, self)]

    self.custom_passes = []
    self.custom_passes += [PybindScopePass(self)]

    # ``returns`` is either a container of ops or a single op; anything else
    # leaves the op list empty.
    returns = computation.returns
    computation_op_list = OrderedSet()
    if isinstance(returns, Container):
        computation_op_list.update(list(returns))
    elif isinstance(returns, Op):
        computation_op_list.update([returns])

    for custom_pass in self.custom_passes:
        custom_pass(computation_op_list)

    self.transformer.run_registered_graph_passes(computation_op_list)
def _compute_all_needed_parametrization_ids(self, fixtureobj):
    """Collect every parametrization id a fixture needs, directly or transitively.

    The ids stored in each fixture's ``fixture_kwargs`` are followed
    depth-first with an explicit stack. Each stack entry carries the fixture
    id, the path of ids that led to it and the set of ids on that path, so
    that a dependency which re-enters its own path is reported through
    ``_raise_cyclic_dependency_error``.

    Fixtures whose result is already stored in
    ``_all_needed_parametrization_ids_by_fixture_id`` are not expanded again.

    Returns:
        An ``OrderedSet`` of parametrization ids.
    """
    root_id = fixtureobj.info.id
    pending = [(root_id, [root_id], {root_id})]
    collected = OrderedSet()

    while pending:
        current_id, trail, on_trail = pending.pop()

        cache = self._all_needed_parametrization_ids_by_fixture_id
        if current_id in cache:
            collected.update(cache[current_id])
            continue

        current = self._fixtures_by_id[current_id]

        own_ids = current.parametrization_ids
        if own_ids:
            assert isinstance(own_ids, OrderedSet)
            collected.update(own_ids)

        dependencies = current.fixture_kwargs
        if not dependencies:
            continue

        for dependency_id in itervalues(dependencies):
            if dependency_id in on_trail:
                self._raise_cyclic_dependency_error(fixtureobj, trail, dependency_id)
            pending.append((
                dependency_id,
                trail + [dependency_id],
                on_trail | {dependency_id},
            ))

    return collected
def prepare_annotations(self):
    """Compile the repository's annotations into a field set and a sorted list.

    Returns:
        A tuple ``(fields, annotations_list)``. ``fields`` is an
        ``OrderedSet`` that starts with the standard field names and is
        extended with every key found in the annotations.
        ``annotations_list`` holds the annotations sorted by ``start_time``,
        newest first.
    """
    columns = OrderedSet([
        'start_time',
        'commit_time',
        'commit',
        'commit_message',
        'working_directory',
        'command',
        'elapsed_time',
    ])

    entries = []
    for entry in self.repo.get_annotations():
        columns.update(entry)
        entries.append(entry)

    newest_first = sorted(entries, key=lambda x: x['start_time'], reverse=True)
    return columns, newest_first
def run_strategy(self, graph):
    """Will try to take and keep big cities around it.

    Candidate patches are accumulated from three sources, in order of
    preference: securing the borders, big cities in the vicinity, and the
    naive neighbour move. After each source, if at least a third of the
    civilisation's patch count (rounded down) has been gathered, the first
    ``max(third, 1)`` candidates are returned.

    Returns:
        An ``OrderedSet`` slice of the chosen patches, or every candidate
        gathered if the threshold was never reached.
    """
    neighbors = self.get_neighbors(graph)

    # Wrapped in lambdas so that a later source is only consulted when the
    # earlier ones did not yield enough candidates.
    sources = (
        lambda: self.secure_borders(graph, neighbors),
        lambda: self.get_big_cities(graph, neighbors),
        lambda: self.neighbors_move(neighbors),
    )

    move = OrderedSet()
    for source in sources:
        move.update(source())
        # Floor division keeps the slice bound an int on Python 3 as well
        # ("/" would give a float there and make the slice raise TypeError).
        quota = len(self.patches) // 3
        if len(move) >= quota:
            return move[:quota or 1]
    return move
def json_to_csv(input_file_path, output_file_path, fields_dict):
    """Convert the JSON records read from ``input_file_path`` to CSV rows.

    The module-level ``allowedFields`` list is rebuilt from the entries of
    ``fields_dict`` whose value equals ``True``. The CSV header is taken
    from the keys of the first batch of rows only, in first-seen order.

    ``output_file_path`` is handed directly to ``csv.DictWriter``.
    """
    global allowedFields
    allowedFields = [name for name, enabled in fields_dict.items()
                     if enabled == True]  # noqa: E712 - equality, not identity

    writer = None
    for parsed in loadJSON_multipleLines(input_file_path):
        rows = json_to_dicts(parsed)

        if writer is None:
            # First batch: derive the header and emit it once.
            header = OrderedSet()
            for row in rows:
                header.update(row.keys())
            writer = csv.DictWriter(output_file_path, fieldnames=header)
            writer.writeheader()

        writer.writerows(rows)
def comm_path_exists(fro, to):
    """
    Report whether ``to`` is reachable from ``fro``.

    Besides the explicit ``args`` edges, the search also follows the
    implicit edge from a Receiver to its Sender. Note - this is a
    non-standard traversal, as most traversals stop at a Receiver.
    """
    # TODO: Issue #1865 does this correctly handle traversing multiple send-recv junctions
    # from fro to to?
    pending = OrderedSet(fro.args)
    pending.add(fro)

    while pending:
        node = pending.pop()
        if node == to:
            return True

        if isinstance(node, RecvOp):
            pending |= get_iterable(node.send_node())
        else:
            pending.update(node.args)

    return False
def get_trackers_and_webseeds(torrent):
    """Return ``(trackers, webseeds)`` URL lists for ``torrent``.

    Trackers are ordered as: the configured ``MAIN_ANNOUNCE_URL`` (when
    set), the torrent's own non-webseed tracker URIs, then the entries of
    ``default_trackers()``. Entries flagged ``is_webseed`` go to the webseed
    list instead. Both lists are de-duplicated and keep first-seen order.
    """
    trackers = OrderedSet()
    webseeds = OrderedSet()

    announce_url = app.config.get('MAIN_ANNOUNCE_URL')
    if announce_url:
        trackers.add(announce_url)

    # The torrent's tracker list doubles as its webseed list.
    for entry in torrent.trackers:
        tracker = entry.tracker
        bucket = webseeds if tracker.is_webseed else trackers
        bucket.add(tracker.uri)

    trackers.update(default_trackers())
    return list(trackers), list(webseeds)
def extract_context_names(query):
    """
    Extract the context names referenced by a query object.

    This mimics the json-e render() function for queries that only use
    ``$eval``: dicts and lists are searched recursively, and the value of
    every ``"$eval"`` key is parsed with ``G_ZEROCONTEXT_EVALUATOR``.

    Args:
        query (dict): query object to extract context

    Returns:
        contexts (OrderedSet): context names in first-seen order; empty for
        anything that is neither a dict nor a list
    """
    names = OrderedSet()

    if isinstance(query, dict):
        for key, value in query.items():
            if key == "$eval":
                found = G_ZEROCONTEXT_EVALUATOR.parse(value)
            else:
                found = extract_context_names(value)
            names.update(found)
    elif isinstance(query, list):
        for item in query:
            names.update(extract_context_names(item))

    return names
class Target(object):
    """Target function for cosym analysis.

    Attributes:
      dim (int): The number of dimensions used in the analysis.
      rij_matrix: Matrix of pairwise correlation coefficients, filled in by
        `_compute_rij_wij`.
      wij_matrix: Matrix of weights matching `rij_matrix`, or `None` when no
        weights were requested.
    """

    def __init__(
        self,
        intensities,
        lattice_ids,
        weights=None,
        min_pairs=None,
        lattice_group=None,
        dimensions=None,
        nproc=1,
    ):
        r"""Initialise a Target object.

        Args:
          intensities (cctbx.miller.array): The intensities on which to perform
            cosym analysis.
          lattice_ids (scitbx.array_family.flex.int): An array of equal size to
            `intensities` which maps each reflection to a given lattice (dataset).
          weights (str): Optionally include weights in the target function.
            Allowed values are `None`, "count" and "standard_error". The default
            is to use no weights. If "count" is set, then weights are equal to the
            number of pairs of reflections used in calculating each value of the
            rij matrix. If "standard_error" is used, then weights are defined as
            :math:`w_{ij} = 1/s`, where :math:`s = \sqrt{(1-r_{ij}^2)/(n-2)}`.
            See also http://www.sjsu.edu/faculty/gerstman/StatPrimer/correlation.pdf.
          min_pairs (int): Only use the correlation coefficient between two
            datasets if it was computed from at least `min_pairs` common
            reflections.
          lattice_group (cctbx.sgtbx.space_group): Optionally set the lattice
            group to be used in the analysis.
          dimensions (int): Optionally override the number of dimensions to be
            used in the analysis. If not set, then the number of dimensions used is
            equal to the greater of 2 or the number of symmetry operations in the
            lattice group.
          nproc (int): number of processors to use for computing the rij matrix.
        """
        if weights is not None:
            assert weights in ("count", "standard_error")
        self._weights = weights
        self._min_pairs = min_pairs
        self._nproc = nproc

        # Work on a non-anomalous copy in the primitive setting, mapped to the
        # asymmetric unit.
        data = intensities.customized_copy(anomalous_flag=False)
        cb_op_to_primitive = data.change_of_basis_op_to_primitive_setting()
        data = data.change_basis(cb_op_to_primitive).map_to_asu()

        # Reorder reflections so that each lattice occupies a contiguous range.
        order = flex.sort_permutation(lattice_ids)
        sorted_lattice_id = flex.select(lattice_ids, order)
        sorted_data = data.data().select(order)
        sorted_indices = data.indices().select(order)
        self._lattice_ids = sorted_lattice_id
        self._data = data.customized_copy(indices=sorted_indices, data=sorted_data)
        assert isinstance(self._data.indices(), type(flex.miller_index()))
        assert isinstance(self._data.data(), type(flex.double()))

        # construct a lookup for the separate lattices: the offset of the first
        # reflection of each lattice in the sorted arrays
        last_id = -1
        self._lattices = flex.int()
        for n, lid in enumerate(self._lattice_ids):
            if lid != last_id:
                last_id = lid
                self._lattices.append(n)

        # The identity always comes first; the twin operators follow.
        self._sym_ops = OrderedSet(["x,y,z"])
        # _generate_twin_operators reads (and may set) self._lattice_group, so
        # it has to be assigned before the call below.
        self._lattice_group = lattice_group
        # NOTE(review): the operators pass through a set comprehension, so the
        # order in which they are appended is not fixed by this code.
        self._sym_ops.update({op.as_xyz() for op in self._generate_twin_operators()})
        if dimensions is None:
            dimensions = max(2, len(self._sym_ops))
        self.set_dimensions(dimensions)

        # From here on the lattice group is the data's space group expanded by
        # every symmetry operation under test.
        self._lattice_group = copy.deepcopy(self._data.space_group())
        for sym_op in self._sym_ops:
            self._lattice_group.expand_smx(sym_op)
        self._patterson_group = self._lattice_group.build_derived_patterson_group()

        logger.debug(
            "Lattice group: %s (%i symops)"
            % (self._lattice_group.info().symbol_and_number(), len(self._lattice_group))
        )
        logger.debug(
            "Patterson group: %s" % self._patterson_group.info().symbol_and_number()
        )

        self._compute_rij_wij()

    def set_dimensions(self, dimensions):
        """Set the number of dimensions for analysis.

        Args:
          dimensions (int): The number of dimensions to be used.
        """
        self.dim = dimensions

    def _generate_twin_operators(self, lattice_symmetry_max_delta=5.0):
        """Return the candidate reindexing operators for the data.

        When no lattice group has been set, one is derived from the minimum
        cell of the data (and stored on ``self._lattice_group``). The lattice
        group is then decomposed into left cosets of the acentric intensity
        symmetry group; the first element of every coset after the first is
        kept when its rotation part has a positive determinant.

        Args:
          lattice_symmetry_max_delta (float): ``max_delta`` passed to
            ``sgtbx.lattice_symmetry.group`` when the lattice group has to
            be derived.

        Returns:
          list: The selected operators, after applying ``cb_op`` to each.
        """
        # see also mmtbx.scaling.twin_analyses.twin_laws
        if self._lattice_group is None:
            cb_op_to_minimum_cell = self._data.change_of_basis_op_to_minimum_cell()
            minimum_cell_symmetry = self._data.crystal_symmetry().change_basis(
                cb_op=cb_op_to_minimum_cell
            )
            self._lattice_group = sgtbx.lattice_symmetry.group(
                reduced_cell=minimum_cell_symmetry.unit_cell(),
                max_delta=lattice_symmetry_max_delta,
            )
            intensity_symmetry = minimum_cell_symmetry.reflection_intensity_symmetry(
                anomalous_flag=self._data.anomalous_flag()
            )
            # Operators are found in the minimum cell and mapped back with the
            # inverse change of basis.
            cb_op = cb_op_to_minimum_cell.inverse()
        else:
            # A default-constructed change of basis is applied in this case.
            cb_op = sgtbx.change_of_basis_op()
            intensity_symmetry = self._data.reflection_intensity_symmetry()

        operators = []
        for partition in cctbx.sgtbx.cosets.left_decomposition(
            g=self._lattice_group,
            h=intensity_symmetry.space_group()
            .build_derived_acentric_group()
            .make_tidy(),
        ).partitions[1:]:
            if partition[0].r().determinant() > 0:
                operators.append(cb_op.apply(partition[0]))

        return operators

    def _lattice_lower_upper_index(self, lattice_id):
        """Return the slice bounds of a lattice within the sorted data.

        Args:
          lattice_id (int): Position of the lattice in ``self._lattices``.

        Returns:
          tuple: ``(lower_index, upper_index)``; ``upper_index`` is ``None``
          for the last lattice, so that slicing runs to the end of the data.
        """
        lower_index = self._lattices[lattice_id]
        upper_index = None
        if lattice_id < len(self._lattices) - 1:
            upper_index = self._lattices[lattice_id + 1]
        else:
            assert lattice_id == len(self._lattices) - 1
        return lower_index, upper_index

    def _compute_rij_wij(self, use_cache=True):
        """Compute the rij_wij matrix.

        Both matrices are square with one row per (lattice, symmetry
        operation) pair. One row block per lattice is computed through
        ``easy_mp.parallel_map`` and the sparse blocks are summed.

        Args:
          use_cache (bool): Reuse, within one row block, the correlation
            already computed for the same lattice pair and relative
            operator.

        Returns:
          tuple: ``(self.rij_matrix, self.wij_matrix)``.
        """
        n_lattices = self._lattices.size()
        n_sym_ops = len(self._sym_ops)

        NN = n_lattices * n_sym_ops

        # Placeholders; both attributes are replaced once the row blocks have
        # been combined at the end of this method.
        self.rij_matrix = flex.double(flex.grid(NN, NN), 0.0)
        if self._weights is None:
            self.wij_matrix = None
        else:
            self.wij_matrix = flex.double(flex.grid(NN, NN), 0.0)

        # Reindex the miller indices once per symmetry operation, keyed by the
        # operation's xyz string.
        indices = {}
        space_group_type = self._data.space_group().type()
        for cb_op in self._sym_ops:
            cb_op = sgtbx.change_of_basis_op(cb_op)
            indices_reindexed = cb_op.apply(self._data.indices())
            miller.map_to_asu(space_group_type, False, indices_reindexed)
            indices[cb_op.as_xyz()] = indices_reindexed

        def _compute_rij_matrix_one_row_block(i):
            """Return the sparse ``(rij, wij)`` contributions of lattice ``i``.

            ``wij`` is ``None`` when no weights were requested.
            """
            rij_cache = {}

            n_sym_ops = len(self._sym_ops)
            NN = n_lattices * n_sym_ops

            rij_row = []
            rij_col = []
            rij_data = []
            if self._weights is not None:
                wij_row = []
                wij_col = []
                wij_data = []
            else:
                wij = None

            i_lower, i_upper = self._lattice_lower_upper_index(i)
            intensities_i = self._data.data()[i_lower:i_upper]

            for j in range(n_lattices):

                j_lower, j_upper = self._lattice_lower_upper_index(j)
                intensities_j = self._data.data()[j_lower:j_upper]

                for k, cb_op_k in enumerate(self._sym_ops):
                    cb_op_k = sgtbx.change_of_basis_op(cb_op_k)

                    indices_i = indices[cb_op_k.as_xyz()][i_lower:i_upper]

                    for kk, cb_op_kk in enumerate(self._sym_ops):
                        if i == j and k == kk:
                            # don't include correlation of dataset with itself
                            continue
                        cb_op_kk = sgtbx.change_of_basis_op(cb_op_kk)

                        # Row/column positions: the operation index selects a
                        # band of n_lattices entries, the lattice index the
                        # offset within it.
                        ik = i + (n_lattices * k)
                        jk = j + (n_lattices * kk)

                        # The cache is keyed on the lattice pair and the
                        # relative operator between the two operations.
                        key = (i, j, str(cb_op_k.inverse() * cb_op_kk))
                        if use_cache and key in rij_cache:
                            cc, n = rij_cache[key]
                        else:
                            indices_j = indices[cb_op_kk.as_xyz()][j_lower:j_upper]

                            matches = miller.match_indices(indices_i, indices_j)
                            pairs = matches.pairs()
                            isel_i = pairs.column(0)
                            isel_j = pairs.column(1)
                            # Keep only matched reflections whose Patterson
                            # group epsilon equals 1.
                            isel_i = isel_i.select(
                                self._patterson_group.epsilon(indices_i.select(isel_i))
                                == 1
                            )
                            isel_j = isel_j.select(
                                self._patterson_group.epsilon(indices_j.select(isel_j))
                                == 1
                            )
                            corr = flex.linear_correlation(
                                intensities_i.select(isel_i),
                                intensities_j.select(isel_j),
                            )

                            # Only well-defined correlations are cached.
                            if corr.is_well_defined():
                                cc = corr.coefficient()
                                n = corr.n()
                                rij_cache[key] = (cc, n)
                            else:
                                cc = None
                                n = None

                        if (
                            n is None
                            or cc is None
                            or (self._min_pairs is not None and n < self._min_pairs)
                        ):
                            continue

                        if self._weights == "count":
                            wij_row.extend([ik, jk])
                            wij_col.extend([jk, ik])
                            wij_data.extend([n, n])
                        elif self._weights == "standard_error":
                            assert n > 2
                            # http://www.sjsu.edu/faculty/gerstman/StatPrimer/correlation.pdf
                            se = math.sqrt((1 - cc ** 2) / (n - 2))
                            wij = 1 / se
                            wij_row.extend([ik, jk])
                            wij_col.extend([jk, ik])
                            wij_data.extend([wij, wij])

                        rij_row.append(ik)
                        rij_col.append(jk)
                        rij_data.append(cc)

            rij = sparse.coo_matrix((rij_data, (rij_row, rij_col)), shape=(NN, NN))
            if self._weights is not None:
                wij = sparse.coo_matrix((wij_data, (wij_row, wij_col)), shape=(NN, NN))

            return rij, wij

        args = [(i,) for i in range(n_lattices)]
        results = easy_mp.parallel_map(
            _compute_rij_matrix_one_row_block,
            args,
            processes=self._nproc,
            iterable_type=easy_mp.posiargs,
            method="multiprocessing",
        )

        # Sum the per-lattice sparse blocks into full matrices.
        rij_matrix = None
        wij_matrix = None
        for i, (rij, wij) in enumerate(results):
            if rij_matrix is None:
                rij_matrix = rij
            else:
                rij_matrix += rij
            if wij is not None:
                if wij_matrix is None:
                    wij_matrix = wij
                else:
                    wij_matrix += wij

        self.rij_matrix = flex.double(rij_matrix.todense())
        if wij_matrix is not None:
            import numpy as np

            self.wij_matrix = flex.double(wij_matrix.todense().astype(np.float64))

        return self.rij_matrix, self.wij_matrix

    def compute_functional(self, x):
        """Compute the target function at coordinates `x`.

        The value is half the (optionally weighted) sum of squares of the
        rij matrix minus the outer products of the coordinates in each
        dimension.

        Args:
          x (scitbx.array_family.flex.double):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.

        Returns:
          f (float): The value of the target function at coordinates `x`.
        """
        assert (x.size() // self.dim) == (self._lattices.size() * len(self._sym_ops))
        inner = self.rij_matrix.deep_copy()
        NN = x.size() // self.dim
        for i in range(self.dim):
            coord = x[i * NN : (i + 1) * NN]
            outer_prod = coord.matrix_outer_product(coord)
            inner -= outer_prod
        elements = inner * inner
        if self.wij_matrix is not None:
            elements = self.wij_matrix * elements
        f = 0.5 * flex.sum(elements)
        return f

    def compute_gradients_fd(self, x, eps=1e-6):
        """Compute the gradients at coordinates `x` using finite differences.

        Central differences are used; `x` is perturbed in place and restored
        after each evaluation.

        Args:
          x (scitbx.array_family.flex.double):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.
          eps (float):
            The value of epsilon to use in finite difference calculations.

        Returns:
          grad (scitbx.array_family.flex.double):
          The gradients of the target function with respect to the parameters.
        """
        grad = flex.double(x.size(), 0)
        for i in range(grad.size()):
            x[i] += eps  # x + eps
            fp = self.compute_functional(x)
            x[i] -= 2 * eps  # x - eps
            fm = self.compute_functional(x)
            x[i] += eps  # reset to original values
            grad[i] += (fp - fm) / (2 * eps)
        return grad

    def compute_functional_and_gradients(self, x):
        """Compute the target function and gradients at coordinates `x`.

        Args:
          x (scitbx.array_family.flex.double):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.

        Returns:
          Tuple[float, scitbx.array_family.flex.double]:
          f: The value of the target function at coordinates `x`.
          grad: The gradients of the target function with respect to the parameters.
        """
        f = self.compute_functional(x)
        grad = flex.double()
        if self.wij_matrix is not None:
            wrij_matrix = self.wij_matrix * self.rij_matrix
        else:
            wrij_matrix = self.rij_matrix

        # Split the flattened vector into one coordinate array per dimension.
        coords = []
        NN = x.size() // self.dim
        for i in range(self.dim):
            coords.append(x[i * NN : (i + 1) * NN])

        # term 1
        for i in range(self.dim):
            grad.extend(wrij_matrix.matrix_multiply(coords[i]))

        # Second term: subtract the (optionally weighted) outer product of
        # each dimension's coordinates multiplied into every dimension.
        for i in range(self.dim):
            tmp_array = flex.double()
            tmp = coords[i].matrix_outer_product(coords[i])
            if self.wij_matrix is not None:
                tmp = self.wij_matrix * tmp
            for j in range(self.dim):
                tmp_array.extend(tmp.matrix_multiply(coords[j]))
            grad -= tmp_array
        grad *= -2

        # grad_fd = self.compute_gradients_fd(x)
        # assert grad.all_approx_equal_relatively(grad_fd, relative_error=1e-4)

        return f, grad

    def curvatures(self, x):
        """Compute the curvature of the target function.

        Args:
          x (scitbx.array_family.flex.double):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.

        Returns:
          curvs (scitbx.array_family.flex.double):
          The curvature of the target function with respect to the parameters.
        """
        coords = []
        NN = x.size() // self.dim
        for i in range(self.dim):
            coords.append(x[i * NN : (i + 1) * NN])

        curvs = flex.double()
        if self.wij_matrix is not None:
            wij = self.wij_matrix
        else:
            # Without weights, a matrix of ones with the shape of rij is used.
            wij = flex.double(self.rij_matrix.accessor(), 1)
        for i in range(self.dim):
            curvs.extend(wij.matrix_multiply(coords[i] * coords[i]))
        curvs *= 2

        return curvs

    def curvatures_fd(self, x, eps=1e-6):
        """Compute the curvatures at coordinates `x` using finite differences.

        Second-order central differences are used; `x` is perturbed in place
        and restored after each evaluation.

        Args:
          x (scitbx.array_family.flex.double):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.
          eps (float):
            The value of epsilon to use in finite difference calculations.

        Returns:
          curvs (scitbx.array_family.flex.double):
          The curvature of the target function with respect to the parameters.
        """
        f = self.compute_functional(x)
        curvs = flex.double(x.size(), 0)
        for i in range(curvs.size()):
            x[i] += eps  # x + eps
            fp = self.compute_functional(x)
            x[i] -= 2 * eps  # x - eps
            fm = self.compute_functional(x)
            x[i] += eps  # reset to original values
            curvs[i] += (fm - 2 * f + fp) / (eps ** 2)
        return curvs

    def get_sym_ops(self):
        """Get the symmetry operations used in the analysis.

        Returns:
          OrderedSet[str]: The symmetry operations as xyz strings, starting
          with the identity "x,y,z".
        """
        return self._sym_ops
class TargetWithCustomSymops(TargetWithFastRij):
    """Target whose reindexing operators may be restricted by the caller."""

    def __init__(
        self,
        intensities,
        lattice_ids,
        weights=None,
        min_pairs=3,
        lattice_group=None,
        dimensions=None,
        nproc=None,
        twin_axes=None,
        twin_angles=None,
        cb_op=None,
    ):
        '''
        Extra init arguments allow user-defined reindexing ops to be tested.

        twin_axes is a list of tuples, e.g. [(0,1,0)] means the twin axis is b.
        twin_angles is a list of the same length defining the rotations; 2 is
        a twofold rotation etc. cb_op is the previously determined
        transformation from the input cells to the minimum cell. The data have
        already been transformed by this operator, so the twin operators are
        transformed by it before being compared with the requested axes and
        angles.
        '''
        if nproc is not None:
            warnings.warn("nproc is deprecated", DeprecationWarning)
        self._nproc = 1

        if weights is not None:
            assert weights in ("count", "standard_error")
        self._weights = weights
        self._min_pairs = min_pairs

        # Non-anomalous copy in the primitive setting, mapped to the ASU.
        merged = intensities.customized_copy(anomalous_flag=False)
        to_primitive = merged.change_of_basis_op_to_primitive_setting()
        merged = merged.change_basis(to_primitive).map_to_asu()

        # Convert to uint64 avoids crashes on Windows when later constructing
        # flex.size_t (https://github.com/cctbx/cctbx_project/issues/591)
        order = lattice_ids.argsort().astype(np.uint64)
        data_by_lattice = merged.data().select(flex.size_t(order))
        indices_by_lattice = merged.indices().select(flex.size_t(order))
        self._lattice_ids = lattice_ids[order]
        self._data = merged.customized_copy(
            indices=indices_by_lattice, data=data_by_lattice
        )
        assert isinstance(self._data.indices(), type(cctbx_flex.miller_index()))
        assert isinstance(self._data.data(), type(cctbx_flex.double()))

        # Offset of the first reflection of every lattice in the sorted data.
        first_offsets = [
            np.where(self._lattice_ids == lattice_id)[0][0]
            for lattice_id in np.unique(self._lattice_ids)
        ]
        self._lattices = np.array(first_offsets)

        self.sym_ops = OrderedSet(["x,y,z"])
        # Must be assigned before the twin operators are generated.
        self._lattice_group = lattice_group
        candidate_ops = self._generate_twin_operators()

        if twin_axes is None:
            chosen_ops = candidate_ops
        else:
            assert len(twin_axes) == len(twin_angles)
            # Describe every candidate after applying cb_op, then keep those
            # whose axis and rotation type were both requested.
            descriptions = [
                literal_description(cb_op.apply(op)) for op in candidate_ops
            ]
            axis_angle_pairs = [
                (desc.r_info.ev(), desc.r_info.type()) for desc in descriptions
            ]
            kept_positions = [
                position
                for position, (axis, angle) in enumerate(axis_angle_pairs)
                if axis in twin_axes and angle in twin_angles
            ]
            assert len(kept_positions) == len(twin_axes)
            chosen_ops = [candidate_ops[position] for position in kept_positions]

        self.sym_ops.update(op.as_xyz() for op in chosen_ops)

        if dimensions is None:
            dimensions = max(2, len(self.sym_ops))
        self.set_dimensions(dimensions)

        # The lattice group becomes the data's space group expanded by every
        # operation under test.
        expanded_group = copy.deepcopy(self._data.space_group())
        self._lattice_group = expanded_group
        for sym_op in self.sym_ops:
            self._lattice_group.expand_smx(sym_op)
        self._patterson_group = self._lattice_group.build_derived_patterson_group()

        logger.debug(
            "Lattice group: %s (%i symops)",
            self._lattice_group.info().symbol_and_number(),
            len(self._lattice_group),
        )
        logger.debug(
            "Patterson group: %s",
            self._patterson_group.info().symbol_and_number(),
        )

        self.rij_matrix, self.wij_matrix = self._compute_rij_wij()
def write_to_csv(self, output_dir, delimiter):
    """
    Export entities together with retrieved data to a CSV file.

    Parameters that are both input and output parameters are validated (the
    retrieved value is compared with the given one and the outcome logged);
    the retrieved value is what gets exported.

    :param output_dir: Target directory for generated CSV file.
    :param delimiter: Column delimiter in CSV file (typically ',').
    """
    if len(self.entities) == 0:
        logger.info("Nothing to export.")
        return

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if self.chunk_size == 0:
        filename = '{0}.csv'.format(self.configuration.name)
    else:
        last_index = self.start_index + min(len(self.entities), self.chunk_size) - 1
        filename = '{0}_{1}-{2}.csv'.format(
            self.configuration.name, str(self.start_index), str(last_index)
        )
    file_path = os.path.join(output_dir, filename)

    # write entity list to UTF8-encoded CSV file (see also http://stackoverflow.com/a/844443)
    with codecs.open(file_path, 'w', encoding='utf8') as fp:
        logger.info('Exporting entities to ' + file_path + '...')
        writer = csv.writer(fp, delimiter=delimiter)

        # parameters that are both input and output get validated below
        validation_parameters = OrderedSet(
            self.configuration.input_parameters
        ).intersection(OrderedSet(self.configuration.output_parameter_mapping.keys()))

        # column order: input parameters first, then the remaining outputs
        extra_outputs = [
            name
            for name in self.configuration.output_parameter_mapping.keys()
            if name not in validation_parameters
        ]
        column_names = self.configuration.input_parameters + extra_outputs

        # callbacks may have added or dropped output parameters per entity
        dropped = OrderedSet()
        introduced = OrderedSet()
        for entity in self.entities:
            configured = OrderedSet(self.configuration.output_parameter_mapping.keys())
            present = OrderedSet(entity.output_parameters.keys())
            dropped.update(configured.difference(present))

            present = OrderedSet(entity.output_parameters.keys())
            configured = OrderedSet(self.configuration.output_parameter_mapping.keys())
            introduced.update(present.difference(configured))
        for name in dropped:
            column_names.remove(name)
        for name in introduced:
            column_names.append(name)

        # header row
        writer.writerow(column_names)

        for entity in self.entities:
            try:
                row = OrderedDict.fromkeys(column_names)

                # compare retrieved values with the given ones
                for parameter in validation_parameters:
                    if not entity.output_parameters[parameter]:
                        logger.error("Validation of parameter " + parameter
                                     + " failed for entity " + str(entity)
                                     + ": Empty value.")
                        continue

                    expected = str(entity.input_parameters[parameter])
                    actual = str(entity.output_parameters[parameter])
                    if expected == actual:
                        logger.info("Validation of parameter " + parameter
                                    + " successful for entity " + str(entity) + ".")
                    else:
                        logger.error("Validation of parameter " + parameter
                                     + " failed for entity " + str(entity)
                                     + ": Expected: "
                                     + str(entity.input_parameters[parameter])
                                     + ", Actual: "
                                     + str(entity.output_parameters[parameter])
                                     + ". Retrieved value will be exported.")

                # retrieved values take precedence over given ones
                for column_name in column_names:
                    if column_name in entity.output_parameters.keys():
                        row[column_name] = entity.output_parameters[column_name]
                    elif column_name in entity.input_parameters.keys():
                        row[column_name] = entity.input_parameters[column_name]

                if len(row) != len(column_names):
                    raise IllegalArgumentError(
                        str(len(column_names) - len(row))
                        + " parameter(s) is/are missing for entity "
                        + str(entity)
                    )
                writer.writerow(list(row.values()))

            except UnicodeEncodeError:
                logger.error("Encoding error while writing data for entity: " + str(entity))

        logger.info(str(len(self.entities)) + ' entities have been exported.')
def calc_build(self,_seen=None):
    """Decide whether this rule needs to be built.

    The question is answered by recursively asking the rules of this rule's
    prerequisites the same question and collecting their answers.

    Args:
        _seen: internal variable (a set of rules already visited). A falsy
            value, including an empty set, is replaced by a fresh set.

    Returns:
        An OrderedSet of the rules to build, prerequisites before
        dependents. It ends with this rule when the rule itself has to be
        built, and is empty when nothing needs building.

    Raises:
        AssertionError: if a prerequisite has neither a rule nor a readable
            file.
    """
    #There is an opportunity to optimise calculations to occur only once for rules that are called multiple
    #times by using a shared (global) buildseq + _already_seen set, or by passing those structures into
    #the calc_build method call.
    #i.e. if (self in buildseq) or (self in _already_seen): return buildseq
    #Or we can memoize this method

    #updated_only should be calculated during build calculation time (rather than build time) for consistency.
    self.updated_only #force evaluation of lazy property

    buildseq = OrderedSet()
    _seen = set() if not _seen else _seen
    # NOTE(review): the recursive calc_build() calls below are made without
    # arguments, so every call starts with its own _seen; as written this set
    # only ever contains what the caller passed in plus self.
    _seen.add(self) # this will also solve any circular dependency issues!

    # order_only prerequisites are only considered while their file is missing
    for req in self.order_only:
        if not os.path.exists(req):
            reqrule = Rule.get(req,None) #super(ExplicitRule,self).get(req,None)
            if reqrule:
                if reqrule not in _seen:
                    buildseq.update(reqrule.calc_build())
                else:
                    warnings.warn('rule for %r has already been processed' %req,stacklevel=2)
            else:
                warnings.warn('%r has an order_only prerequisite with no rule' %self,stacklevel=2)

    # normal prerequisites: each has either a rule or must be an existing file
    for req in self.reqs:
        reqrule = Rule.get(req,None) #super(ExplicitRule,self).get(req,None)
        if reqrule:
            if reqrule not in _seen:
                buildseq.update(reqrule.calc_build())
            else:
                warnings.warn('rule for %r has already been processed' %req,stacklevel=2)
        else: #perform checks
            try:
                self.get_mtime(req) #get_mtime is cached to reduce number of file accesses
            except OSError as e:
                raise AssertionError("No rule or file found for %r for targets: %r" %(req,self.targets))

    if len(buildseq)==0:
        # no prerequisite needs building: build only if phony, if a target is
        # missing, or if a prerequisite is newer than the oldest target
        if self.PHONY or any([not os.path.exists(target) for target in self.targets]):
            buildseq.add(self)
        else:
            oldest_target = self._oldest_target

            #Since none of the prerequisites have rules that need to update, we can assume
            #that all prerequisites should be real files (phony rules always update which
            #should skip this section of code). Hence non-existing files imply a malformed build
            #file.
            for req in self.reqs:
                try:
                    req_mtime = self.get_mtime(req)
                    if req_mtime > oldest_target:
                        buildseq.add(self)
                        break

                except OSError as e:
                    raise AssertionError("A non file prerequisite was found (%r) for targets %r in wrong code path" %(req,self.targets))
    else:
        # at least one prerequisite will be rebuilt, so this rule must follow
        buildseq.add(self)

    return buildseq
elif ret == 0: log.write("Couldn't get result for %d on %s\n" % (start_roll, temp_dob.strftime("%d/%m/%y"))) # raise AssertionError("Result not found") temp_dob = get_next_date(temp_dob) if temp_dob < END_DOB: print("DOB for %d out of range" % start_roll) start_roll = get_next_roll(start_roll) temp_dob = START_DOB else: print("Successfully got %d for %s" % (start_roll, temp_dob.strftime("%d/%m/%y"))) log.write("Successfully got %d for %s\n" % (start_roll, temp_dob.strftime("%d/%m/%y"))) profile = parse_html(content) flattened = flatten_json(profile, delim="__") fieldnames.update(flattened.keys()) allRows.append(flattened) if start_roll not in record: record[start_roll] = str(temp_dob) record_file.write("%d,%s\n" % (start_roll, str(temp_dob))) record_file.flush() start_roll = get_next_roll(start_roll) temp_dob = START_DOB with open("cbse_data.csv", "w") as file: csvwriter = csv.DictWriter(file, fieldnames=fieldnames) csvwriter.writeheader() for obj in allRows: csvwriter.writerow(obj)
class Target:
    """Target function for cosym analysis.

    The constructor sorts the input reflections by lattice id, derives the set
    of candidate symmetry operations and pre-computes the pairwise correlation
    matrix (``rij_matrix``) and optional weight matrix (``wij_matrix``) that
    the functional, gradient and curvature methods operate on.

    Attributes:
      dim (int): The number of dimensions used in the analysis.
    """

    def __init__(
        self,
        intensities,
        lattice_ids,
        weights=None,
        min_pairs=None,
        lattice_group=None,
        dimensions=None,
        nproc=1,
    ):
        r"""Initialise a Target object.

        Args:
          intensities (cctbx.miller.array): The intensities on which to perform
            cosym analysis.
          lattice_ids (np.ndarray): An array of equal size to `intensities` which
            maps each reflection to a given lattice (dataset).
          weights (str): Optionally include weights in the target function.
            Allowed values are `None`, "count" and "standard_error". The default
            is to use no weights. If "count" is set, then weights are equal to the
            number of pairs of reflections used in calculating each value of the
            rij matrix. If "standard_error" is used, then weights are defined as
            :math:`w_{ij} = 1/s`, where :math:`s = \sqrt{(1-r_{ij}^2)/(n-2)}`.
            See also http://www.sjsu.edu/faculty/gerstman/StatPrimer/correlation.pdf.
          min_pairs (int): Only keep the correlation coefficient between two
            datasets if it was calculated from at least `min_pairs` common
            reflections. `None` disables the check.
          lattice_group (cctbx.sgtbx.space_group): Optionally set the lattice
            group to be used in the analysis.
          dimensions (int): Optionally override the number of dimensions to be
            used in the analysis. If not set, then the number of dimensions used
            is equal to the greater of 2 or the number of symmetry operations in
            the lattice group.
          nproc (int): number of processors to use for computing the rij matrix.
        """
        if weights is not None:
            assert weights in ("count", "standard_error")
        self._weights = weights
        self._min_pairs = min_pairs
        self._nproc = nproc

        data = intensities.customized_copy(anomalous_flag=False)
        cb_op_to_primitive = data.change_of_basis_op_to_primitive_setting()
        data = data.change_basis(cb_op_to_primitive).map_to_asu()

        # Convert to uint64 avoids crashes on Windows when later constructing
        # flex.size_t (https://github.com/cctbx/cctbx_project/issues/591)
        order = lattice_ids.argsort().astype(np.uint64)
        sorted_data = data.data().select(flex.size_t(order))
        sorted_indices = data.indices().select(flex.size_t(order))
        self._lattice_ids = lattice_ids[order]
        self._data = data.customized_copy(indices=sorted_indices, data=sorted_data)
        assert isinstance(self._data.indices(), type(flex.miller_index()))
        assert isinstance(self._data.data(), type(flex.double()))

        # construct a lookup for the separate lattices: for each unique lattice
        # id, the position of its first reflection in the sorted arrays
        self._lattices = np.array(
            [
                np.where(self._lattice_ids == i)[0][0]
                for i in np.unique(self._lattice_ids)
            ]
        )

        # The identity is always the first symmetry operation; the twin
        # operators found for the lattice group are appended after it.
        self.sym_ops = OrderedSet(["x,y,z"])
        self._lattice_group = lattice_group
        self.sym_ops.update(op.as_xyz() for op in self._generate_twin_operators())
        if dimensions is None:
            dimensions = max(2, len(self.sym_ops))
        self.set_dimensions(dimensions)

        # From here on the lattice group is rebuilt from the data's own space
        # group expanded by every symmetry operation collected above.
        self._lattice_group = copy.deepcopy(self._data.space_group())
        for sym_op in self.sym_ops:
            self._lattice_group.expand_smx(sym_op)
        self._patterson_group = self._lattice_group.build_derived_patterson_group()

        logger.debug(
            "Lattice group: %s (%i symops)",
            self._lattice_group.info().symbol_and_number(),
            len(self._lattice_group),
        )
        logger.debug(
            "Patterson group: %s", self._patterson_group.info().symbol_and_number()
        )

        self.rij_matrix, self.wij_matrix = self._compute_rij_wij()

    def set_dimensions(self, dimensions):
        """Set the number of dimensions for analysis.

        Args:
          dimensions (int): The number of dimensions to be used.
        """
        self.dim = dimensions

    def _generate_twin_operators(self, lattice_symmetry_max_delta=5.0):
        """Return the candidate reindexing operators for the lattice group.

        When no lattice group was supplied, one is derived from the minimum
        cell (stored on ``self._lattice_group`` as a side effect) and the
        returned operators are mapped back with the inverse change of basis.

        Args:
          lattice_symmetry_max_delta (float): Passed as ``max_delta`` to
            ``sgtbx.lattice_symmetry.group``.

        Returns:
          list: One operator per left-coset partition (the first partition is
          skipped) whose rotation part has a positive determinant.
        """
        # see also mmtbx.scaling.twin_analyses.twin_laws
        if self._lattice_group is None:
            cb_op_to_minimum_cell = self._data.change_of_basis_op_to_minimum_cell()
            minimum_cell_symmetry = self._data.crystal_symmetry().change_basis(
                cb_op=cb_op_to_minimum_cell
            )
            self._lattice_group = sgtbx.lattice_symmetry.group(
                reduced_cell=minimum_cell_symmetry.unit_cell(),
                max_delta=lattice_symmetry_max_delta,
            )
            intensity_symmetry = minimum_cell_symmetry.reflection_intensity_symmetry(
                anomalous_flag=self._data.anomalous_flag()
            )
            cb_op = cb_op_to_minimum_cell.inverse()
        else:
            cb_op = sgtbx.change_of_basis_op()
            intensity_symmetry = self._data.reflection_intensity_symmetry()

        operators = []
        for partition in cctbx.sgtbx.cosets.left_decomposition(
            g=self._lattice_group,
            h=intensity_symmetry.space_group()
            .build_derived_acentric_group()
            .make_tidy(),
        ).partitions[1:]:
            if partition[0].r().determinant() > 0:
                operators.append(cb_op.apply(partition[0]))

        return operators

    def _lattice_lower_upper_index(self, lattice_id):
        """Return the (lower, upper) slice bounds of one lattice in the sorted data.

        ``upper`` is ``None`` for the last lattice so that slicing runs to the
        end of the array.
        """
        lower_index = int(self._lattices[lattice_id])
        upper_index = None
        if lattice_id < len(self._lattices) - 1:
            upper_index = int(self._lattices[lattice_id + 1])
        else:
            assert lattice_id == len(self._lattices) - 1
        return lower_index, upper_index

    def _compute_rij_wij(self, use_cache=True):
        """Compute the rij_wij matrix.

        Rij is a symmetric matrix of size (n x m, n x m), where n is the number of
        datasets and m is the number of symmetry operations.

        It is composed of (m, m) blocks of size (n, n), where each block contains the
        correlation coefficients between cb_op_k applied to datasets 1..N with
        cb_op_kk applied to datasets 1.. N.

        If `use_cache=True`, then an optimisation is made to reflect the fact some elements
        of the matrix are equivalent, i.e.:
            CC[(a, cb_op_k), (b, cb_op_kk)] == CC[(a,), (b, cb_op_k.inverse() * cb_op_kk)]

        Returns:
          tuple: ``(rij_matrix, wij_matrix)`` as dense float64 matrices;
          ``wij_matrix`` is ``None`` when no weighting scheme was selected.
        """
        n_lattices = len(self._lattices)

        # Pre-calculate miller indices after application of each cb_op. Only calculate
        # this once per cb_op instead of on-the-fly every time we need it.
        indices = {}
        space_group_type = self._data.space_group().type()
        for cb_op in self.sym_ops:
            cb_op = sgtbx.change_of_basis_op(cb_op)
            indices_reindexed = cb_op.apply(self._data.indices())
            miller.map_to_asu(space_group_type, False, indices_reindexed)
            indices[cb_op.as_xyz()] = indices_reindexed

        def _compute_rij_matrix_one_row_block(i):
            # Builds the sparse contributions of lattice `i` against every
            # lattice `j` and every pair of symmetry operations (k, kk).
            # The cache is local to this call, i.e. to one row block.
            rij_cache = {}

            n_sym_ops = len(self.sym_ops)
            NN = n_lattices * n_sym_ops

            rij_row = []
            rij_col = []
            rij_data = []
            if self._weights is not None:
                wij_row = []
                wij_col = []
                wij_data = []
            else:
                wij = None

            i_lower, i_upper = self._lattice_lower_upper_index(i)
            intensities_i = self._data.data()[i_lower:i_upper]

            for j in range(n_lattices):
                j_lower, j_upper = self._lattice_lower_upper_index(j)
                intensities_j = self._data.data()[j_lower:j_upper]

                for k, cb_op_k in enumerate(self.sym_ops):
                    cb_op_k = sgtbx.change_of_basis_op(cb_op_k)
                    indices_i = indices[cb_op_k.as_xyz()][i_lower:i_upper]

                    for kk, cb_op_kk in enumerate(self.sym_ops):
                        if i == j and k == kk:
                            # don't include correlation of dataset with itself
                            continue
                        cb_op_kk = sgtbx.change_of_basis_op(cb_op_kk)

                        # Row/column of this (lattice, sym op) pair in the
                        # (NN, NN) matrix: sym-op index selects the block.
                        ik = i + (n_lattices * k)
                        jk = j + (n_lattices * kk)

                        # Keyed on the relative operator so equivalent
                        # (k, kk) combinations share one calculation.
                        key = (i, j, str(cb_op_k.inverse() * cb_op_kk))
                        if use_cache and key in rij_cache:
                            cc, n = rij_cache[key]
                        else:
                            indices_j = indices[cb_op_kk.as_xyz()][j_lower:j_upper]

                            matches = miller.match_indices(indices_i, indices_j)
                            pairs = matches.pairs()
                            isel_i = pairs.column(0)
                            isel_j = pairs.column(1)
                            # Keep only reflections whose epsilon in the
                            # Patterson group equals 1.
                            isel_i = isel_i.select(
                                self._patterson_group.epsilon(indices_i.select(isel_i))
                                == 1
                            )
                            isel_j = isel_j.select(
                                self._patterson_group.epsilon(indices_j.select(isel_j))
                                == 1
                            )
                            corr = flex.linear_correlation(
                                intensities_i.select(isel_i),
                                intensities_j.select(isel_j),
                            )

                            # Only well-defined correlations are cached, so an
                            # ill-defined one is recomputed on every visit.
                            if corr.is_well_defined():
                                cc = corr.coefficient()
                                n = corr.n()
                                rij_cache[key] = (cc, n)
                            else:
                                cc = None
                                n = None

                        if (
                            n is None
                            or cc is None
                            or (self._min_pairs is not None and n < self._min_pairs)
                        ):
                            continue

                        # NOTE(review): weights are stored for both (ik, jk) and
                        # (jk, ik) while rij is stored for (ik, jk) only. The
                        # mirrored pair presumably gets visited as well, in
                        # which case summed duplicates would double every
                        # weight -- confirm against the sparse matrix semantics.
                        if self._weights == "count":
                            wij_row.extend([ik, jk])
                            wij_col.extend([jk, ik])
                            wij_data.extend([n, n])
                        elif self._weights == "standard_error":
                            assert n > 2
                            # http://www.sjsu.edu/faculty/gerstman/StatPrimer/correlation.pdf
                            # NOTE(review): se is 0 when cc ** 2 == 1, which makes
                            # the division below raise ZeroDivisionError.
                            se = math.sqrt((1 - cc ** 2) / (n - 2))
                            wij = 1 / se
                            wij_row.extend([ik, jk])
                            wij_col.extend([jk, ik])
                            wij_data.extend([wij, wij])

                        rij_row.append(ik)
                        rij_col.append(jk)
                        rij_data.append(cc)

            rij = sparse.coo_matrix((rij_data, (rij_row, rij_col)), shape=(NN, NN))
            if self._weights is not None:
                # Rebinds `wij` (used above as a scalar) to the weight matrix.
                wij = sparse.coo_matrix((wij_data, (wij_row, wij_col)), shape=(NN, NN))

            return rij, wij

        # One task per lattice; each returns that lattice's row block.
        args = [(i,) for i in range(n_lattices)]
        results = easy_mp.parallel_map(
            _compute_rij_matrix_one_row_block,
            args,
            processes=self._nproc,
            iterable_type=easy_mp.posiargs,
            method="multiprocessing",
        )

        # Sum the per-lattice row blocks into the full matrices.
        rij_matrix = None
        wij_matrix = None
        for i, (rij, wij) in enumerate(results):
            if rij_matrix is None:
                rij_matrix = rij
            else:
                rij_matrix += rij
            if wij is not None:
                if wij_matrix is None:
                    wij_matrix = wij
                else:
                    wij_matrix += wij

        rij_matrix = rij_matrix.todense().astype(np.float64)
        if wij_matrix is not None:
            wij_matrix = wij_matrix.todense().astype(np.float64)

        return rij_matrix, wij_matrix

    def compute_functional(self, x: np.ndarray) -> float:
        """Compute the target function at coordinates `x`.

        Args:
          x (np.ndarray):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.

        Returns:
          f (float): The value of the target function at coordinates `x`.
        """
        assert (x.size // self.dim) == (len(self._lattices) * len(self.sym_ops))
        inner = np.copy(self.rij_matrix)
        NN = x.size // self.dim
        # Subtract the outer product of each dimension's coordinates from rij.
        for i in range(self.dim):
            coord = x[i * NN : (i + 1) * NN]
            outer_prod = np.outer(coord, coord)
            inner -= outer_prod
        elements = np.power(inner, 2)
        if self.wij_matrix is not None:
            elements = np.multiply(self.wij_matrix, elements)
        f = 0.5 * elements.sum()
        return f

    def compute_gradients_fd(self, x: np.ndarray, eps=1e-6) -> np.ndarray:
        """Compute the gradients at coordinates `x` using finite differences.

        Args:
          x (np.ndarray):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.
          eps (float):
            The value of epsilon to use in finite difference calculations.

        Returns:
          grad (np.ndarray):
          The gradients of the target function with respect to the parameters.
        """
        # Work on a copy so the caller's array is never perturbed.
        x = copy.deepcopy(x)
        grad = np.zeros(x.shape)
        for i in range(x.size):
            x[i] += eps  # x + eps
            fp = self.compute_functional(x)
            x[i] -= 2 * eps  # x - eps
            fm = self.compute_functional(x)
            x[i] += eps  # reset to original values
            grad[i] += (fp - fm) / (2 * eps)
        return grad

    def compute_gradients(self, x: np.ndarray) -> np.ndarray:
        """Compute the gradients of the target function at coordinates `x`.

        Args:
          x (np.ndarray):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.

        Returns:
          grad (np.ndarray):
          The gradients of the target function with respect to the parameters,
          in the same flattened layout as `x`.
        """
        grad = np.empty(x.shape)
        if self.wij_matrix is not None:
            wrij_matrix = np.multiply(self.wij_matrix, self.rij_matrix)
        else:
            wrij_matrix = self.rij_matrix

        # Split the flattened input into one coordinate vector per dimension.
        coords = []
        NN = x.size // self.dim
        for i in range(self.dim):
            coords.append(x[i * NN : (i + 1) * NN])

        # term 1
        for i in range(self.dim):
            grad[i * NN : (i + 1) * NN] = np.matmul(wrij_matrix, coords[i])

        # Second term: subtract the (optionally weighted) outer-product
        # contribution of every dimension.
        for i in range(self.dim):
            tmp_array = np.empty(x.shape)
            tmp = np.outer(coords[i], coords[i])
            if self.wij_matrix is not None:
                tmp = np.multiply(self.wij_matrix, tmp)
            for j in range(self.dim):
                tmp_array[j * NN : (j + 1) * NN] = np.matmul(tmp, coords[j])
            grad -= tmp_array
        grad *= -2

        return grad

    def curvatures(self, x: np.ndarray) -> np.ndarray:
        """Compute the curvature of the target function at coordinates `x`.

        Args:
          x (np.ndarray):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.

        Returns:
          curvs (np.ndarray):
          The curvature of the target function with respect to the parameters.
        """
        NN = x.size // self.dim
        curvs = np.empty(x.shape)
        # Without a weighting scheme every pair counts with unit weight.
        if self.wij_matrix is not None:
            wij = self.wij_matrix
        else:
            wij = np.ones(self.rij_matrix.shape)
        for i in range(self.dim):
            curvs[i * NN : (i + 1) * NN] = np.matmul(
                wij, np.power(x[i * NN : (i + 1) * NN], 2)
            )
        curvs *= 2
        return curvs

    def curvatures_fd(self, x: np.ndarray, eps=1e-6) -> np.ndarray:
        """Compute the curvatures at coordinates `x` using finite differences.

        Args:
          x (np.ndarray):
            a flattened list of the N-dimensional vectors, i.e. coordinates in
            the first dimension are stored first, followed by the coordinates in
            the second dimension, etc.
          eps (float):
            The value of epsilon to use in finite difference calculations.

        Returns:
          curvs (np.ndarray):
          The curvature of the target function with respect to the parameters.
        """
        # Work on a copy so the caller's array is never perturbed.
        x = copy.deepcopy(x)
        f = self.compute_functional(x)
        curvs = np.zeros(x.shape)
        for i in range(x.size):
            x[i] += eps  # x + eps
            fp = self.compute_functional(x)
            x[i] -= 2 * eps  # x - eps
            fm = self.compute_functional(x)
            x[i] += eps  # reset to original values
            curvs[i] += (fm - 2 * f + fp) / (eps ** 2)
        return curvs

    def get_sym_ops(self):
        """Get the symmetry operations used in the analysis (deprecated).

        Emits a DeprecationWarning; read the `sym_ops` attribute instead.

        Returns:
          The `sym_ops` collection built in the constructor, holding the
          symmetry operations as "x,y,z"-style strings.
        """
        warnings.warn(
            "get_sym_ops() is deprecated, use sym_ops property instead",
            DeprecationWarning,
        )
        return self.sym_ops
class HetrComputation(Computation):
    """
    Lightweight wrapper class for handling runtime execution of child computations for Hetr
    """

    def __init__(self, hetr, comp_op):
        """
        Run the Hetr graph passes and build one child computation per child transformer.

        :arg hetr: the Hetr transformer; its send_nodes, graph_passes and
            child_transformers are read here
        :arg comp_op: the computation op providing `returns` and `parameters`
        """
        # The ABC aliases on the `collections` module were removed in
        # Python 3.10; resolve them through `collections.abc` when it exists
        # and fall back to `collections` itself on legacy interpreters.
        abcs = getattr(collections, 'abc', collections)

        self.child_computations = dict()
        self.transformer = hetr
        self.send_nodes = hetr.send_nodes
        self.computation = comp_op

        # self.returns could be replaced by comp_op.returns if it were expressed as a set
        self.returns = OrderedSet()
        if isinstance(comp_op.returns, abcs.Container):
            self.returns.update(list(comp_op.returns))
        elif isinstance(comp_op.returns, Op):
            self.returns.update([comp_op.returns])

        # if one of the requested results is marked as distributed across devices,
        # wrap it in a ResultOp to facilitate DistributedPass inserting a gather operation
        new_returns = OrderedSet()
        for op in self.returns:
            if 'device_id' in op.metadata and \
                    isinstance(op.metadata['device_id'], (list, tuple)):
                op.metadata['is_split_op'] = True
                new_result = ResultOp(device_id=0, args=(op,))
                # Cross-link original and wrapper so results can be mapped back.
                op.metadata['hetr_replaced_by'] = new_result
                new_result.metadata['replaces_op'] = op
                new_returns.add(new_result)
            else:
                new_returns.add(op)

        # Do Hetr passes
        pass_ops = new_returns | OrderedSet(self.computation.parameters)
        for graph_pass in self.transformer.graph_passes:
            # send_nodes is re-read on every iteration, so nodes added by an
            # earlier pass are handed to the next one.
            pass_ops = pass_ops | OrderedSet(hetr.send_nodes)
            graph_pass.do_pass(pass_ops, self.transformer)

        # hack around new TensorValueOp that wraps AssignableTensorOp
        # autogenerated by creating a ComputationOp:
        for p in self.computation.parameters:
            if isinstance(p, TensorValueOp):
                p.metadata.update(p.states_read[0].metadata)

        # simplify by already having asynctrans made by passes
        for t_name, async_trans in iteritems(self.transformer.child_transformers):
            # Keep the global position of each parameter so inputs can be
            # routed to the right child at call time.
            my_params = [(g_pos, p)
                         for g_pos, p in enumerate(self.computation.parameters)
                         if p.metadata['transformer'] == t_name]
            my_ops = [op for op in self.send_nodes | new_returns
                      if op.metadata['transformer'] == t_name]
            transform_ops = [op.args[0] if isinstance(op, ResultOp) else op
                             for op in my_ops]

            async_comp = async_trans.computation(
                transform_ops, tuple(p for _, p in my_params))
            async_comp.param_idx = [g_pos for g_pos, _ in my_params]

            # when there is a ResultOp, hack around it
            async_comp.returns = dict()
            for i, op in enumerate(my_ops):
                if op in self.returns and 'hetr_replaced_by' not in op.metadata:
                    async_comp.returns[op] = i
                elif 'replaces_op' in op.metadata and \
                        op.metadata['replaces_op'] in self.returns:
                    async_comp.returns[op.metadata['replaces_op']] = i
            self.child_computations[t_name] = async_comp

    def __call__(self, *args, **kwargs):
        """
        Executes child computations in parallel.

        :arg args: list of values to the placeholders specified in __init__ *args

        :return: the single value when `returns` is an Op, the full result dict
            when it is a set, a tuple of values (one per return) when it is a
            sequence, otherwise None.
        """
        # See __init__: the ABC aliases left the `collections` namespace in 3.10.
        abcs = getattr(collections, 'abc', collections)

        args = self.unpack_args_or_feed_dict(args, kwargs)

        # Feed every child first, then collect, so the children overlap.
        for child in itervalues(self.child_computations):
            child.feed_input([args[i] for i in child.param_idx])

        return_vals = dict()
        for child in itervalues(self.child_computations):
            return_vals.update(child.get_results())

        if isinstance(self.computation.returns, Op):
            return return_vals[self.computation.returns]
        elif isinstance(self.computation.returns, abcs.Set):
            return return_vals
        elif isinstance(self.computation.returns, abcs.Sequence):
            return tuple(return_vals[op] for op in self.computation.returns)
        else:
            return None
class HetrComputation(Computation):
    """
    Lightweight wrapper class for handling runtime execution of child computations for Hetr
    """

    def __init__(self, hetr, computation_op):
        """
        Run the Hetr graph passes, launch the child transformers and create
        one remote computation per child transformer.

        :arg hetr: the Hetr transformer; its send_nodes, graph_passes,
            mpilauncher and child_transformers are used here
        :arg computation_op: the computation op providing `returns` and `parameters`
        """
        # The ABC aliases on the `collections` module were removed in
        # Python 3.10; resolve them through `collections.abc` when it exists
        # and fall back to `collections` itself on legacy interpreters.
        abcs = getattr(collections, 'abc', collections)

        self.child_computations = dict()
        self.transformer = hetr
        # clear send_nodes for multiple computations
        if hetr.send_nodes:
            hetr.send_nodes.clear()
        self.send_nodes = hetr.send_nodes
        self.computation_op = computation_op

        # self.returns could be replaced by comp_op.returns if it were expressed as a set
        self.returns = OrderedSet()
        if isinstance(computation_op.returns, abcs.Container):
            self.returns.update(list(computation_op.returns))
        elif isinstance(computation_op.returns, Op):
            self.returns.update([computation_op.returns])

        # if one of the requested results is marked as distributed across devices,
        # wrap it in a ResultOp to facilitate DistributedPass inserting a gather operation
        new_returns = OrderedSet()
        for op in self.returns:
            if 'device_id' in op.metadata and \
                    isinstance(op.metadata['device_id'], (list, tuple)):
                op.metadata['is_split_op'] = True
                new_result = ResultOp(device_id=0, args=(op,))
                # Cross-link original and wrapper so results can be mapped back.
                op.metadata['hetr_replaced_by'] = new_result
                new_result.metadata['replaces_op'] = op
                new_returns.add(new_result)
            else:
                new_returns.add(op)

        # Do Hetr passes
        logger.info('Running graph passes')
        pass_ops = new_returns | OrderedSet(self.computation_op.parameters)
        for graph_pass in self.transformer.graph_passes:
            # send_nodes is re-read on every iteration, so nodes added by an
            # earlier pass are handed to the next one.
            pass_ops = pass_ops | OrderedSet(hetr.send_nodes)
            graph_pass.do_pass(ops=pass_ops)

        # hack around new TensorValueOp that wraps AssignableTensorOp
        # autogenerated by creating a ComputationOp:
        for p in self.computation_op.parameters:
            if isinstance(p, TensorValueOp):
                p.metadata.update(p.states_read[0].metadata)

        logger.info('Launching child processes')
        # assume all children are the same type
        # and all GPUs are in one chassis
        num_process = len(self.transformer.child_transformers)
        ppn = 1 if self.transformer.default_device == 'cpu' else num_process
        self.transformer.mpilauncher.launch(num_process, ppn)
        self.transformer.setup_child_transformers(num_process)

        def is_my_op(op, name):
            # NOTE(review): `name in op_trans` is a substring test when the
            # metadata value is a plain string -- presumably it is meant for
            # list/tuple values; confirm that transformer names cannot be
            # prefixes of one another.
            op_trans = op.metadata['transformer']
            return name == op_trans or name in op_trans

        logger.info('Serializaing computation graph')
        # build whole_graph once to avoid slow serialization once per worker
        # split whole pb message into list of smaller chunks
        # gRPC prefers sending smaller messages
        placeholders = list(self.computation_op.parameters)
        all_returns = list(self.send_nodes | new_returns)
        transform_returns = [
            o.args[0] if isinstance(o, ResultOp) else o for o in all_returns
        ]
        whole_graph = Op.all_op_references(transform_returns + placeholders)

        pb_whole_graph = []
        pb_ops, pb_edges = [], []
        for i, o in enumerate(whole_graph):
            pb_ops.append(op_to_protobuf(o))
            add_edges(pb_edges, pb_ops, o)
            # Flush a chunk every _OPS_PER_MSG ops and once more at the end.
            if (i != 0 and i % _OPS_PER_MSG == 0) or (i == len(whole_graph) - 1):
                pb_whole_graph.append((pb_ops, pb_edges))
                pb_ops, pb_edges = [], []

        t_placeholders, t_returns = {}, {}
        for t_name in self.transformer.child_transformers.keys():
            t_placeholders[t_name] = [
                p for p in placeholders if is_my_op(p, t_name)
            ]
            t_returns[t_name] = [r for r in all_returns if is_my_op(r, t_name)]

        # create_computation is an async call using gPRC future
        # allowing child transformers to create computation simultaneously
        # get_computation waits the corresponding request to finish
        logger.info('Creating remote computations')
        for t_name, trans in iteritems(self.transformer.child_transformers):
            logger.debug('child transformer: {}'.format(t_name))
            trans.build_transformer()
            transform_ops = [
                r.args[0] if isinstance(r, ResultOp) else r
                for r in t_returns[t_name]
            ]
            trans.create_computation(pb_whole_graph, transform_ops,
                                     t_placeholders[t_name])

        for t_name, trans in iteritems(self.transformer.child_transformers):
            comp = trans.get_computation()
            # Global positions of the parameters owned by this child, used to
            # route inputs at call time.
            comp.param_idx = [
                g_pos
                for g_pos, p in enumerate(self.computation_op.parameters)
                if is_my_op(p, t_name)
            ]

            # when there is a ResultOp, hack around it
            comp.returns = dict()
            for i, op in enumerate(t_returns[t_name]):
                if op in self.returns and 'hetr_replaced_by' not in op.metadata:
                    comp.returns[op] = i
                elif 'replaces_op' in op.metadata and \
                        op.metadata['replaces_op'] in self.returns:
                    comp.returns[op.metadata['replaces_op']] = i
            self.child_computations[t_name] = comp

    def __call__(self, *args, **kwargs):
        """
        Executes child computations in parallel.

        :arg args: list of values to the placeholders specified in __init__ *args

        :return: the single value when `returns` is an Op, a tuple of values
            (one per return) when it is a sequence or OrderedSet, the full
            result dict for any other set, otherwise None.
        """
        # See __init__: the ABC aliases left the `collections` namespace in 3.10.
        abcs = getattr(collections, 'abc', collections)

        args = self.unpack_args_or_feed_dict(args, kwargs)

        # Feed every child first, then collect, so the children overlap.
        for child in itervalues(self.child_computations):
            child.feed_input([args[i] for i in child.param_idx])

        return_vals = dict()
        for child in itervalues(self.child_computations):
            return_vals.update(child.get_results())

        if isinstance(self.computation_op.returns, Op):
            return return_vals[self.computation_op.returns]
        elif isinstance(self.computation_op.returns,
                        (abcs.Sequence, OrderedSet)):
            return tuple(return_vals[op] for op in self.computation_op.returns)
        elif isinstance(self.computation_op.returns, abcs.Set):
            return return_vals
        else:
            return None
def get_unseenedges(self):
    """Return the edges yielded by ``iter_edges()`` that are not in ``seen_edges``.

    The edges are gathered into an OrderedSet before ``seen_edges`` is
    subtracted from it.
    """
    return OrderedSet(self.iter_edges()) - self.seen_edges
class Target:
    """Objective function minimised by the cosym procedure.

    Attributes:
      dim (int): Dimensionality of the coordinate space used in the analysis.
    """

    def __init__(
        self,
        intensities,
        lattice_ids,
        weights=None,
        min_pairs=3,
        lattice_group=None,
        dimensions=None,
        nproc=None,
    ):
        r"""Build the target from a set of intensities grouped by lattice.

        Args:
          intensities (cctbx.miller.array): Intensities to analyse.
          lattice_ids (np.ndarray): Array with one entry per reflection in
            `intensities`, assigning that reflection to a lattice (dataset).
          weights (str): Weighting scheme for the target function: `None`
            (default, unweighted), "count" or "standard_error". With "count"
            each weight is the number of reflection pairs behind the
            corresponding rij value. With "standard_error" the weight is
            :math:`w_{ij} = 1/s`, where :math:`s = \sqrt{(1-r_{ij}^2)/(n-2)}`.
            See also http://www.sjsu.edu/faculty/gerstman/StatPrimer/correlation.pdf.
          min_pairs (int): Passed as `min_periods` to the correlation
            calculation, i.e. the minimum number of common reflections needed
            for a correlation coefficient to be reported.
          lattice_group (cctbx.sgtbx.space_group): Optional lattice group to
            use instead of deriving one from the minimum cell.
          dimensions (int): Optional override for the number of dimensions.
            Defaults to the greater of 2 and the number of symmetry operations.
          nproc (int): Deprecated; a UserWarning is issued when it is given.
        """
        if nproc is not None:
            warnings.warn("nproc is deprecated", UserWarning)
        if weights is not None:
            assert weights in ("count", "standard_error")
        self._weights = weights
        self._min_pairs = min_pairs

        merged = intensities.customized_copy(anomalous_flag=False)
        to_primitive = merged.change_of_basis_op_to_primitive_setting()
        merged = merged.change_basis(to_primitive).map_to_asu()

        # Convert to uint64 avoids crashes on Windows when later constructing
        # flex.size_t (https://github.com/cctbx/cctbx_project/issues/591)
        sort_order = lattice_ids.argsort().astype(np.uint64)
        data_by_lattice = merged.data().select(flex.size_t(sort_order))
        indices_by_lattice = merged.indices().select(flex.size_t(sort_order))
        self._lattice_ids = lattice_ids[sort_order]
        self._data = merged.customized_copy(
            indices=indices_by_lattice, data=data_by_lattice
        )
        assert isinstance(self._data.indices(), type(flex.miller_index()))
        assert isinstance(self._data.data(), type(flex.double()))

        # Lookup for the separate lattices: position of the first reflection
        # of every unique lattice id in the sorted arrays.
        first_positions = []
        for lattice_id in np.unique(self._lattice_ids):
            first_positions.append(np.where(self._lattice_ids == lattice_id)[0][0])
        self._lattices = np.array(first_positions)

        # Identity first, then the twin operators of the lattice group.
        self.sym_ops = OrderedSet(["x,y,z"])
        self._lattice_group = lattice_group
        self.sym_ops.update(op.as_xyz() for op in self._generate_twin_operators())
        self.set_dimensions(
            max(2, len(self.sym_ops)) if dimensions is None else dimensions
        )

        # Rebuild the lattice group from the data's space group expanded by
        # every collected symmetry operation.
        self._lattice_group = copy.deepcopy(self._data.space_group())
        for sym_op in self.sym_ops:
            self._lattice_group.expand_smx(sym_op)
        self._patterson_group = self._lattice_group.build_derived_patterson_group()

        logger.debug(
            "Lattice group: %s (%i symops)",
            self._lattice_group.info().symbol_and_number(),
            len(self._lattice_group),
        )
        logger.debug(
            "Patterson group: %s",
            self._patterson_group.info().symbol_and_number(),
        )

        self.rij_matrix, self.wij_matrix = self._compute_rij_wij()

    def set_dimensions(self, dimensions):
        """Store the number of dimensions to analyse.

        Args:
          dimensions (int): The number of dimensions to be used.
        """
        self.dim = dimensions

    def _generate_twin_operators(self, lattice_symmetry_max_delta=5.0):
        """Return the candidate reindexing operators for the lattice group.

        When no lattice group was supplied, one is derived from the minimum
        cell and stored on ``self._lattice_group``.
        """
        # see also mmtbx.scaling.twin_analyses.twin_laws
        if self._lattice_group is not None:
            cb_op = sgtbx.change_of_basis_op()
            intensity_symmetry = self._data.reflection_intensity_symmetry()
        else:
            to_minimum_cell = self._data.change_of_basis_op_to_minimum_cell()
            minimum_symmetry = self._data.crystal_symmetry().change_basis(
                cb_op=to_minimum_cell
            )
            self._lattice_group = sgtbx.lattice_symmetry.group(
                reduced_cell=minimum_symmetry.unit_cell(),
                max_delta=lattice_symmetry_max_delta,
            )
            intensity_symmetry = minimum_symmetry.reflection_intensity_symmetry(
                anomalous_flag=self._data.anomalous_flag()
            )
            cb_op = to_minimum_cell.inverse()

        subgroup = (
            intensity_symmetry.space_group()
            .build_derived_acentric_group()
            .make_tidy()
        )
        decomposition = cctbx.sgtbx.cosets.left_decomposition(
            g=self._lattice_group,
            h=subgroup,
        )
        # Skip the first partition and keep only operators whose rotation
        # part has a positive determinant.
        return [
            cb_op.apply(partition[0])
            for partition in decomposition.partitions[1:]
            if partition[0].r().determinant() > 0
        ]

    def _compute_rij_wij(self, use_cache=True):
        """Compute the rij and wij matrices.

        Rij is a symmetric matrix of size (n x m, n x m), where n is the number
        of datasets and m is the number of symmetry operations. It is made of
        (m, m) blocks of size (n, n); each block holds the correlation
        coefficients between cb_op_k applied to datasets 1..N and cb_op_kk
        applied to datasets 1..N.

        `use_cache` is accepted for interface compatibility and is not read.

        Returns:
          tuple: ``(rij, wij)``; ``wij`` is ``None`` without a weighting scheme.
        """
        n_lattices = len(self._lattices)
        n_sym_ops = len(self.sym_ops)

        # Reindex the miller indices once per symmetry operation, and record
        # the Patterson-group epsilon of every reindexed reflection.
        hkl_by_op = {}
        epsilon_by_op = {}
        sg_type = self._data.space_group().type()
        for op_xyz in self.sym_ops:
            cb_op = sgtbx.change_of_basis_op(op_xyz)
            reindexed = cb_op.apply(self._data.indices())
            miller.map_to_asu(sg_type, False, reindexed)
            label = cb_op.as_xyz()
            components = [
                part.iround().as_numpy_array()
                for part in reindexed.as_vec3_double().parts()
            ]
            hkl_by_op[label] = np.array(components).transpose()
            epsilon_by_op[label] = self._patterson_group.epsilon(
                reindexed
            ).as_numpy_array()
        intensities = self._data.data().as_numpy_array()

        # Map every hkl triple to a flat 1d index so that equal reflections
        # from different datasets end up in the same column.
        offset = -np.min(np.concatenate(list(hkl_by_op.values())), axis=0)
        dims = np.max(np.concatenate(list(hkl_by_op.values())), axis=0) + offset + 1
        flat_by_op = {}
        for label, hkl in hkl_by_op.items():
            flat_by_op[label] = np.ravel_multi_index((hkl + offset).T, dims)

        # One row per (sym op, lattice) combination and one column per
        # possible flat miller index; absent intensities stay NaN.
        all_intensities = np.empty((n_sym_ops * n_lattices, np.prod(dims)))
        all_intensities.fill(np.nan)

        bounds = np.append(self._lattices, intensities.size)
        lattice_slices = [slice(lo, hi) for lo, hi in zip(bounds[:-1], bounds[1:])]
        for i, (flat_index, eps) in enumerate(
            zip(flat_by_op.values(), epsilon_by_op.values())
        ):
            for j, selection in enumerate(lattice_slices):
                # map (i, j) to a row of all_intensities
                row = np.ravel_multi_index((i, j), (n_sym_ops, n_lattices))
                keep = eps[selection] == 1
                all_intensities[row, flat_index[selection][keep]] = intensities[
                    selection
                ][keep]

        # Ideally we would use `np.ma.corrcoef` here, but it is broken, so use
        # pd.DataFrame.corr() instead (see numpy/numpy#15601)
        frame = pd.DataFrame(all_intensities).T.dropna(how="all")
        rij = frame.corr(min_periods=self._min_pairs).values
        # Set any NaN correlation coefficients to zero
        np.nan_to_num(rij, copy=False)
        # Cosym does not make use of the on-diagonal correlation coefficients
        np.fill_diagonal(rij, 0)

        if not self._weights:
            return rij, None

        wij = np.zeros_like(rij)
        upper = np.triu_indices_from(wij, k=1)

        # For each correlation coefficient, the weight starts out as the size
        # of the sample used to calculate that coefficient.
        present = np.isfinite(all_intensities)
        wij[upper] = [
            np.count_nonzero(first & second)
            for first, second in itertools.combinations(present, 2)
        ]

        if self._weights == "standard_error":
            # Replace each count by the reciprocal of the standard error on the
            # corresponding correlation coefficient
            # http://www.sjsu.edu/faculty/gerstman/StatPrimer/correlation.pdf
            with np.errstate(divide="ignore", invalid="ignore"):
                reciprocal_se = np.sqrt(
                    (wij[upper] - 2) / (1 - np.square(rij[upper]))
                )
            wij[upper] = np.where(wij[upper] > 2, reciprocal_se, 0)

        # Symmetrise the wij matrix
        wij += wij.T
        return rij, wij

    def compute_functional(self, x: np.ndarray) -> float:
        """Evaluate the target function at coordinates `x`.

        Args:
          x (np.ndarray):
            Flattened N-dimensional vectors: all coordinates of the first
            dimension, then all of the second, and so on.

        Returns:
          f (float): The value of the target function at coordinates `x`.
        """
        n_points = x.size // self.dim
        assert n_points == (len(self._lattices) * len(self.sym_ops))
        coords = x.reshape((self.dim, n_points))
        squared_residual = np.square(self.rij_matrix - coords.T @ coords)
        if self.wij_matrix is not None:
            squared_residual = np.multiply(self.wij_matrix, squared_residual)
        return 0.5 * squared_residual.sum()

    def compute_gradients_fd(self, x: np.ndarray, eps=1e-6) -> np.ndarray:
        """Estimate the gradients at `x` by central finite differences.

        Args:
          x (np.ndarray):
            Flattened N-dimensional vectors: all coordinates of the first
            dimension, then all of the second, and so on.
          eps (float):
            Step size used for the finite differences.

        Returns:
          grad (np.ndarray):
          The gradients of the target function with respect to the parameters.
        """
        # Perturb a private copy so the caller's array is left untouched.
        probe = copy.deepcopy(x)
        grad = np.zeros(probe.shape)
        for idx in range(probe.size):
            probe[idx] += eps  # x + eps
            f_plus = self.compute_functional(probe)
            probe[idx] -= 2 * eps  # x - eps
            f_minus = self.compute_functional(probe)
            probe[idx] += eps  # back to the original value
            grad[idx] += (f_plus - f_minus) / (2 * eps)
        return grad

    def compute_gradients(self, x: np.ndarray) -> np.ndarray:
        """Compute the analytical gradients of the target function at `x`.

        Args:
          x (np.ndarray):
            Flattened N-dimensional vectors: all coordinates of the first
            dimension, then all of the second, and so on.

        Returns:
          grad (np.ndarray):
          The gradients of the target function with respect to the parameters,
          flattened in the same layout as `x`.
        """
        coords = x.reshape((self.dim, x.size // self.dim))
        if self.wij_matrix is None:
            grad = -2 * coords @ (self.rij_matrix - coords.T @ coords)
        else:
            weighted_rij = np.multiply(self.wij_matrix, self.rij_matrix)
            weighted_gram = np.multiply(self.wij_matrix, coords.T @ coords)
            grad = -2 * coords @ (weighted_rij - weighted_gram)
        return grad.flatten()

    def curvatures(self, x: np.ndarray) -> np.ndarray:
        """Compute the curvature of the target function at coordinates `x`.

        Args:
          x (np.ndarray):
            Flattened N-dimensional vectors: all coordinates of the first
            dimension, then all of the second, and so on.

        Returns:
          curvs (np.ndarray):
          The curvature of the target function with respect to the parameters.
        """
        # Without a weighting scheme every pair counts with unit weight.
        if self.wij_matrix is None:
            weights = np.ones(self.rij_matrix.shape)
        else:
            weights = self.wij_matrix
        coords = x.reshape((self.dim, x.size // self.dim))
        return (2 * np.square(coords) @ weights).flatten()

    def curvatures_fd(self, x: np.ndarray, eps=1e-6) -> np.ndarray:
        """Estimate the curvatures at `x` by central finite differences.

        Args:
          x (np.ndarray):
            Flattened N-dimensional vectors: all coordinates of the first
            dimension, then all of the second, and so on.
          eps (float):
            Step size used for the finite differences.

        Returns:
          curvs (np.ndarray):
          The curvature of the target function with respect to the parameters.
        """
        # Perturb a private copy so the caller's array is left untouched.
        probe = copy.deepcopy(x)
        f_centre = self.compute_functional(probe)
        curvs = np.zeros(probe.shape)
        for idx in range(probe.size):
            probe[idx] += eps  # x + eps
            f_plus = self.compute_functional(probe)
            probe[idx] -= 2 * eps  # x - eps
            f_minus = self.compute_functional(probe)
            probe[idx] += eps  # back to the original value
            curvs[idx] += (f_minus - 2 * f_centre + f_plus) / (eps**2)
        return curvs

    def get_sym_ops(self):
        """Return the symmetry operations used in the analysis (deprecated).

        Issues a UserWarning; read the `sym_ops` attribute instead.

        Returns:
          The `sym_ops` collection built in the constructor.
        """
        message = "get_sym_ops() is deprecated, use sym_ops property instead"
        warnings.warn(message, UserWarning)
        return self.sym_ops
def get_JSONSchema_requirements(se, root, schema_name):
    """Build a draft-07 JSON Schema skeleton for the data model under `root`.

    The graph returned by ``se.get_nx_schema()`` is walked starting at `root`.
    For every node reached:

    * if the node carries a truthy ``requires_child`` attribute and has
      children, those children become the node's allowed values (an
      ``"enum"`` entry under ``"properties"``); the children and the
      dependencies of each child are queued for traversal;
    * the dependencies of `root` itself are appended to ``"required"``;
      the dependencies of any other node are only queued for traversal.

    Each node is expanded at most once, so a dependency cycle in the graph
    cannot make the traversal loop forever.

    Relies on ``requires_child``, ``get_node_children``,
    ``get_node_neighbor_dependencies`` and ``OrderedSet`` being available in
    the enclosing module.

    Args:
        se: object exposing ``get_nx_schema()``, which must return a graph
            whose ``nodes[node]`` is a mapping of node attributes.
        root: node at which the traversal starts.
        schema_name: string used as the schema ``"title"`` and appended to
            the ``"$id"`` base URL.

    Returns:
        dict: the schema, with keys ``"$schema"``, ``"$id"``, ``"title"``,
        ``"type"``, ``"properties"`` and ``"required"``. ``"allOf"`` is
        removed whenever it is empty, which is currently always the case
        (see the note in the body).
    """
    json_schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "$id": "http://example.com/" + schema_name,
        "title": schema_name,
        "type": "object",
        "properties": {},
        "required": [],
        "allOf": [],
    }

    # Graph corresponding to the data model schema.
    mm_graph = se.get_nx_schema()

    # Work list of nodes still to check, seeded with the provided root.
    # pop() takes the most recently added node, so the walk is depth-first.
    nodes_to_process = OrderedSet()
    nodes_to_process.add(root)

    # Nodes whose dependencies have already been queued.
    nodes_with_processed_dependencies = set()
    # Nodes already expanded. Re-expanding a node adds nothing new to the
    # schema but re-queues its children, which never terminates on a cycle.
    visited_nodes = set()

    while nodes_to_process:
        process_node = nodes_to_process.pop()
        if process_node in visited_nodes:
            continue
        visited_nodes.add(process_node)

        # Allowed values: each child of a "requires child" node is a valid
        # value for that node and is itself processed in turn.
        node_attributes = mm_graph.nodes[process_node]
        if requires_child in node_attributes and node_attributes[requires_child]:
            children = get_node_children(mm_graph, process_node)
            if children:
                json_schema["properties"][process_node] = {"enum": children}
                nodes_to_process.update(children)

                # NOTE(review): conditional rules of the form
                # "if <process_node> is <child> then require <dependencies>"
                # are not emitted into "allOf"; the code that appended them
                # is disabled. Only the traversal bookkeeping remains.
                for child in children:
                    child_dependencies = get_node_neighbor_dependencies(
                        mm_graph, child
                    )
                    if child_dependencies:
                        nodes_with_processed_dependencies.add(child)
                        nodes_to_process.update(child_dependencies)

        # Nodes required by this node (terms that must be specified whenever
        # this term is specified); each is processed for dependencies in turn.
        if process_node not in nodes_with_processed_dependencies:
            process_node_dependencies = get_node_neighbor_dependencies(
                mm_graph, process_node
            )
            if process_node_dependencies:
                if process_node == root:
                    # Dependencies of the root are unconditional.
                    json_schema["required"] += process_node_dependencies
                # Dependencies of any other node would be conditional on that
                # node being present; see the NOTE above: they are only
                # queued, not written to the schema.
                nodes_to_process.update(process_node_dependencies)
                nodes_with_processed_dependencies.add(process_node)

    print("=================")
    print("JSONSchema successfully generated from Schema.org schema!")
    print("=================")

    # An empty "allOf" block is not allowed in the schema, so drop it when no
    # conditional dependencies were added.
    if not json_schema["allOf"]:
        del json_schema["allOf"]

    return json_schema