def list_to_csr_matrix(array, dtype="double"):
    """Build a CSR matrix whose i-th row holds the items of ``array[i]``.

    The k-th item of ``array[i]`` is stored at column ``k`` of row ``i``, so
    the number of columns is the length of the longest row and shorter rows
    simply have fewer stored entries.

    Parameters
    ----------
    array : sequence of sequences
        One (possibly empty) sequence of values per output row.
    dtype : str or numpy dtype, optional
        Data type of the resulting matrix (default ``"double"``).

    Returns
    -------
    csr_matrix
        Matrix with ``len(array)`` rows. When every row is empty the result
        has shape ``(len(array), 0)``.
    """
    lengths = [len(row) for row in array]
    if not any(lengths):
        # No stored values at all: return an explicitly shaped empty matrix,
        # since the column count cannot be inferred from empty index arrays.
        return csr_matrix((len(lengths), 0), dtype=dtype)

    # Row pointers are offsets into the data array, so keep them integral
    # instead of relying on a float -> int conversion inside scipy.
    indptr = np.zeros(len(lengths) + 1, dtype=np.intp)
    indptr[1:] = lengths
    np.cumsum(indptr, out=indptr)

    data = list(iterchain(*array))
    # Column index of each value is its position inside its own row.
    indices = list(iterchain(*(range(n) for n in lengths)))
    return csr_matrix((data, indices, indptr), dtype=dtype)
def precache_repeated_template_sessions(self):
    """
    Iterate through all repeated template exercises and ensure there's a
    buffer of pre-created sessions for each exercise in the database, waiting
    to be assigned to users.

    For every exercise/language pair, the sessions with ``user=None`` are
    counted and one ``generate_repeated_template_session`` signature is
    queued per session missing from the ``ENSURE_COUNT`` target. All queued
    signatures are dispatched together as a single group; nothing is
    dispatched when there is nothing to generate.
    """
    # TODO: How to account for revisions?
    exercises = cm.RepeatedTemplateExercise.objects.filter(
        content_type="REPEATED_TEMPLATE_EXERCISE")
    ENSURE_COUNT = 10
    lang_codes = django_settings.LANGUAGES

    # One flat list of signatures. Chaining a new iterator per pair would
    # nest one level deeper for every exercise/language combination.
    session_generators = []
    for exercise in exercises:
        for lang_code, _ in lang_codes:
            sessions = cm.RepeatedTemplateExerciseSession.objects.filter(
                exercise=exercise, user=None, language_code=lang_code)
            generate_count = ENSURE_COUNT - sessions.count()
            print(
                "Exercise {} with language {} missing {} pre-generated sessions!"
                .format(exercise.name, lang_code, generate_count))
            # range() of a non-positive count is empty, so an already full
            # buffer contributes no signatures.
            session_generators.extend(
                generate_repeated_template_session.s(None, None, exercise.id,
                                                     lang_code, 0)
                for _ in range(generate_count))

    # Without this guard, a database with no matching exercises would end up
    # calling group(None).
    if session_generators:
        group(session_generators).delay()
def izip_records(seqarrays, fill_value=None, flatten=True):
    """
    Returns an iterator of concatenated items from a sequence of arrays.

    Items are drawn from all inputs in lockstep until the longest input is
    exhausted; inputs that run out earlier contribute `fill_value` instead.

    Parameters
    ----------
    seqarrays : sequence of arrays
        Sequence of arrays.
    fill_value : {None, integer}
        Value used to pad shorter iterables.
    flatten : {True, False},
        Selects the helper that builds each output tuple:
        `_izip_fields_flat` when True, `_izip_fields` when False
        (presumably flat versus nested fields -- see those helpers).

    Yields
    ------
    tuple
        One tuple per position, built from the items found at that position.
    """
    # Imported here so the block does not depend on a module-level alias.
    from itertools import zip_longest

    # Should we flatten the items, or just use a nested approach
    zipfunc = _izip_fields_flat if flatten else _izip_fields

    # zip_longest replaces the former sentinel/IndexError emulation, which
    # also silently swallowed any IndexError raised by `zipfunc` itself.
    for tup in zip_longest(*seqarrays, fillvalue=fill_value):
        yield tuple(zipfunc(tup))
def izip_records(seqarrays, fill_value=None, flatten=True):
    """
    Returns an iterator of concatenated items from a sequence of arrays.

    Items are drawn from all inputs in lockstep until the longest input is
    exhausted; inputs that run out earlier contribute `fill_value` instead.

    Parameters
    ----------
    seqarrays : sequence of arrays
        Sequence of arrays.
    fill_value : {None, integer}
        Value used to pad shorter iterables.
    flatten : {True, False},
        Selects the helper that builds each output tuple:
        `_izip_fields_flat` when True, `_izip_fields` when False
        (presumably flat versus nested fields -- see those helpers).

    Yields
    ------
    tuple
        One tuple per position, built from the items found at that position.
    """
    # Imported here so the block does not depend on a module-level alias.
    from itertools import zip_longest

    # Should we flatten the items, or just use a nested approach
    zipfunc = _izip_fields_flat if flatten else _izip_fields

    # zip_longest replaces the former sentinel/IndexError emulation, which
    # also silently swallowed any IndexError raised by `zipfunc` itself.
    for tup in zip_longest(*seqarrays, fillvalue=fill_value):
        yield tuple(zipfunc(tup))
def addPlugins(self, plugins=(), extraplugins=()):
    """Add every plugin from ``plugins``, then every one from ``extraplugins``.

    ``extraplugins`` is also stored on ``self._extraplugins``: those plugins
    are maintained in a separate list and re-added by loadPlugins() to
    prevent their being overwritten by plugins added by a subclass of
    PluginManager.
    """
    self._extraplugins = extraplugins
    # Walk the two collections back to back, regular plugins first.
    for source in (plugins, extraplugins):
        for plugin in source:
            self.addPlugin(plugin)
def model2proto(model, package_name):
    '''Converts a Model object to a protobuf schema string

    The result is the syntax line, the package line, the service definition
    and one definition per unique input/output type of the model's methods,
    joined with newlines.
    '''
    # One type iterator for the input and one for the output of each method,
    # in method order.
    type_iters = []
    for method in model.methods.values():
        type_iters.append(_proto_iter(method.input_type))
        type_iters.append(_proto_iter(method.output_type))
    unique_types = _require_unique(iterchain(*type_iters))

    type_names = {t.__name__ for t in unique_types}

    # Named tuples and enums are rendered by different helpers.
    msg_defs = []
    for t in unique_types:
        if _is_namedtuple(t):
            msg_defs.append(_nt2proto(t, type_names))
        else:
            msg_defs.append(_enum2proto(t))

    service_def = _gen_service(model)
    package_def = _package_template.format(name=package_name)
    return '\n'.join([_PROTO_SYNTAX, package_def, service_def] + msg_defs)
def forward_fundamental_rule(self, edge):
    """ This fundamental rule applies to non complete trees
    It is searching for possible substitution and insertion trees

    Every edge returned by ``self.from_frontier_buckets(edge, True)`` is
    combined with ``edge`` in both directions, and each edge produced is
    added to ``self.agenda``.
    """
    self.logger.debug("Forward FR for %s" % edge)
    for complete_edge in self.from_frontier_buckets(edge, True):
        # Log the candidate being tried (the loop variable), mirroring
        # backward_fundamental_rule; the incoming edge was logged above.
        self.logger.debug("Checking for combinations with %s" % complete_edge)
        for new_edge in iterchain(edge.combine(complete_edge),
                                  complete_edge.combine(edge)):
            self.logger.debug("Produced %s" % new_edge.entry)
            self.agenda.add(new_edge)
def create_optims(model, optim_cls, **optim_params):
    """Create one optimizer per role for ``model``.

    Returns a dict with three entries, each built by calling
    ``optim_cls(parameters, **optim_params)``:

    * ``"generator"``     -- over ``model.generator.parameters()``
    * ``"discriminator"`` -- over ``model.discriminator.parameters()``
    * ``"info"``          -- over the generator's parameters followed by the
      discriminator's
    """
    optims = {}
    optims["generator"] = optim_cls(model.generator.parameters(),
                                    **optim_params)
    optims["discriminator"] = optim_cls(model.discriminator.parameters(),
                                        **optim_params)
    # The "info" optimizer sees both parameter sets as one stream.
    joint_params = iterchain(model.generator.parameters(),
                             model.discriminator.parameters())
    optims["info"] = optim_cls(joint_params, **optim_params)
    return optims
def backward_fundamental_rule(self, complete_edge):
    """ This fundamental rule applies to complete trees
    It is searching for non complete trees and insertion trees
    We could probably skip complete, non insertion nodes here
    Decided not to because incoming edge could be an inserter

    Every edge returned by ``self.from_frontier_buckets(complete_edge,
    False)`` is combined with ``complete_edge`` in both directions, and each
    edge produced is added to ``self.agenda``.
    """
    log = self.logger
    log.debug("Backward FR for %s" % complete_edge)
    candidates = self.from_frontier_buckets(complete_edge, False)
    for edge in candidates:
        log.debug("Checking for combinations with %s" % edge)
        # Both combinations are requested up front, then consumed in order.
        as_left = edge.combine(complete_edge)
        as_right = complete_edge.combine(edge)
        for produced in iterchain(as_left, as_right):
            log.debug("Produced %s" % produced.entry)
            self.agenda.add(produced)
def mapping(value, **kw):
    """Normalize a single mapping from a query to a set of role rules.

    Translates loosely written YAML into the canonical schema. ``value`` is
    modified in place and returned:

    * ``role`` is accepted as an alias of ``roles``, and a single role is
      wrapped in a list; each role goes through ``rolerule``.
    * ``grant`` may be a single dict or a list; each goes through
      ``grantrule`` (which receives ``**kw``).
    * ``on_unexpected_dn`` is removed from the roles and becomes the default
      of the ``ldap`` entry (``'fail'`` when no role sets it).
    * ``ldap``, when present, is replaced by the result of ``ldapquery``.

    Raises ValueError for a non-dict value, mixed ``on_unexpected_dn``
    values, a mapping without role or grant, or static roles mixed with an
    LDAP query.
    """
    if not isinstance(value, dict):
        raise ValueError("Mapping should be a dict.")

    # Canonical key is 'roles', always holding a list.
    if 'role' in value:
        value['roles'] = value.pop('role')
    value.setdefault('roles', [])
    if not isinstance(value['roles'], list):
        value['roles'] = [value['roles']]

    # Lift on_unexpected_dn out of the individual roles; only one distinct
    # value is allowed for the whole mapping.
    policies = {
        role.pop('on_unexpected_dn')
        for role in value['roles']
        if 'on_unexpected_dn' in role
    }
    if len(policies) > 1:
        raise ValueError("Mixed on_unexpected_dn not supported.")
    on_unexpected_dn = policies.pop() if policies else 'fail'

    value['roles'] = [rolerule(role) for role in value['roles']]

    if 'grant' in value:
        if isinstance(value['grant'], dict):
            value['grant'] = [value['grant']]
        value['grant'] = [grantrule(grant, **kw) for grant in value['grant']]

    if not (value['roles'] or 'grant' in value):
        # Don't accept unused LDAP queries.
        raise ValueError("Missing role or grant rule.")

    if 'ldap' not in value:
        return value

    roles = value.get('roles', [])
    grants = value.get('grant', [])
    if (any([rule.names.has_static for rule in roles])
            or any([rule.roles.has_static for rule in grants])):
        raise ValueError("Mixing static role with LDAP query may hide it.")

    # Union of the fields used by every role and grant rule.
    field_groups = [rule.all_fields for rule in roles + grants]
    format_fields = set().union(*field_groups)

    ldap = value['ldap']
    ldap.setdefault('on_unexpected_dn', on_unexpected_dn)
    value['ldap'] = ldapquery(ldap, format_fields)
    return value
def all_atom_min_distances(reslist):
    """Find the closest pair of atoms between every pair of residues.

    Parameters
    ----------
    reslist : sequence of residues
        Each residue must support ``len()`` and iteration over its atoms;
        every atom must expose a ``coord`` attribute.

    Returns
    -------
    final_D : N x N array, N = len(reslist)
        ``final_D[i, j]`` is the smallest atom-to-atom distance between
        residue ``i`` and residue ``j``. The matrix is symmetric and its
        diagonal is left at zero.
    other_output_dict : dict
        ``other_output_dict['closest_atom'][i, j]`` is the pair
        ``(atom of residue i, atom of residue j)`` realising that distance;
        the ``[j, i]`` entry holds the same pair reversed.
    """
    N = len(reslist)
    closest_atom = dict()
    other_output_dict = {'closest_atom': closest_atom}

    # Each residue might have a variable number of atoms (unresolved atoms,
    # hydrogens present or not, ...), so keep the start index of every
    # residue in the flat atom list. The extra final element is the total
    # atom count, which makes slicing residue i simply offset[i]:offset[i+1].
    num_atoms_per_res = [len(oneres) for oneres in reslist]
    atom_offset = S.cumsum([0] + num_atoms_per_res)

    # One flattened list of atoms, kept in residue order.
    allatoms = [atom for res in reslist for atom in res]
    # One coordinate row per atom (not per residue).
    allcoords = S.array([atom.coord for atom in allatoms])
    # presumably the full atom-by-atom distance matrix -- see dists_from_coords
    atom_D = dists_from_coords(allcoords)

    final_D = S.zeros((N, N))
    for i in range(N):
        for j in range(i + 1, N):
            # Rows are the atoms of residue i, columns the atoms of residue j.
            submatrix = atom_D[atom_offset[i]:atom_offset[i + 1],
                               atom_offset[j]:atom_offset[j + 1]]
            minspot = submatrix.argmin()
            min_dist = submatrix.flat[minspot]
            final_D[i, j] = min_dist
            final_D[j, i] = min_dist
            # argmin() indexes the flattened submatrix; split it back into
            # row/column with integer arithmetic ("/" would give a float
            # index on Python 3).
            row, col = divmod(minspot, submatrix.shape[1])
            i_atom = allatoms[atom_offset[i] + row]
            j_atom = allatoms[atom_offset[j] + col]
            closest_atom[i, j] = (i_atom, j_atom)
            closest_atom[j, i] = (j_atom, i_atom)
    return final_D, other_output_dict
def merge_arrays(seqarrays, fill_value=-1, flatten=False, usemask=True,
                 asrecarray=False):
    """
    Merge arrays field by field.

    Every input is flattened with ``ravel()``; inputs shorter than the
    longest flattened input are padded with `fill_value` (and the padding is
    masked when a masked array is returned).

    Parameters
    ----------
    seqarrays : sequence of ndarrays
        Sequence of arrays
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    flatten : {False, True}, optional
        Whether to collapse nested fields.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.

    Examples
    --------
    >>> merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])))
    masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)],
          mask = [(False, False) (False, False) (True, False)],
          fill_value=(999999, 1e+20)
          dtype=[('f0', '<i4'), ('f1', '<f8')])
    >>> merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])),
    ...              usemask=False)
    array(data = [(1, 10.0) (2, 20.0) (-1, 30.0)],
          dtype=[('f0', '<i4'), ('f1', '<f8')])
    >>> merge_arrays((np.array([1, 2]).view([('a', int)]),
                      np.array([10., 20., 30.])),
                     usemask=False, asrecarray=True)
    rec.array(data = [(1, 10.0) (2, 20.0) (-1, 30.0)],
              dtype=[('a', int), ('f1', '<f8')])
    """
    # A one-element sequence is treated as that single element.
    if (len(seqarrays) == 1):
        seqarrays = seqarrays[0]
    if isinstance(seqarrays, ndarray):
        seqdtype = seqarrays.dtype
        if (not flatten) or \
           (zip_descr((seqarrays,), flatten=True) == seqdtype.descr):
            # Single array needing no flattening of fields: only the view
            # type (and a field name, if missing) has to be adjusted.
            seqarrays = seqarrays.ravel()
            if not seqdtype.names:
                seqarrays = seqarrays.view([('', seqdtype)])
            if usemask:
                if asrecarray:
                    return seqarrays.view(MaskedRecords)
                return seqarrays.view(MaskedArray)
            elif asrecarray:
                return seqarrays.view(recarray)
            return seqarrays
        else:
            seqarrays = (seqarrays,)
    # Get the dtype
    newdtype = zip_descr(seqarrays, flatten=flatten)
    # Get the data and the fill_value from each array
    seqdata = [ma.getdata(a.ravel()) for a in seqarrays]
    seqmask = [ma.getmaskarray(a).ravel() for a in seqarrays]
    fill_value = [_check_fill_value(fill_value, a.dtype) for a in seqdata]
    # Make an iterator from each array, padding w/ fill_values.
    # The target length must come from the ravelled data: len() of an
    # unravelled multi-dimensional input is only its first dimension, which
    # would under-pad the shorter inputs.
    maxlength = max(len(a) for a in seqdata)
    for (i, (a, m, fval)) in enumerate(zip(seqdata, seqmask, fill_value)):
        # Flatten the fill_values if there's only one field
        if isinstance(fval, (ndarray, np.void)):
            fmsk = ma.ones((1,), m.dtype)[0]
            if len(fval.dtype) == 1:
                fval = fval.item()[0]
                fmsk = True
            else:
                # fval and fmsk should be np.void objects
                fval = np.array([fval, ], dtype=a.dtype)[0]
        else:
            fmsk = True
        nbmissing = (maxlength - len(a))
        seqdata[i] = iterchain(a, [fval] * nbmissing)
        seqmask[i] = iterchain(m, [fmsk] * nbmissing)
    #
    data = izip_records(seqdata, flatten=flatten)
    data = tuple(data)
    if usemask:
        mask = izip_records(seqmask, fill_value=True, flatten=flatten)
        mask = tuple(mask)
        output = ma.array(np.fromiter(data, dtype=newdtype))
        output._mask[:] = list(mask)
        if asrecarray:
            output = output.view(MaskedRecords)
    else:
        output = np.fromiter(data, dtype=newdtype)
        if asrecarray:
            output = output.view(recarray)
    return output
def print_design(mut="mut", flex="flex", out="design.cfs", flex_only=False,
                 limit_seqspace=False):
    """
    Print out the OSPREY config information for a design to file

    The file actually written is ``out`` with the residue count and the
    conformation-space size inserted before the extension
    (``<base>_<n>res_<size><ext>``).

    Args:
        mut (str): PyMol selection name for the mutable design residues
        flex (str): PyMol selection name for the flexible design residues
        out (str): System path to output file
        flex_only (bool): When True, mutable residues are only allowed their
            current residue name (no mutations are added)
        limit_seqspace (bool): When True (and ``flex_only`` is False), the
            allowed mutations of a residue come from ``SIMILAR_MUTATIONS_1``
            for its residue name instead of ``DEFAULT_MUTATIONS_ALL``
    """
    # NOTE(review): any failure below is printed rather than raised --
    # presumably so that an error does not abort the calling PyMol session;
    # confirm before narrowing the except clause.
    try:
        # Collect residue lists from selections
        mut_list = residue.Residue.sele_to_res(mut)
        flex_list = residue.Residue.sele_to_res(flex)

        # Find the input pdb file
        pdb_file_name = find_pdb_file(mut, flex)

        # Make sure we don't have the mutable residues in the flexible selection
        mut_ids = set(e.str_long() for e in mut_list)
        flex_ids = set(e.str_long() for e in flex_list)
        if mut_ids.intersection(flex_ids):
            raise CmdException("ERROR!: You forgot to remove the mutable residues from the"+
                               " flexible ones!")

        # Collect the chains required for design
        chains = set(res.chain_id for res in iterchain(mut_list, flex_list))

        # TODO: Consider changing the format to be easier to read and write...

        # For each chain, process muts and flex.
        # Chains are visited in sorted order so that strand numbering does
        # not depend on set iteration order, which can differ between runs.
        strand_defs = {}
        strand_flex_all = {}
        for counter, chain_id in enumerate(sorted(chains)):
            strand_key = STRAND_NAME+str(counter)

            # First, get all residues in chain
            stored.all_res_list = []
            cmd.iterate("chain "+chain_id+" and name ca",
                        "stored.all_res_list.append(\"\"+chain+resi)")

            # Store only the first and last
            strand_defs[strand_key] = [
                stored.all_res_list[0],
                stored.all_res_list[-1],
            ]

            # Define strand flexibilities / mutabilities
            this_strand_flex = {}

            # For mutable residues in strand, add all mutations by default
            for res in mut_list:
                if res.chain_id != chain_id:
                    continue
                allowed_mutations = [res.res_name]
                if not flex_only:
                    if limit_seqspace:
                        allowed_mutations = SIMILAR_MUTATIONS_1[res.res_name]
                    else:
                        allowed_mutations = DEFAULT_MUTATIONS_ALL
                this_strand_flex[res.chain_id+str(res.res_seq)+res.i_code] = \
                    allowed_mutations

            # For flexible residues in strand, add only res name
            for res in flex_list:
                if res.chain_id != chain_id:
                    continue
                this_strand_flex[res.chain_id+str(res.res_seq)+res.i_code] = \
                    [res.res_name]

            strand_flex_all[strand_key] = this_strand_flex

        conf_size = get_confspace_size(strand_flex_all)
        conf_size_str = '%.3E' % conf_size
        num_res = str(get_num_res(strand_flex_all))+"res"

        # Modify out with confsize
        basename, ext = os.path.splitext(out)
        new_name = '_'.join([basename, num_res, conf_size_str])+ext
        print("Writing "+new_name)

        # Print out variables
        with open(new_name, 'w') as f:
            f.write(HEADER+"\n")
            f.write("mol = \""+str(pdb_file_name)+"\"\n")
            f.write("strand_defs = "+str(strand_defs)+"\n")
            f.write("strand_flex = "+str(strand_flex_all)+"\n")
    except Exception as e:
        print(e)
def merge_arrays(seqarrays, fill_value=-1, flatten=False, usemask=True,
                 asrecarray=False):
    """
    Merge arrays field by field.

    Every input is flattened with ``ravel()``; inputs shorter than the
    longest flattened input are padded with `fill_value` (and the padding is
    masked when a masked array is returned).

    Parameters
    ----------
    seqarrays : sequence of ndarrays
        Sequence of arrays
    fill_value : {float}, optional
        Filling value used to pad missing data on the shorter arrays.
    flatten : {False, True}, optional
        Whether to collapse nested fields.
    usemask : {False, True}, optional
        Whether to return a masked array or not.
    asrecarray : {False, True}, optional
        Whether to return a recarray (MaskedRecords) or not.

    Examples
    --------
    >>> merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])))
    masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)],
          mask = [(False, False) (False, False) (True, False)],
          fill_value=(999999, 1e+20)
          dtype=[('f0', '<i4'), ('f1', '<f8')])
    >>> merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])),
    ...              usemask=False)
    array(data = [(1, 10.0) (2, 20.0) (-1, 30.0)],
          dtype=[('f0', '<i4'), ('f1', '<f8')])
    >>> merge_arrays((np.array([1, 2]).view([('a', int)]),
                      np.array([10., 20., 30.])),
                     usemask=False, asrecarray=True)
    rec.array(data = [(1, 10.0) (2, 20.0) (-1, 30.0)],
              dtype=[('a', int), ('f1', '<f8')])
    """
    # A one-element sequence is treated as that single element.
    if (len(seqarrays) == 1):
        seqarrays = seqarrays[0]
    if isinstance(seqarrays, ndarray):
        seqdtype = seqarrays.dtype
        if (not flatten) or \
           (zip_descr((seqarrays,), flatten=True) == seqdtype.descr):
            # Single array needing no flattening of fields: only the view
            # type (and a field name, if missing) has to be adjusted.
            seqarrays = seqarrays.ravel()
            if not seqdtype.names:
                seqarrays = seqarrays.view([('', seqdtype)])
            if usemask:
                if asrecarray:
                    return seqarrays.view(MaskedRecords)
                return seqarrays.view(MaskedArray)
            elif asrecarray:
                return seqarrays.view(recarray)
            return seqarrays
        else:
            seqarrays = (seqarrays,)
    # Get the dtype
    newdtype = zip_descr(seqarrays, flatten=flatten)
    # Get the data and the fill_value from each array
    seqdata = [ma.getdata(a.ravel()) for a in seqarrays]
    seqmask = [ma.getmaskarray(a).ravel() for a in seqarrays]
    fill_value = [_check_fill_value(fill_value, a.dtype) for a in seqdata]
    # Make an iterator from each array, padding w/ fill_values.
    # The target length must come from the ravelled data: len() of an
    # unravelled multi-dimensional input is only its first dimension, which
    # would under-pad the shorter inputs.
    maxlength = max(len(a) for a in seqdata)
    for (i, (a, m, fval)) in enumerate(zip(seqdata, seqmask, fill_value)):
        # Flatten the fill_values if there's only one field
        if isinstance(fval, (ndarray, np.void)):
            fmsk = ma.ones((1,), m.dtype)[0]
            if len(fval.dtype) == 1:
                fval = fval.item()[0]
                fmsk = True
            else:
                # fval and fmsk should be np.void objects
                fval = np.array([fval, ], dtype=a.dtype)[0]
        else:
            fmsk = True
        nbmissing = (maxlength - len(a))
        seqdata[i] = iterchain(a, [fval] * nbmissing)
        seqmask[i] = iterchain(m, [fmsk] * nbmissing)
    #
    data = izip_records(seqdata, flatten=flatten)
    data = tuple(data)
    if usemask:
        mask = izip_records(seqmask, fill_value=True, flatten=flatten)
        mask = tuple(mask)
        output = ma.array(np.fromiter(data, dtype=newdtype))
        output._mask[:] = list(mask)
        if asrecarray:
            output = output.view(MaskedRecords)
    else:
        output = np.fromiter(data, dtype=newdtype)
        if asrecarray:
            output = output.view(recarray)
    return output