def nn_coll_adjusted(person, jokeId):
    """Predict a rating with the adjusted weighted-sum collaborative filter.

    The prediction is ``avg + k * sum(w_i * (r_i - avg))`` where ``avg`` comes
    from ``nn_item_average``, ``k`` from ``computeK``, and the sum runs over
    the neighbours returned by ``nNN_users``.

    Each neighbour entry is indexed as ``entry[0]`` (used as the weight) and
    ``entry[1]`` (used as the row index into ``rawRatings``).
    NOTE(review): presumably (similarity, user index) pairs -- confirm
    against ``nNN_users``.

    Parameters
    ----------
    person : identifier passed through to the helper functions.
    jokeId : 1-based joke identifier (column ``jokeId - 1`` of ``rawRatings``).

    Returns
    -------
    float-like adjusted rating.
    """
    N = 10
    # Only the first N entries were ever used; slicing also copes with the
    # helper returning fewer than N neighbours (previously an IndexError).
    neighbors = nNN_users(N, person, jokeId)[:N]
    avg = nn_item_average(person, jokeId)
    k = computeK(person, jokeId, avg)

    count = len(neighbors)
    # np.float64 replaces the 'float_' alias, which NumPy 2 no longer accepts.
    deviations = np.fromiter(
        (rawRatings[entry[1], jokeId - 1] - avg for entry in neighbors),
        dtype=np.float64, count=count)
    weights = np.fromiter(
        (entry[0] for entry in neighbors), dtype=np.float64, count=count)

    return avg + k * float(np.sum(weights * deviations))
def function(self, simulation, period):
    """Compute the per-company transport levy rate for a month.

    The period is first normalised with
    ``period.start.period(u'month').offset('first-of')``. The AOT and SMT
    rates are looked up per municipality code (``depcom_entreprise``) in the
    module-level tables, defaulting to 0.0 for unknown codes.

    Returns
    -------
    tuple ``(period, rate)`` where ``rate`` is ``(taux_aot + taux_smt) / 100``
    masked by ``or_(headcount >= threshold, public)``.
    """
    period = period.start.period(u'month').offset('first-of')

    def monthly(variable_name):
        return simulation.calculate(variable_name, period)

    depcom_entreprise = monthly('depcom_entreprise')
    effectif_entreprise = monthly('effectif_entreprise')
    categorie_salarie = monthly('categorie_salarie')

    legislation = simulation.legislation_at(period.start)
    seuil_effectif = legislation.cotsoc.versement_transport.seuil_effectif

    # The rate tables are read only after this call.
    # NOTE(review): presumably this populates them -- confirm.
    preload_taux_versement_transport()

    default_value = 0.0

    def rates_from(table):
        lookups = (table.get(code, default_value) for code in depcom_entreprise)
        return fromiter(lookups, dtype = 'float')

    taux_aot = rates_from(taux_aot_by_depcom)
    taux_smt = rates_from(taux_smt_by_depcom)

    is_public = (categorie_salarie >= 2)
    # "Does the company employ more than 9 employees within the perimeter of
    # the following transport organising authority (AOT) or joint transport
    # syndicate (SMT)?"
    is_liable = or_(effectif_entreprise >= seuil_effectif, is_public)
    return period, (taux_aot + taux_smt) * is_liable / 100
def test_sqlite():
    """Benchmark an in-memory SQLite table with and without indexes.

    Creates table ``bench`` with ``NC`` real columns, inserts ``NR`` random
    rows, then runs the same two-column selection before and after creating
    indexes on ``f1`` and ``f8``. The timing hooks ``enter``,
    ``after_create`` and ``after_query`` are called around each phase.

    Returns the structured array produced by the indexed query.
    """
    enter()
    sqlquery = "(f2>.9) and ((f8>.3) and (f8<.4))"  # the query
    select_stmt = "select f1, f3 from bench where %s" % sqlquery

    con = sqlite3.connect(":memory:")

    # Create table
    column_defs = ",".join(["f%d real" % col for col in range(NC)])
    fields = "(%s)" % column_defs
    con.execute("create table bench %s" % fields)

    # Insert NR rows of data
    placeholders = "(%s)" % ",".join(["?"] * NC)
    random_rows = (mv + np.random.rand(NC) - mv for _ in xrange(int(NR)))
    with con:
        con.executemany("insert into bench values %s" % placeholders,
                        random_rows)
    after_create()

    def run_query():
        return np.fromiter((row for row in con.execute(select_stmt)),
                           dtype="f8,f8")

    out = run_query()
    after_query("non-indexed")

    # Create indexes
    for statement in ("create index f1idx on bench (f1)",
                      "create index f2idx on bench (f8)"):
        con.execute(statement)
    after_create("index")

    out = run_query()
    after_query("indexed")
    return out
def decode4js(obj):
    """Return the decoded Python object for an encoded object.

    Dicts carrying a ``'__class__'`` tag are decoded by tag:

    * ``'Complex'``: ``value[0] + 1j * value[1]``
    * ``'List'`` / ``'Tuple'``: recursively decoded items of ``value``
    * ``'Array'``: NumPy array built from ``value`` (two real sequences
      when ``__dtype__`` starts with ``'complex'``), reshaped to
      ``__shape__``
    * ``'Dict'`` / ``'Parameter'`` / ``'Group'``: recursively decoded
      entries, passed as keyword arguments to ``Parameter`` / ``Group``
      for those tags

    Anything else (non-dicts and untagged dicts) is returned unchanged.
    The input is never modified, so the same encoded object can be decoded
    repeatedly. Previously the tag was popped from the caller's dict.
    """
    if not isinstance(obj, dict):
        return obj

    classname = obj.get('__class__')
    if classname is None:
        return obj

    if classname == 'Complex':
        return obj['value'][0] + 1j*obj['value'][1]

    if classname in ('List', 'Tuple'):
        items = [decode4js(item) for item in obj['value']]
        return tuple(items) if classname == 'Tuple' else items

    if classname == 'Array':
        if obj['__dtype__'].startswith('complex'):
            real_part = np.fromiter(obj['value'][0], dtype='double')
            imag_part = np.fromiter(obj['value'][1], dtype='double')
            out = real_part + 1j*imag_part
        else:
            out = np.fromiter(obj['value'], dtype=obj['__dtype__'])
        return out.reshape(obj['__shape__'])

    # Every remaining case works on the payload without the tag itself.
    payload = {key: val for key, val in obj.items() if key != '__class__'}
    if classname in ('Dict', 'Parameter', 'Group'):
        decoded = {key: decode4js(val) for key, val in payload.items()}
        if classname == 'Parameter':
            return Parameter(**decoded)
        if classname == 'Group':
            return Group(**decoded)
        return decoded

    # Unknown tag: hand back the untagged content, as before.
    return payload
def _stats_to_movie_results(bam_stats, movie_names):
    """
    Separate out per-movie results from process stats.

    ``bam_stats`` is iterated exactly once and its records are grouped by
    ``movieName``; records whose movie is not in ``movie_names`` are
    ignored. One ``MovieResult(movie_name, read_lengths, accuracy,
    num_passes)`` is produced per movie name, in sorted name order.
    """
    movies = sorted(movie_names)
    # (read lengths, pass counts, read scores) per movie, in record order.
    per_movie = {name: ([], [], []) for name in movies}
    for record in bam_stats:
        columns = per_movie.get(record.movieName)
        if columns is None:
            continue
        columns[0].append(record.qLen)
        columns[1].append(record.numPasses)
        columns[2].append(record.readScore)

    results = []
    for movie_name in movies:
        lengths, passes, scores = per_movie[movie_name]
        read_lengths = np.fromiter(lengths, dtype=np.int64, count=len(lengths))
        num_passes = np.fromiter(passes, dtype=np.int64, count=len(passes))
        # np.float64 replaces the np.float alias removed in NumPy 1.24.
        accuracy = np.fromiter(scores, dtype=np.float64, count=len(scores))
        results.append(MovieResult(
            movie_name, read_lengths, accuracy, num_passes))
    return results
def _read_outcomes(match):
    """Read gambit outcome format

    Parses the ``roles``, ``strats``, ``outcomes`` and ``inds`` groups of
    ``match`` into a payoff matrix and returns
    ``_normalize(role_names, strat_names, matrix)``. Row 0 of the outcome
    table is an all-zero payoff; the ``inds`` entries select rows of that
    table for every strategy profile.
    """
    role_names = _string_list(match.group('roles'))
    num_roles = len(role_names)

    strats_body = match.group('strats')[1:-1]
    strat_names = [_string_list(found.group())
                   for found in _RE_STRATS.finditer(strats_body)]
    utils.check(
        len(strat_names) == num_roles,
        'player names and strategies differed in length')
    num_strats = np.fromiter(
        (len(names) for names in strat_names), int, num_roles)

    payoff_rows = [np.zeros(num_roles)]
    outcomes_body = match.group('outcomes')[1:-1]
    for found in _RE_OUTCOME.finditer(outcomes_body):
        text = found.group()[1:-1]
        # Payoffs start right after the first _RE_STR match in the outcome.
        label_end = next(_RE_STR.finditer(text)).end()
        tokens = text[label_end:].split()
        utils.check(
            len(tokens) == num_roles, 'outcome has wrong number of payoffs')
        payoff_rows.append(np.fromiter(  # pragma: no branch
            (float(tok.rstrip(',')) for tok in tokens), float, num_roles))
    outcomes = np.stack(payoff_rows)

    index_tokens = match.group('inds').split()
    utils.check(
        len(index_tokens) == num_strats.prod(), 'wrong number of outcomes')
    inds = np.fromiter(map(int, index_tokens), int, len(index_tokens))

    matrix = np.empty(tuple(num_strats) + (num_roles,))
    # Fill through a view whose role axes are reversed; the payoff axis
    # stays last.
    reversed_axes = tuple(reversed(range(num_roles))) + (num_roles,)
    np.transpose(matrix, reversed_axes).flat = outcomes[inds]
    return _normalize(role_names, strat_names, matrix)
def calculate_switch_stats(mappable, linkage_map_file, linkage_map_format, MST_grouping_threshold):
    """Compute pairwise recombination statistics grouped by linkage group.

    Parameters
    ----------
    mappable : mapping of locus -> genotypes; the length of the first value
        is used as the number of individuals.
    linkage_map_file : path handed to the map parser.
    linkage_map_format : ``'mst'`` or ``'rqtl'`` (case-insensitive).
    MST_grouping_threshold : forwarded to
        ``get_threshold_recombinants_for_same_LGs``.

    Returns
    -------
    tuple ``(ini_map, sq_sum_lod, sq_NR_matrix, R, NR, lgs_longer_than_1)``.

    Raises
    ------
    ValueError
        If ``linkage_map_format`` is not recognised.
    """
    map_format = linkage_map_format.lower()
    if map_format == 'mst':
        ini_map, loci_on_lg = parse_map_file_MST(linkage_map_file)
    elif map_format == 'rqtl':
        ini_map, loci_on_lg = parse_map_file_rqtl(linkage_map_file)
    else:
        raise ValueError("unknown linkage_map_format")

    int_arr = convert_genotypes_to_int_array(mappable, ini_map)
    num_loci = int_arr.shape[0]
    # Exact integer count of unordered locus pairs.
    num_pairs = num_loci * (num_loci - 1) // 2

    # combinations() is a one-shot iterator, so it is rebuilt for each use.
    pairs = itertools.combinations(int_arr, 2)
    R = numpy.fromiter(getR(pairs), dtype = numpy.float64, count = num_pairs)
    pairs = itertools.combinations(int_arr, 2)
    NR = numpy.fromiter(getNR(pairs), dtype = numpy.float64, count = num_pairs)

    ml_R_frac = get_ml_R_frac(R = R, NR = NR)
    Z = get_LOD(R = R, NR = NR, R_frac = ml_R_frac)
    NR_matrix = get_NR_matrix(NR)
    lod = get_lod_matrix(Z)

    index_of_lg = get_index_of_LG(loci_on_lg)
    lgs_longer_than_1 = find_LGs_with_multiple_loci(index_of_lg, loci_on_lg)
    sum_lod = get_LG_pairwise_sum_lod(lgs_longer_than_1, lod, index_of_lg)
    sq_sum_lod = get_square_form(sum_lod, lgs_longer_than_1)

    # Number of individuals. next(iter(...)) works on Python 2 and 3, whereas
    # indexing .items() only works on Python 2.
    n = len(next(iter(mappable.values())))
    NR_threshold = get_threshold_recombinants_for_same_LGs(n, MST_grouping_threshold)
    NR_under_threshold = get_LG_pairwise_count_NR_threshold(
        lgs_longer_than_1, NR_matrix, index_of_lg, threshold = NR_threshold)
    sq_NR_matrix = get_square_form(NR_under_threshold, lgs_longer_than_1)
    return (ini_map, sq_sum_lod, sq_NR_matrix, R, NR, lgs_longer_than_1)
def __call__(self, data):
    """Apply the same-value filter to an instance or a data table.

    For an ``Instance`` the result is a boolean: whether
    ``data[self.column] == self.value``, inverted when ``self.negate`` is
    set. For table-like data the rows passing the test are returned as
    ``data[mask]``. A ``Storage`` is first given the chance to filter itself
    through ``_filter_same_value``; if that raises ``NotImplementedError``
    the generic row-by-row path is used.
    """
    if isinstance(data, Instance):
        return self.negate != (data[self.column] == self.value)

    if isinstance(data, Storage):
        try:
            return data._filter_same_value(self.column, self.value, self.negate)
        except NotImplementedError:
            pass  # fall back to the generic implementation below

    column = data.domain.index(self.column)
    needs_conversion = (data.domain[column].is_primitive()
                        and not isinstance(self.value, Real))
    value = data.domain[column].to_val(self.value) if needs_conversion \
        else self.value

    if self.negate:
        def keep(cell):
            return cell != value
    else:
        def keep(cell):
            return cell == value

    if column >= 0:
        cells = (inst[column] for inst in data)
    else:
        # Negative indices address meta attributes: -1 -> 0, -2 -> 1, ...
        column = -1 - column
        cells = (inst._metas[column] for inst in data)

    retain = np.fromiter((keep(cell) for cell in cells), bool, len(data))
    return data[retain]
def _listparser(dlist, freq=None):
    """Constructs a DateArray from a list.

    ``dlist`` is converted to an array of at least one dimension and then
    interpreted according to its dtype kind:

    * strings: each is parsed with ``Date(freq, string=...)``
    * integers / floats: taken as date values unchanged
    * objects: dispatched on the first element, which may be a ``Date``, an
      object with ``absdays`` (mx.DateTime) or an object with ``toordinal``
      (datetime)

    When the elements are ``Date`` objects and ``freq`` is ``None`` or
    ``_c.FR_UND``, the frequency of the first element is used.

    Returns the values viewed as a ``DateArray`` with ``freq`` set.
    """
    # asarray/atleast_1d avoid copying when possible and, unlike
    # np.array(..., copy=False), do not raise on NumPy 2 when a copy is needed.
    dlist = np.atleast_1d(np.asarray(dlist))
    kind = dlist.dtype.kind
    # Case #1: dates as strings .................
    if kind in 'SU':
        dlist = np.fromiter((Date(freq, string=s).value for s in dlist),
                            dtype=int)
    # Case #2: dates as numbers .................
    elif kind in 'if':
        # ...hopefully, they are values
        pass
    # Case #3: dates as objects .................
    elif kind == 'O':
        template = dlist[0]
        # ...as Date objects
        if isinstance(template, Date):
            dlist = np.fromiter((d.value for d in dlist), dtype=int)
            if freq in (_c.FR_UND, None):
                freq = template.freq
        # ...as mx.DateTime objects
        elif hasattr(template, 'absdays'):
            # .value is taken explicitly, as in the other branches, instead of
            # relying on implicit Date -> int conversion.
            dlist = np.fromiter((Date(freq, datetime=m).value for m in dlist),
                                dtype=int)
        # ...as datetime objects
        elif hasattr(template, 'toordinal'):
            dlist = np.fromiter((Date(freq, datetime=d).value for d in dlist),
                                dtype=int)
    #
    result = dlist.view(DateArray)
    result.freq = freq
    return result
def mmphi(self):
    """Returns the Morris-Mitchell sampling criterion for this Latin hypercube.

    The value is ``(sum_j J_j * d_j ** -q) ** (1 / q)`` where ``d_j`` are the
    distinct pairwise distances (``self.p``-norm) between rows of
    ``self.doe`` and ``J_j`` counts the pairs at each distance. The result is
    cached in ``self.phi`` and only computed while ``self.phi`` is ``None``.
    """
    if self.phi is None:
        # distance -> number of point pairs separated by exactly that distance
        pair_counts = {}
        points = self.doe
        for i in range(1, points.shape[0]):
            # Distances from point i to every earlier point.
            norms = np.linalg.norm(points[i] - points[:i], ord=self.p, axis=1)
            for dist in norms:
                pair_counts[dist] = pair_counts.get(dist, 0) + 1

        size = len(pair_counts)
        distinct_d = np.fromiter(pair_counts, dtype=float, count=size)
        # Multiplicity array with a count of how many pairs of points
        # have a given distance
        J = np.fromiter(pair_counts.values(), dtype=int, count=size)
        self.phi = sum(J * (distinct_d ** (-self.q))) ** (1.0 / self.q)
    return self.phi
def _bam_file_to_movie_results(file_name):
    """
    Read what is assumed to be a single BAM file (as a ConsensusReadSet).

    Every read group must have ``ReadType == "CCS"``. The file is scanned
    once and its records are grouped by ``movieName``. One
    ``MovieResult(file_name, movie_name, read_lengths, accuracy,
    num_passes)`` is returned per movie named in the read-group table, in
    sorted movie-name order.
    """
    from pbcore.io import IndexedBamReader
    results = []
    with IndexedBamReader(file_name) as bam:
        for rg in bam.readGroupTable:
            assert rg["ReadType"] == "CCS"
        # Sorted so the output order is deterministic.
        movies = sorted(set(rg["MovieName"] for rg in bam.readGroupTable))

        # (read lengths, pass counts, read scores) per movie, filled in a
        # single pass instead of three scans of the file per movie.
        per_movie = {name: ([], [], []) for name in movies}
        for record in bam:
            columns = per_movie.get(record.movieName)
            if columns is None:
                continue
            columns[0].append(record.peer.query_length)
            columns[1].append(record.numPasses)
            columns[2].append(record.readScore)

        for movie_name in movies:
            lengths, passes, scores = per_movie[movie_name]
            read_lengths = np.fromiter(lengths, dtype=np.int64,
                                       count=len(lengths))
            num_passes = np.fromiter(passes, dtype=np.int64,
                                     count=len(passes))
            # np.float64 replaces the np.float alias removed in NumPy 1.24.
            accuracy = np.fromiter(scores, dtype=np.float64,
                                   count=len(scores))
            results.append(MovieResult(
                file_name, movie_name, read_lengths, accuracy, num_passes))
    return results
def __init__(self, parsed_mesh, borders=None, default_border="land", ignore_given_edges=False, projection=None):
    """Build the mesh from a parsed mesh description.

    Parameters
    ----------
    parsed_mesh : object providing ``elements``, ``nodes`` and ``edges``.
    borders : optional mapping forwarded to ``meshtools.organize_edges``;
        defaults to an empty dict.
    default_border : name used for edges not covered by ``borders`` and for
        the extracted boundary when no usable edges are given.
    ignore_given_edges : when true, discard the supplied edges and use the
        boundary extracted from the elements instead.
    projection : forwarded to ``meshtools.project_nodes``.

    Sets ``elements``, ``nodes``, ``edge_collections``, ``boundary_nodes``,
    ``interior_nodes``, ``order`` and ``mean_stepsize``.
    """
    if borders is None:
        borders = {}
    self.elements = parsed_mesh.elements
    self.nodes = meshtools.project_nodes(projection, parsed_mesh.elements,
                                         parsed_mesh.nodes,
                                         attempt_flatten=True)
    self.edge_collections = \
        meshtools.organize_edges(parsed_mesh.edges, borders=borders,
                                 default_border=default_border)

    # any() also copes with an empty mapping, where max() raised ValueError.
    has_given_edges = any(len(collection) > 0
                          for collection in self.edge_collections.values())
    if ignore_given_edges or not has_given_edges:
        self.edge_collections = {
            default_border:
                set(meshtools.extract_boundary_edges(self.elements))}

    if len(np.unique(self.elements)) != self.nodes.shape[0]:
        self._fix_unused_nodes()

    self.boundary_nodes = {}
    # Node numbers run from 1 to the node count here.
    interior_nodes = set(range(1, len(self.nodes)+1))
    for name, edge_collection in self.edge_collections.items():
        # Every entry of an edge except the last is treated as a node number.
        self.boundary_nodes[name] = \
            np.fromiter(set(node for edge in edge_collection
                            for node in edge[0:-1]), int)
        interior_nodes -= set(self.boundary_nodes[name])

    self.interior_nodes = np.fromiter(interior_nodes, int)
    self.order = _element_order(self.elements.shape[1])
    self.mean_stepsize = self._get_stepsize()
def system_values(self, when: Union[Real, Sequence[Real]], which: Union[str, Sequence[str]]=None):
    """Evaluate named outputs of the integrated system at given times.

    The system is first integrated up to the largest requested time. States
    are then linearly interpolated from the stored solution and passed to
    ``self.ode_system.outputs(name, time, states)``.

    Parameters
    ----------
    when : a single time or a sequence of times.
    which : a single output name or a sequence of names; defaults to
        ``self._observable_names``.

    Returns
    -------
    A scalar (single time, single name), a 1-D float array (exactly one of
    the two is a sequence) or a 2-D float array of shape
    ``(len(when), len(which))``.
    """
    if which is None:
        which = self._observable_names
    single_time = isinstance(when, Real)
    single_name = isinstance(which, str)

    self.integrate_to(when if single_time else max(when))

    if len(self.solution_times) == 1:
        # Handle scipy bug when there is only one time point
        # TODO (drhagen): super hacky solution here
        def state_at(t):
            return self.solution_states[0]
    else:
        state_at = interp1d(self.solution_times, self.solution_states, axis=0,
                            assume_sorted=True, copy=False)

    # Extract values from solution
    outputs = self.ode_system.outputs

    if single_name and single_time:
        return outputs(which, when, state_at(when))

    if single_name:
        per_time = (outputs(which, t, state_at(t)) for t in when)
        return np.fromiter(per_time, 'float', count=len(when))

    if single_time:
        states = state_at(when)
        per_name = (outputs(name, when, states) for name in which)
        return np.fromiter(per_name, 'float', count=len(which))

    def row_major_values():
        # Times vary slowest, so the flat result reshapes to (when, which).
        for t in when:
            states = state_at(t)
            for name in which:
                yield outputs(name, t, states)

    flat = np.fromiter(row_major_values(), 'float', count=len(which)*len(when))
    return np.reshape(flat, [len(when), len(which)])
def fit(self, X):
    """Run the ``prc.ipr`` clustering on ``X`` and store the results.

    The adjacency matrix comes from ``self.adj_matrix_strategy(X)``. The
    starting order is ``self.initial_ordering`` when given (it must have one
    entry per sample) and the identity order otherwise.

    Sets ``_ordering``, ``_boundary``, ``_width`` (boundary values sorted in
    descending order), ``labels_`` and ``_pinch_ratios``.
    """
    adj_matrix = self.adj_matrix_strategy(X)
    n_samples = len(X)

    if self.initial_ordering is not None:
        ordering = self.initial_ordering
        assert len(ordering) == n_samples, \
            "initial_ordering has wrong length"
    else:
        ordering = np.arange(n_samples, dtype=int)

    order = prc.createOrder(ordering)
    labels = prc.ivec([0] * n_samples)

    policy = prc.iprPolicyStruct()
    policy.iprNumberOfClustering = self.number_of_clustering
    policy.iprMaxIterations = self.max_iterations
    policy.iprConvergenceThreshold = self.convergence_threshold

    # ``order`` and ``labels`` are read back after this call.
    prc.ipr(adj_matrix, order, labels, self.n_clusters, policy)
    self._ordering = np.fromiter(order.vdata, dtype=int)

    ## calculate boundaries of original matrix w/ new ordering
    prc.calcBoundaries(adj_matrix, order)
    boundary_values = np.fromiter(order.b.b, dtype=float)
    self._boundary = boundary_values[:-1]  # XXX: slice needed?
    self._width = np.sort(self._boundary)[::-1]
    self.labels_ = np.fromiter(labels, dtype=int)

    self._pinch_ratios, _ = compute_pr_cluster_indices(
        self._ordering, self._boundary, self.n_clusters,
        compute_thick_part_PR)
def _fit_once(self, X, initial_order):
    """Run one TILO pass from ``initial_order`` and derive cluster labels.

    Uses ``prc.RefineTILO`` when ``self.refine_order`` is set and
    ``prc.TILO`` otherwise, then splits the resulting ordering into clusters
    with ``self._find_clusters``.

    Returns
    -------
    tuple ``(ordering, boundary, labels, pinch_ratios)`` where ``ordering``
    and ``boundary`` are NumPy arrays read back from the ``prc`` order
    object and ``labels[i]`` is the index of the cluster containing item
    ``i`` (0 for items in no cluster).
    """
    adj_matrix = self.adj_matrix_strategy(X)
    n_items = adj_matrix.shape[0]

    order = prc.createOrder(initial_order)
    policy = prc.tiloPolicyStruct()
    run_tilo = prc.RefineTILO if self.refine_order else prc.TILO
    run_tilo(adj_matrix, order, policy)

    # The last boundary entry is dropped, as in the original implementation.
    boundary = np.fromiter(order.b.b, dtype=float)[:-1]
    ordering = np.fromiter(order.vdata, dtype=int)

    pinch_ratios, clusters = self._find_clusters(ordering, boundary)

    labels = np.zeros(n_items, dtype=int)
    for label, cluster in enumerate(clusters):
        labels[cluster] = label
    return ordering, boundary, labels, pinch_ratios
def _match_sub(disc_clsdict, gold_clsdict, phn_corpus, names, label, verbose, n_jobs):
    """Compute aggregated matching precision/recall over subsampled file sets.

    For every file set in ``names`` the discovered and gold class
    dictionaries are restricted to that set and turned into pair sets. The
    sets are scored in parallel with ``eval_from_psets`` and the per-set
    scores are combined by ``praggregate``.

    Returns the ``(precision, recall)`` pair produced by ``praggregate``.
    """
    em = eval_from_psets
    if verbose:
        message = ' matching ({2}): subsampled {0} files in {1} sets'\
            .format(sum(map(len, names)), len(names), label)
        print(message)

    with verb_print(' matching ({0}): prepping psets'.format(label),
                    verbose, True, True, True):
        pdiscs, pgolds, psubs = [], [], []
        for fs in names:
            pdiscs.append(
                make_pdisc(disc_clsdict.restrict(fs, True), False, False))
        for fs in names:
            pgolds.append(
                make_pgold(gold_clsdict.restrict(fs, True), False, False))
        for fs in names:
            psubs.append(
                make_psubs(disc_clsdict.restrict(fs, True), phn_corpus,
                           3, 20, False, False))

    with verb_print(' matching ({0}): calculating scores'
                    .format(label), verbose, False, True, False):
        runner = Parallel(n_jobs=n_jobs,
                          verbose=5 if verbose else 0,
                          pre_dispatch='n_jobs')
        scored = runner(delayed(em)(pdisc, pgold, psub)
                        for pdisc, pgold, psub in zip(pdiscs, pgolds, psubs))
        tp, tr = izip(*scored)

    tp = np.fromiter(tp, dtype=np.double)
    tr = np.fromiter(tr, dtype=np.double)
    tp, tr = praggregate(tp, tr)
    return tp, tr
def token_type(disc_clsdict, wrd_corpus, fragments_within, fragments_cross, dest, verbose, n_jobs):
    """Compute token/type scores and write them to ``<dest>/token_type``.

    Precision and recall come from ``_token_type_sub`` for the cross-speaker
    and within-speaker fragment sets; the F-scores are derived element-wise
    with ``fscore``. Four sections formatted by ``pretty_score_f`` are
    written to the output file, separated by newlines.
    """
    if verbose:
        # Single-argument call form prints identically on Python 2 and 3.
        print(banner('TOKEN/TYPE'))

    def _fscores(precision, recall):
        # Element-wise F-score, with the length taken from the precision array.
        return np.fromiter((fscore(precision[i], recall[i])
                            for i in range(precision.shape[0])),
                           dtype=np.double)

    ptoc, rtoc, ptyc, rtyc = _token_type_sub(disc_clsdict, wrd_corpus,
                                             fragments_cross, 'cross',
                                             verbose, n_jobs)
    ftoc = _fscores(ptoc, rtoc)
    ftyc = _fscores(ptyc, rtyc)

    ptow, rtow, ptyw, rtyw = _token_type_sub(disc_clsdict, wrd_corpus,
                                             fragments_within, 'within',
                                             verbose, n_jobs)
    ftow = _fscores(ptow, rtow)
    ftyw = _fscores(ptyw, rtyw)

    sections = [
        (ptoc, rtoc, ftoc, 'token total', fragments_cross),
        (ptyc, rtyc, ftyc, 'type total', fragments_cross),
        (ptow, rtow, ftow, 'token within-speaker only', fragments_within),
        (ptyw, rtyw, ftyw, 'type within-speaker only', fragments_within),
    ]
    with open(path.join(dest, 'token_type'), 'w') as fid:
        fid.write('\n'.join(
            pretty_score_f(precision, recall, fscores, title,
                           len(fragments), sum(map(len, fragments)))
            for precision, recall, fscores, title, fragments in sections))
def where_close(pos, separation, intensity=None):
    """ Returns indices of features that are closer than separation from other
    features. When intensity is given, the one with the lowest intensity is
    returned: else the most topleft is returned (to avoid randomness)

    An empty list is returned when there are no positions, when any
    separation component is zero, or when no pair is closer than the
    separation.

    To be implemented in trackpy v0.4"""
    if len(pos) == 0:
        return []
    separation = validate_tuple(separation, pos.shape[1])
    if any(s == 0 for s in separation):
        return []

    # Rescale positions, so that pairs are identified below a distance
    # of 1.
    pos_rescaled = pos / separation
    duplicates = cKDTree(pos_rescaled, 30).query_pairs(1 - 1e-7)
    if len(duplicates) == 0:
        return []

    first = np.fromiter((pair[0] for pair in duplicates), dtype=int)
    second = np.fromiter((pair[1] for pair in duplicates), dtype=int)

    def drop_by_position(idx_a, idx_b):
        # Compare the pair members by the sum of their rescaled coordinates.
        sum_a = np.sum(pos_rescaled[idx_a], 1)
        sum_b = np.sum(pos_rescaled[idx_b], 1)
        return np.where(sum_a > sum_b, idx_b, idx_a)

    if intensity is None:
        to_drop = drop_by_position(first, second)
    else:
        intensity_first = intensity[first]
        intensity_second = intensity[second]
        to_drop = np.where(intensity_first > intensity_second, second, first)
        # Equal intensities fall back to the positional rule.
        ties = intensity_first == intensity_second
        if np.any(ties):
            to_drop[ties] = drop_by_position(first[ties], second[ties])
    return np.unique(to_drop)
def flatten_correspondences(fm_nestlist, fs_nestlist, daid_nestlist, query_sccw):
    """
    helper

    Flattens the nested per-word correspondence lists into flat arrays.

    Args:
        fm_nestlist: nested feature matches; flattened two levels deep and
            reshaped to ``(n, 2)``.
        fs_nestlist: nested feature scores; flattened one level and scaled
            by ``query_sccw``.
        daid_nestlist: nested database annotation ids; flattened one level.
        query_sccw: scalar multiplier applied to the scores.

    Returns:
        tuple: ``(all_matches, all_scores, all_daids)``
    """
    iflat_ = utool.iflatten
    DAID_DTYPE = hstypes.INDEX_TYPE
    FS_DTYPE = hstypes.FS_DTYPE
    FM_DTYPE = hstypes.FM_DTYPE

    # The score lists determine the element count used for all three arrays.
    count = sum(map(len, fs_nestlist))
    all_daids = np.fromiter(iflat_(daid_nestlist), DAID_DTYPE, count)
    all_scores = np.fromiter(iflat_(fs_nestlist), FS_DTYPE, count) * query_sccw

    # Shape hack so we can use fromiter which outputs a 1D array
    all_matches = np.fromiter(iflat_(iflat_(fm_nestlist)), FM_DTYPE, 2 * count)
    # Floor division keeps the row count an int on Python 3 as well.
    all_matches.shape = (all_matches.size // 2, 2)

    if utool.DEBUG2:
        assert len(all_daids) == len(all_scores), 'inconsistent len'
        assert len(all_matches) == len(all_scores), 'inconsistent len'
        print('[smk_core] checked build_chipmatch flatten ...ok')

    return all_matches, all_scores, all_daids
def _join_staves(staff_dist, *sections): staff_dict = dict((s, np.array([s])) for s in sections[0]) for i, cur_staves in enumerate(sections[1:]): last_staves = np.sort(np.fromiter(staff_dict.keys(), int)) dist = np.abs(last_staves[None, :] - cur_staves[:, None]) did_match = dist.min(axis=1) < staff_dist new_matches = dict() matching_staves = cur_staves[did_match] matches = np.argmin(dist[did_match, :], axis=1) matches, idx = np.unique(matches, return_index=True) matching_staves = matching_staves[idx] for staff_ind, new_point in zip(matches, matching_staves): prev_staff = staff_dict[last_staves[staff_ind]] new_matches[new_point] = np.concatenate([prev_staff, [new_point]]) non_matches = cur_staves[~did_match] for non_match in non_matches: new_matches[non_match] = np.asarray([-non_match] * (i + 1) + [non_match]) skipped = set(staff_dict.keys()).difference(s[-2] for s in new_matches.values()) for s in skipped: new_matches[s] = np.concatenate([staff_dict[s], [-s]]) staff_dict = new_matches return np.asarray([staff_dict[s] for s in np.sort(np.fromiter(staff_dict.keys(), int))])
def __getpe(self,tree):
    """
    Given the symbol counts at various depths, calculate the memoryless
    probabilities (in log2-space) of the corresponding sequences using the
    KT estimator.

    KT-estimate is defined as:
    Pe := Prod_{foreach symbol in ALPHABET}( (symbol counts-1/2)! ) / ..
          ( ( total symbol counts - len(ALPHABET)/2 )! )

    Keyword arguments:
    tree: (dict) : keys are occurring contexts (str), values are the symbol
          counts for the symbols of the alphabet given that context

    Returns:
    tree_pe: (dict): keys are occurring contexts (str), values are the
          memoryless log2-probabilities of the sequence corresponding to this
          context / we define log_2(0) = 0
    """
    # Explicit floats keep the half-offsets exact on Python 2 as well, where
    # 1/2 and len(ALPHABET)/2 would be integer divisions.
    half_alphabet = len(ALPHABET) / 2.0

    treepe = dict()
    for context, vals in tree.items():
        lengthsubseq = sum(vals)
        if lengthsubseq > 0:
            # KT - estimator
            denum = np.log2(
                np.arange(lengthsubseq, dtype=float) + half_alphabet).sum()
            numer = 0
            for x in vals:
                if x > 0:
                    # log2 of (1/2)(3/2)...(x - 1/2)
                    numer += np.log2(
                        np.arange(1, x + 1, dtype=float) - 0.5).sum()
            treepe[context] = numer - denum
        else:
            treepe[context] = 0
    return treepe
def SNrest():
    """Load every rest-frame light-curve file into a single ``Lightcurve``.

    Each file in ``../data/restframe/`` is read with ``np.recfromtxt``
    (first five columns, 13 header lines skipped). The band names are
    lower-cased, every row is tagged with the name of the file it came from,
    and the per-file columns are concatenated. All rows get ``stype`` 1.
    """
    path = "../data/restframe/"
    objnames, band, mjd, mag, magerr, stype = [], [], [], [], [], []

    # Comma-separated dtype specification, one entry per column.
    column_spec = [('|S16,'.rstrip('#'), 1), ('f8,', 6), ('|S16,', 1),
                   ('f8,', 4), ('|S16,', 3), ('f8,', 2), ('|S16,', 1),
                   ('f8,', 2)]
    formatcode = ''.join(code * repeat for code, repeat in column_spec)

    for filename in os.listdir(path):
        data = np.recfromtxt(os.path.join(path, filename),
                             usecols = (0,1,2,3,4),
                             dtype = formatcode,
                             names = True,
                             skip_header = 13,
                             case_sensitive = 'lower',
                             invalid_raise = False)
        # One copy of the file name per data row.
        name = np.empty(len(data.band), dtype = 'S20')
        name.fill(filename)
        objnames.append(name)

        data.band = [x.lower() for x in data.band]
        band.append(data.band)
        mjd.append(data.phase)
        mag.append(data.mag)
        magerr.append(data.err)

    flatten = itertools.chain.from_iterable
    objnames = np.fromiter(flatten(objnames), dtype = 'S20')
    band = np.fromiter(flatten(band), dtype = 'S16')
    mjd = np.fromiter(flatten(mjd), dtype = 'float')
    mag = np.fromiter(flatten(mag), dtype = 'float')
    magerr = np.fromiter(flatten(magerr), dtype = 'float')
    stype = np.full(len(objnames), 1)

    LC = Lightcurve(objnames, band, mjd, mag, magerr, stype)
    return LC
def _computeNormalizations(self): #Use a generator instead of a list to gain speed generator1 = (x.value for x in self.parameters.values()[1::2]) self.alphas = numpy.fromiter(generator1,float) #alphasDiff = self.alphas[:-1]-self.alphas[1:] generator2 = (x.value for x in self.parameters.values()[2::2]) self.betas = numpy.fromiter(generator2,float) #betasDiff = self.betas[:-1]-self.betas[1:] #bLogEpivot = self.betas*self.logPivotEnergies #bLogEpivotDiff = bLogEpivot[1:]-bLogEpivot[:-1] self.normalizations[0] = self.parameters['K'].value self.normalizations[1:-1] = (self._logP(self.energyBreaks,self.alphas[:-1],self.betas[:-1],self.pivotEnergies[:-1])/ self._logP(self.energyBreaks,self.alphas[1:],self.betas[1:],self.pivotEnergies[1:]) ) self.normalizations[-1] = 1.0 #This compute the cumulative product of the array #(i.e., the first elements is a0, the second a0*a1, #the third a0*a1*a2, and so on...) self.products = numpy.cumprod(self.normalizations)
def test_vector(self):
    """Scalar multiplication and dot product of ``Vector`` agree with NumPy.

    Doubling a vector must equal a vector built from the doubled elements,
    and ``Vector.dot`` must match ``np.dot`` on the same values.
    """
    base = Vector(self.list1)
    doubled = Vector(2*x for x in self.list1)
    self.assertEqual(2*base, doubled)

    base_array = np.fromiter(base, int)
    doubled_array = np.fromiter(doubled, int)
    expected_dot = np.dot(base_array, doubled_array)
    self.assertEqual(base.dot(doubled), expected_dot)
def get_charge_resolution(self):
    """
    Calculate and obtain the charge resolution graph arrays.

    The per-charge sums and counts are looked up by the keys of
    ``self.sum_dict``, so the arrays stay aligned even if ``self.n_dict``
    iterates in a different order.

    Returns
    -------
    true_charge : ndarray
        The X axis true charges.
    chargeres : ndarray
        The Y axis charge resolution values.
    chargeres_error : ndarray
        The error on the charge resolution.
    scaled_chargeres : ndarray
        The Y axis charge resolution divided by the Goal.
    scaled_chargeres_error : ndarray
        The error on the charge resolution divided by the Goal.
    """
    log.debug('[chargeres] Calculating charge resolution')

    charges = list(self.sum_dict)
    count = len(charges)
    true_charge = np.fromiter(charges, dtype=int, count=count)
    summed_charge = np.fromiter((self.sum_dict[c] for c in charges),
                                dtype=float, count=count)
    num = np.fromiter((self.n_dict[c] for c in charges),
                      dtype=int, count=count)

    chargeres = np.sqrt((summed_charge / num) + true_charge) / true_charge
    chargeres_error = chargeres * (1 / np.sqrt(2 * num))

    scale = self.goal(true_charge)
    scaled_chargeres = chargeres/scale
    scaled_chargeres_error = chargeres_error/scale

    return true_charge, chargeres, chargeres_error, \
        scaled_chargeres, scaled_chargeres_error
def extract_surf(jpgfile):
    """Extract SURF features from an image and save them next to it.

    The output path is the input path with its last four characters
    replaced by ``'surf.npy'``. Nothing is done if that file already
    exists. Features are stored as a record array of
    ``SURFReader.surf_dtype`` with fields ``vec`` (descriptor), ``geom``
    (three values taken from the keypoint) and ``index`` (always 0), and
    written through ``save_atomic``.
    """
    start = time.time()
    directory, base = os.path.split(jpgfile)
    out = os.path.join(directory, base[:-4] + 'surf.npy')
    if os.path.exists(out):
        INFO('%s already exists' % out)
        return

    im = cv.LoadImageM(jpgfile, cv.CV_LOAD_IMAGE_GRAYSCALE)
    INFO('cv loaded %dx%d image' % (im.rows, im.cols))

    g, features = cv.ExtractSURF(im, None, cv.CreateMemStorage(), (0, 500, 3, 4))
    n_features = len(features)
    data = np.ndarray(n_features, SURFReader.surf_dtype)

    for i, descriptor in enumerate(features):
        keypoint = g[i]
        record = data[i]
        record['vec'] = np.fromiter(descriptor, np.float32)
        record['geom'] = np.fromiter(
            [keypoint[0][0], keypoint[0][1], keypoint[2]], np.uint16)
        record['index'] = 0

    save_atomic(lambda d: np.save(d, data), out)
    INFO('cv wrote %d features' % n_features)
    INFO_TIMING('took %f seconds' % (time.time() - start))
def __init__(self, image): # number of points self.nx = int(image.shape[0]) self.ny = int(image.shape[1]) # spacing self.dx = 1.0 self.dy = 1.0 # limits self.xmin = 0 self.ymin = 0 self.xmax = float(self.nx) self.ymax = float(self.ny) # lengths self.lx = abs(self.xmax - self.xmin) self.ly = abs(self.ymax - self.ymin) # mesh self.y, self.x = np.meshgrid( np.fromiter(((0.5 + i) * self.dx for i in range(self.nx)), dtype=np.float64, count=self.nx), np.fromiter(((0.5 + i) * self.dy for i in range(self.ny)), dtype=np.float64, count=self.ny))
def get_world_endpoints(edges, pos, scale):
    """Returns the edge endpoints in homogeneous world coordinates

    Each endpoint is transformed as ``scale * point + pos`` and a trailing
    ``1.0`` is appended. The output rows have four entries, so endpoints are
    expected to have three components.

    Parameters
    ----------
    edges : iterable of Edge
        Objects with ``start`` and ``end`` attributes. Any iterable is
        accepted, including one-shot iterators.
    pos : numpy array
    scale : float

    Returns
    -------
    tuple of numpy arrays
        ``(start_points, end_points)``, each of shape ``(len(edges), 4)``.
    """
    # Materialise once: the edges are traversed twice and counted.
    edges = list(edges)
    n_edges = len(edges)

    def _homogeneous(points):
        # Flatten (x, y, z, 1.0) for every point, then fold into rows.
        flat = (coord
                for point in points
                for coord in chain(scale * point + pos, (1.0, )))
        # np.float64 replaces the np.float alias removed in NumPy 1.24.
        return np.fromiter(flat, np.float64,
                           count=4 * n_edges).reshape((n_edges, 4))

    homo_starts = _homogeneous(edge.start for edge in edges)
    homo_ends = _homogeneous(edge.end for edge in edges)
    return homo_starts, homo_ends
def decodePacket(bin_data, data_size = None, packet_mode = 'i64u', track_t0 = False):
    """Decode a raw timetag packet into ``(time, channel)`` NumPy arrays.

    Parameters
    ----------
    bin_data : buffer holding the packed timetag records.
    data_size : overwritten in both handled modes with
        ``len(bin_data) // sizeof(record)``, so a passed-in value is unused.
    packet_mode : ``'i64u'`` (unpacked records) or ``'i64c'`` (compressed
        records carrying ``highlow`` / ``timehigh`` fields).
    track_t0 : in ``'i64c'`` mode, add the module-level ``compressed_t0``
        offset to the times and advance it afterwards.

    Returns
    -------
    tuple ``(time, channel)``.

    NOTE(review): for any other ``packet_mode`` neither branch runs, so
    ``time`` and ``channel`` are unbound at the ``return`` and an
    ``UnboundLocalError`` is raised.
    """
    # Works only for i64bit unpacked mode
    global compressed_t0
    #assert (packet_mode == 'i64u')
    if packet_mode == 'i64u':
        data_size = len(bin_data)//ctypes.sizeof(Timetag_I64)
        t = ctypes.cast(bin_data, timetag_I64_p)
        # The explicit count limits how many records are read through the
        # pointer.
        time = np.fromiter((i.time for i in t), np.int64, data_size)
        channel = np.fromiter((i.channel for i in t), np.int8, data_size)
    if packet_mode == 'i64c':
        data_size = len(bin_data)//ctypes.sizeof(Timetag_I64c)
        t = ctypes.cast(bin_data, timetag_I64c_p)
        #if t[0].highlow == 0:
        # ctypes.cast(bin_data, timetag_I64c_p)
        highlow = np.fromiter((i.highlow for i in t ), np.uint64, data_size)
        # Each record's time is offset by 2**27 times the running sum of
        # highlow up to and including that record.
        # NOTE(review): presumably highlow flags rollover records -- confirm
        # against the Timetag_I64c definition.
        time = np.fromiter((i.timehigh for i in t ), np.uint64, data_size)+(cumsum(highlow))*2**27
        channel = np.fromiter((i.channel for i in t ), np.uint8, data_size)
        # Keep only the records whose highlow field is zero.
        time = time[highlow == 0]
        channel = channel[highlow == 0]
        if track_t0:
            time = time + compressed_t0
            # Carry this packet's accumulated offset into the next call.
            compressed_t0 += sum(highlow)*2**27
        else:
            # NOTE(review): this only rebinds the local parameter and has no
            # effect; resetting compressed_t0 may have been intended --
            # confirm.
            track_t0 = 0
    return(time, channel)
def CalculateMASE(train_guess, train_correct, test_guess, test_correct):
    """Calculates the Mean Absolute Scaled Error

    The absolute errors of the training and test predictions are divided by
    the mean absolute one-step difference of ``train_correct`` (the naive
    "previous value" forecast, ignoring NaN differences) and averaged with
    NaNs ignored.

    Inputs may be NumPy arrays or plain iterables of numbers; when they do
    not support subtraction they are converted to float arrays first.

    Returns
    -------
    tuple ``(train_mase, test_mase)``.

    Raises
    ------
    ZeroDivisionError
        If ``train_correct`` yields no non-NaN consecutive difference.
    """
    def _naive_error(series):
        # Mean absolute difference between consecutive values, skipping NaNs.
        total = 0
        count = 0
        for current, previous in zip(series[1:], series):
            step = abs(current - previous)
            if not numpy.isnan(step):
                total += step
                count += 1
        # float() guards against integer division on Python 2.
        return total / float(count)

    try:
        abs_error_train = abs(train_guess - train_correct)
        abs_error_test = abs(test_guess - test_correct)
    except TypeError:
        # if they're the wrong type then convert them accordingly
        # (numpy.float64 replaces the numpy.float alias removed in 1.24)
        train_guess = numpy.fromiter(train_guess, numpy.float64)
        train_correct = numpy.fromiter(train_correct, numpy.float64)
        test_guess = numpy.fromiter(test_guess, numpy.float64)
        test_correct = numpy.fromiter(test_correct, numpy.float64)
        abs_error_train = abs(train_guess - train_correct)
        abs_error_test = abs(test_guess - test_correct)

    naive_scale = _naive_error(train_correct)
    train_mase = numpy.nanmean(abs_error_train / naive_scale)
    test_mase = numpy.nanmean(abs_error_test / naive_scale)
    return train_mase, test_mase
def recognize_batch(self, xs_list, recog_args, char_list, rnnlm=None):
    """E2E beam search.

    Runs every encoder on its own batch of (subsampled, padded) features,
    optionally computes CTC log-posteriors, and hands everything to the
    decoder's batched beam search. The module is switched to eval mode for
    the call and put back into training mode afterwards if it was training.

    :param list xs_list: list of list of input acoustic feature arrays
        [[(T1_1, D), (T1_2, D), ...],[(T2_1, D), (T2_2, D), ...], ...]
    :param Namespace recog_args: argument Namespace containing options
    :param list char_list: list of characters
    :param torch.nn.Module rnnlm: language model module
    :return: N-best decoding results
    :rtype: list
    """
    was_training = self.training
    self.eval()
    num_encs = self.num_encs

    # Frame counts are taken before subsampling.
    ilens_list = []
    for idx in range(num_encs):
        frame_counts = (feat.shape[0] for feat in xs_list[idx])
        ilens_list.append(np.fromiter(frame_counts, dtype=np.int64))

    # subsample frame, move to the model's device and pad per encoder
    xs_pad_list = []
    for idx in range(num_encs):
        stride = self.subsample_list[idx][0]
        subsampled = [feat[::stride, :] for feat in xs_list[idx]]
        tensors = [to_device(self, to_torch_tensor(feat).float())
                   for feat in subsampled]
        xs_pad_list.append(pad_list(tensors, 0.0))

    # 1. Encoder
    hs_pad_list, hlens_list = [], []
    for idx in range(num_encs):
        hs_pad, hlens, _ = self.enc[idx](xs_pad_list[idx], ilens_list[idx])
        hs_pad_list.append(hs_pad)
        hlens_list.append(hlens)

    # calculate log P(z_t|X) for CTC scores
    if recog_args.ctc_weight > 0.0:
        lpz_list = []
        for idx in range(num_encs):
            # A shared CTC module lives at index 0.
            ctc = self.ctc[0] if self.share_ctc else self.ctc[idx]
            lpz_list.append(ctc.log_softmax(hs_pad_list[idx]))
        normalize_score = False
    else:
        lpz_list = None
        normalize_score = True

    # 2. Decoder
    # make sure hlens is tensor
    hlens_list = [torch.tensor(list(map(int, hlens_list[idx])))
                  for idx in range(num_encs)]
    y = self.dec.recognize_beam_batch(
        hs_pad_list,
        hlens_list,
        lpz_list,
        recog_args,
        char_list,
        rnnlm,
        normalize_score=normalize_score,
    )

    if was_training:
        self.train()
    return y
def _gen_episodes(router_type: str,
                  one_out: bool,
                  factory: RouterFactory,
                  num_episodes: int,
                  bar=None,
                  sinks=None,
                  random_seed=None) -> pd.DataFrame:
    """Generate a DataFrame of routing samples on the factory's topology.

    Each episode picks a random destination from ``sinks`` and a random
    current node from ``only_reachable(G, dst, nodes)``, then appends one or
    more rows built from Dijkstra path lengths on the topology graph.

    :param router_type: if it contains ``'ppo'`` the PPO row layout is used,
        otherwise the neighbour/predict layout.
    :param one_out: selects the single-neighbour layout (``node_dim == 1``)
        in the non-PPO branch.
    :param factory: provides ``topology_graph``, ``edge_weight`` and
        ``_makeHandler``.
    :param num_episodes: number of episodes to generate; draws with no
        usable neighbour are retried and do not count.
    :param bar: optional progress object; ``bar.update(1)`` is called once
        per counted episode.
    :param sinks: candidate destinations; defaults to all nodes.
    :param random_seed: if not ``None``, passed to ``set_random_seed``
        before sampling.
    :return: DataFrame whose columns are assembled in ``cols`` below.
    """
    G = factory.topology_graph
    nodes = sorted(G.nodes)
    n = len(nodes)
    # Flattened weighted adjacency matrix; used as the "graph state" part of
    # every PPO row.
    amatrix = nx.convert_matrix.to_numpy_array(G,
                                               nodelist=nodes,
                                               weight=factory.edge_weight,
                                               dtype=np.float32)
    gstate = np.ravel(amatrix)

    # For every ordered pair of distinct, connected nodes: the first hop of
    # the Dijkstra path and the path length.
    best_transitions = defaultdict(dict)
    lengths = defaultdict(dict)
    for start_node in nodes:
        for finish_node in nodes:
            if start_node != finish_node and nx.has_path(
                    G, start_node, finish_node):
                path = nx.dijkstra_path(G,
                                        start_node,
                                        finish_node,
                                        weight=factory.edge_weight)
                length = nx.dijkstra_path_length(G,
                                                 start_node,
                                                 finish_node,
                                                 weight=factory.edge_weight)
                best_transitions[start_node][finish_node] = path[1] if len(
                    path) > 1 else start_node
                lengths[start_node][finish_node] = length

    if sinks is None:
        sinks = nodes

    additional_inputs = None
    routers = {}
    node_dim = 1 if one_out else n
    # One router handler per node; additional_inputs is taken from the first
    # router created.
    for rid in nodes:
        router = factory._makeHandler(rid)
        update_network(router, G)
        routers[rid] = router
        if additional_inputs is None:
            additional_inputs = router.additional_inputs

    # Column layout of the resulting DataFrame.
    cols = ['addr', 'dst']
    if 'ppo' in router_type:
        for inp in additional_inputs:
            cols += add_input_cols(inp['tag'], inp.get('dim', n))
        cols += ['next_addr', 'addr_v_func']
    else:
        if node_dim == 1:
            cols.append('neighbour')
        else:
            cols += get_neighbors_cols(node_dim)
        for inp in additional_inputs:
            cols += add_input_cols(inp['tag'], inp.get('dim', n))
        if node_dim == 1:
            cols.append('predict')
        else:
            cols += get_target_cols(n)

    df = pd.DataFrame(columns=cols)

    if random_seed is not None:
        set_random_seed(random_seed)

    # NOTE(review): pkg_id is never incremented, so every Package below is
    # created with id 1 - confirm this is intended.
    pkg_id = 1
    episode = 0
    while episode < num_episodes:
        dst = random.choice(sinks)
        cur = random.choice(only_reachable(G, dst, nodes))
        router = routers[cur]
        out_nbrs = G.successors(router.id)
        nbrs = only_reachable(G, dst, out_nbrs)
        # Retry without counting the episode when no out-neighbour is usable.
        # NOTE(review): this loops forever if no draw ever yields neighbours.
        if len(nbrs) == 0:
            continue
        episode += 1

        # ppo addition
        if 'ppo' in router_type:
            next_addr = best_transitions[cur][dst]
            # Negated shortest-path length from cur to dst.
            full_path_length = -lengths[cur][dst]
            # Node ids are indexed with [1]; presumably nodes are
            # (type, index) tuples such as ('router', i) - TODO confirm.
            row = [cur[1], dst[1]
                   ] + gstate.tolist() + [next_addr[1], full_path_length]
            df.loc[len(df)] = row
        else:
            pkg = Package(pkg_id, DEF_PKG_SIZE, dst, 0, None)
            state = list(router._getNNState(pkg, nbrs))

            def plen_func(v):
                # Negated cost of going cur -> v -> ... -> dst.
                plen = nx.dijkstra_path_length(G,
                                               v,
                                               dst,
                                               weight=factory.edge_weight)
                elen = G.get_edge_data(cur, v)[factory.edge_weight]
                return -(plen + elen)

            if one_out:
                # One target value per reachable neighbour; each row of the
                # concatenated state becomes a DataFrame row.
                predict = np.fromiter(map(plen_func, nbrs), dtype=np.float32)
                state.append(predict)
                cat_state = np.concatenate([unsqueeze(y) for y in state],
                                           axis=1)
                for row in cat_state:
                    df.loc[len(df)] = row
            else:
                # One target value per router index; indices whose
                # ('router', i) node is not in nbrs get -INFTY.
                predict = np.fromiter(map(
                    lambda i: plen_func(('router', i))
                    if ('router', i) in nbrs else -INFTY, range(n)),
                                      dtype=np.float32)
                state.append(predict)
                state_ = [unsqueeze(y, 1) for y in state]
                # pprint.pprint(state_)
                cat_state = np.concatenate(state_)
                df.loc[len(df)] = cat_state

        if bar is not None:
            bar.update(1)

    return df
def f(data):
    """Return ``(ones, regions)`` for the 128x128 bit grid derived from ``data``.

    Row ``i`` of the grid is the 128-bit, zero-padded binary form of
    ``knot_hash(f'{data}-{i}')`` read as a hexadecimal number.  ``ones`` is
    the number of ``"1"`` characters in the grid, and ``regions`` is element
    ``[1]`` of the result of ``label`` applied to the grid.
    """
    rows = []
    for i in range(128):
        digest = knot_hash(f'{data}-{i}')
        rows.append(format(int(digest, 16), '0128b'))
    bits = "".join(rows)

    ones = bits.count("1")
    grid = np.fromiter(bits, dtype=int).reshape(128, 128)
    return ones, label(grid)[1]
def fromiter(iter, dtype, count=None):
    """Build a 1-D array from an iterable and pass it through ``copy``.

    :param iter: iterable providing the array items.
    :param dtype: data type of the resulting array.
    :param count: number of items to read from ``iter``; ``None`` (the
        default) reads every item.
    :return: the result of ``copy`` applied to the ``numpy.fromiter`` array.
    """
    # numpy.fromiter needs an integer count and uses -1 for "read
    # everything"; forwarding None raises TypeError.
    if count is None:
        count = -1
    return copy(numpy.fromiter(iter, dtype, count))
def _section_mean_radii(tree_radii, section_begs, section_ends):
    """Returns the mean radius per section.

    Section ``i`` covers ``tree_radii[section_begs[i]:section_ends[i]]``.
    The result is a 1-D float array with one mean per (begin, end) pair.
    """
    return np.fromiter(
        (np.mean(tree_radii[b:e]) for b, e in zip(section_begs, section_ends)),
        # builtin float: the ``np.float`` alias was removed in NumPy 1.24
        dtype=float,
    )
def mantel(x, y, method='pearson', permutations=999, alternative='two-sided',
           strict=True, lookup=None):
    """Compute correlation between distance matrices using the Mantel test.

    The Mantel test compares two distance matrices by computing the correlation
    between the distances in the lower (or upper) triangular portions of the
    symmetric distance matrices. Correlation can be computed using Pearson's
    product-moment correlation coefficient or Spearman's rank correlation
    coefficient.

    As defined in [1]_, the Mantel test computes a test statistic :math:`r_M`
    given two symmetric distance matrices :math:`D_X` and :math:`D_Y`.
    :math:`r_M` is defined as

    .. math::

       r_M=\\frac{1}{d-1}\\sum_{i=1}^{n-1}\\sum_{j=i+1}^{n}
       stand(D_X)_{ij}stand(D_Y)_{ij}

    where

    .. math::

       d=\\frac{n(n-1)}{2}

    and :math:`n` is the number of rows/columns in each of the distance
    matrices. :math:`stand(D_X)` and :math:`stand(D_Y)` are distance matrices
    with their upper triangles containing standardized distances. Note that
    since :math:`D_X` and :math:`D_Y` are symmetric, the lower triangular
    portions of the matrices could equivalently have been used instead of the
    upper triangular portions (the current function behaves in this manner).

    If ``method='spearman'``, the above equation operates on ranked distances
    instead of the original distances.

    Statistical significance is assessed via a permutation test. The rows and
    columns of the first distance matrix (`x`) are randomly permuted a
    number of times (controlled via `permutations`). A correlation coefficient
    is computed for each permutation and the p-value is the proportion of
    permuted correlation coefficients that are equal to or more extreme than
    the original (unpermuted) correlation coefficient. Whether a permuted
    correlation coefficient is "more extreme" than the original correlation
    coefficient depends on the alternative hypothesis (controlled via
    `alternative`).

    Parameters
    ----------
    x, y : DistanceMatrix or array_like
        Input distance matrices to compare. If `x` and `y` are both
        ``DistanceMatrix`` instances, they will be reordered based on matching
        IDs (see `strict` and `lookup` below for handling matching/mismatching
        IDs); thus they are not required to be in the same ID order. If `x` and
        `y` are ``array_like``, no reordering is applied and both matrices must
        have the same shape. In either case, `x` and `y` must be at least 3x3
        in size *after* reordering and matching of IDs.
    method : {'pearson', 'spearman'}
        Method used to compute the correlation between distance matrices.
    permutations : int, optional
        Number of times to randomly permute `x` when assessing statistical
        significance. Must be greater than or equal to zero. If zero,
        statistical significance calculations will be skipped and the p-value
        will be ``np.nan``.
    alternative : {'two-sided', 'greater', 'less'}
        Alternative hypothesis to use when calculating statistical
        significance. The default ``'two-sided'`` alternative hypothesis
        calculates the proportion of permuted correlation coefficients whose
        magnitude (i.e. after taking the absolute value) is greater than or
        equal to the absolute value of the original correlation coefficient.
        ``'greater'`` calculates the proportion of permuted coefficients that
        are greater than or equal to the original coefficient. ``'less'``
        calculates the proportion of permuted coefficients that are less than
        or equal to the original coefficient.
    strict : bool, optional
        If ``True``, raises a ``ValueError`` if IDs are found that do not exist
        in both distance matrices. If ``False``, any nonmatching IDs are
        discarded before running the test. See `n` (in Returns section below)
        for the number of matching IDs that were used in the test. This
        parameter is ignored if `x` and `y` are ``array_like``.
    lookup : dict, optional
        Maps each ID in the distance matrices to a new ID. Used to match up IDs
        across distance matrices prior to running the Mantel test. If the IDs
        already match between the distance matrices, this parameter is not
        necessary. This parameter is disallowed if `x` and `y` are
        ``array_like``.

    Returns
    -------
    corr_coeff : float
        Correlation coefficient of the test (depends on `method`).
    p_value : float
        p-value of the test.
    n : int
        Number of rows/columns in each of the distance matrices, after any
        reordering/matching of IDs. If ``strict=False``, nonmatching IDs may
        have been discarded from one or both of the distance matrices prior to
        running the Mantel test, so this value may be important as it indicates
        the *actual* size of the matrices that were compared.

    Raises
    ------
    ValueError
        If `x` and `y` are not at least 3x3 in size after reordering/matching
        of IDs, or an invalid `method`, number of `permutations`, or
        `alternative` are provided.
    TypeError
        If `x` and `y` are not both ``DistanceMatrix`` instances or
        ``array_like``.

    See Also
    --------
    DistanceMatrix
    scipy.stats.pearsonr
    scipy.stats.spearmanr
    pwmantel

    Notes
    -----
    The Mantel test was first described in [2]_. The general algorithm and
    interface are similar to ``vegan::mantel``, available in R's vegan
    package [3]_.

    ``np.nan`` will be returned for the p-value if `permutations` is zero or if
    the correlation coefficient is ``np.nan``. The correlation coefficient will
    be ``np.nan`` if one or both of the inputs does not have any variation
    (i.e. the distances are all constant) and ``method='spearman'``.

    References
    ----------
    .. [1] Legendre, P. and Legendre, L. (2012) Numerical Ecology. 3rd English
       Edition. Elsevier.

    .. [2] Mantel, N. (1967). "The detection of disease clustering and a
       generalized regression approach". Cancer Research 27 (2): 209-220. PMID
       6018555.

    .. [3] http://cran.r-project.org/web/packages/vegan/index.html

    Examples
    --------
    Import the functionality we'll use in the following examples:

    >>> from skbio import DistanceMatrix
    >>> from skbio.stats.distance import mantel

    Define two 3x3 distance matrices:

    >>> x = DistanceMatrix([[0, 1, 2],
    ...                     [1, 0, 3],
    ...                     [2, 3, 0]])
    >>> y = DistanceMatrix([[0, 2, 7],
    ...                     [2, 0, 6],
    ...                     [7, 6, 0]])

    Compute the Pearson correlation between them and assess significance using
    a two-sided test with 999 permutations:

    >>> coeff, p_value, n = mantel(x, y)
    >>> round(coeff, 4)
    0.7559

    Thus, we see a moderate-to-strong positive correlation (:math:`r_M=0.7559`)
    between the two matrices.

    In the previous example, the distance matrices (``x`` and ``y``) have the
    same IDs, in the same order:

    >>> x.ids
    ('0', '1', '2')
    >>> y.ids
    ('0', '1', '2')

    If necessary, ``mantel`` will reorder the distance matrices prior to
    running the test. The function also supports a ``lookup`` dictionary that
    maps distance matrix IDs to new IDs, providing a way to match IDs between
    distance matrices prior to running the Mantel test.

    For example, let's reassign the distance matrices' IDs so that there are no
    matching IDs between them:

    >>> x.ids = ('a', 'b', 'c')
    >>> y.ids = ('d', 'e', 'f')

    If we rerun ``mantel``, we get the following error notifying us that there
    are nonmatching IDs (this is the default behavior with ``strict=True``):

    >>> mantel(x, y)
    Traceback (most recent call last):
        ...
    ValueError: IDs exist that are not in both distance matrices.

    If we pass ``strict=False`` to ignore/discard nonmatching IDs, we see that
    no matches exist between `x` and `y`, so the Mantel test still cannot be
    run:

    >>> mantel(x, y, strict=False)
    Traceback (most recent call last):
        ...
    ValueError: No matching IDs exist between the distance matrices.

    To work around this, we can define a ``lookup`` dictionary to specify how
    the IDs should be matched between distance matrices:

    >>> lookup = {'a': 'A', 'b': 'B', 'c': 'C',
    ...           'd': 'A', 'e': 'B', 'f': 'C'}

    ``lookup`` maps each ID to ``'A'``, ``'B'``, or ``'C'``. If we rerun
    ``mantel`` with ``lookup``, we get the same results as the original
    example where all distance matrix IDs matched:

    >>> coeff, p_value, n = mantel(x, y, lookup=lookup)
    >>> round(coeff, 4)
    0.7559

    ``mantel`` also accepts input that is ``array_like``. For example, if we
    redefine `x` and `y` as nested Python lists instead of ``DistanceMatrix``
    instances, we obtain the same result:

    >>> x = [[0, 1, 2],
    ...      [1, 0, 3],
    ...      [2, 3, 0]]
    >>> y = [[0, 2, 7],
    ...      [2, 0, 6],
    ...      [7, 6, 0]]
    >>> coeff, p_value, n = mantel(x, y)
    >>> round(coeff, 4)
    0.7559

    It is important to note that reordering/matching of IDs (and hence the
    ``strict`` and ``lookup`` parameters) do not apply when input is
    ``array_like`` because there is no notion of IDs.

    """
    if method == 'pearson':
        corr_func = pearsonr
    elif method == 'spearman':
        corr_func = spearmanr
    else:
        raise ValueError("Invalid correlation method '%s'." % method)

    if permutations < 0:
        raise ValueError("Number of permutations must be greater than or "
                         "equal to zero.")
    if alternative not in ('two-sided', 'greater', 'less'):
        raise ValueError("Invalid alternative hypothesis '%s'." % alternative)

    x, y = _order_dms(x, y, strict=strict, lookup=lookup)

    n = x.shape[0]
    if n < 3:
        raise ValueError("Distance matrices must have at least 3 matching IDs "
                         "between them (i.e., minimum 3x3 in size).")

    x_flat = x.condensed_form()
    y_flat = y.condensed_form()

    orig_stat = corr_func(x_flat, y_flat)[0]

    if permutations == 0 or np.isnan(orig_stat):
        p_value = np.nan
    else:
        # Only x is permuted; y stays fixed across permutations.
        perm_gen = (corr_func(x.permute(condensed=True), y_flat)[0]
                    for _ in range(permutations))
        # builtin float: the ``np.float`` alias was removed in NumPy 1.24
        permuted_stats = np.fromiter(perm_gen, float, count=permutations)

        if alternative == 'two-sided':
            count_better = (np.absolute(permuted_stats) >=
                            np.absolute(orig_stat)).sum()
        elif alternative == 'greater':
            count_better = (permuted_stats >= orig_stat).sum()
        else:
            count_better = (permuted_stats <= orig_stat).sum()

        # The +1 terms count the unpermuted statistic as one of the samples.
        p_value = (count_better + 1) / (permutations + 1)

    return orig_stat, p_value, n
def l2_norm(ratings):
    """Return the Euclidean norm of the values of the ``ratings`` mapping."""
    values = np.fromiter(ratings.values(), dtype=np.float64)
    return np.linalg.norm(values)
def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
                                  depthLimit, arrowConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a cmp.h5.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.

    Returns a ``(css, variants)`` pair: the joined consensus object for the
    whole window and the list of filtered variants collected over all
    subintervals.
    """
    winId, winStart, winEnd = refWindow
    logging.info("Arrow operating on %s" %
                 reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for arrow
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(alnFile, refWindow,
                                  depthLimit=20000,
                                  minMapQV=arrowConfig.minMapQV,
                                  strategy="longest",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        # builtin int: the ``np.int`` alias was removed in NumPy 1.24
        starts = np.fromiter((hit.tStart for hit in alnHits), int)
        ends = np.fromiter((hit.tEnd for hit in alnHits), int)
        intervals = kSpannedIntervals(refWindow, arrowConfig.minPoaCoverage,
                                      starts, ends, minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))
    else:
        allIntervals = [(winStart, winEnd)]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        # Same slice as intRefSeq; kept under the name used for variant
        # calling below.
        windowRefSeq = referenceContig[intStart:intEnd]
        alns = U.readsInWindow(alnFile, subWin,
                               depthLimit=depthLimit,
                               minMapQV=arrowConfig.minMapQV,
                               strategy="longest",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [aln.clippedTo(*interval) for aln in alns]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig)

        # Call a real consensus only if enough reads span the whole interval.
        if len([a for a in clippedAlns
                if a.spansReferenceRange(*interval)]) >= arrowConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin),
                           " ".join([str(hit.readName) for hit in alns])))

            css = U.consensusForAlignments(subWin,
                                           intRefSeq,
                                           clippedAlns,
                                           arrowConfig)

            siteCoverage = U.coverageInWindow(subWin, alns)

            variants_ = U.variantsFromConsensus(subWin, windowRefSeq,
                                                css.sequence, css.confidence,
                                                siteCoverage,
                                                options.aligner,
                                                ai=None)

            filteredVars = filterVariants(options.minCoverage,
                                          options.minConfidence,
                                          variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?
            # `and` binds tighter than `or`: dump for "all", or for
            # "variants" when any variants have been collected so far.
            shouldDumpEvidence = \
                ((options.dumpEvidence == "all") or
                 (options.dumpEvidence == "variants") and (len(variants) > 0))
            if shouldDumpEvidence:
                logging.info("Arrow does not yet support --dumpEvidence")
                # dumpEvidence(options.evidenceDirectory,
                #              subWin, windowRefSeq,
                #              clippedAlns, css)
        else:
            css = ArrowConsensus.noCallConsensus(
                arrowConfig.noEvidenceConsensus,
                subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants
def str_array(iterable):
    """Return a 1-D array of one-character strings (dtype ``'U1'``)."""
    chars = np.fromiter(iterable, dtype='U1')
    return chars
def comb(n):
    """Return all 2-element combinations of array ``n`` as a ``(2, k)`` array.

    Column ``j`` holds the ``j``-th pair produced by
    ``itertools.combinations(n, 2)``; the result keeps ``n.dtype``.
    """
    pairs = itertools.combinations(n, 2)
    flattened = itertools.chain.from_iterable(pairs)
    values = np.fromiter(flattened, n.dtype)
    # Fortran order puts each consecutive (first, second) pair in a column.
    return values.reshape((2, -1), order="F")
# Random 2D test data: one query point and `amount` unit positions,
# with every coordinate drawn uniformly from [min_value, max_value].
amount = 50
min_value = 0
max_value = 250
point: Tuple[float, float] = (
    random.uniform(min_value, max_value),
    random.uniform(min_value, max_value),
)
units: List[Tuple[float, float]] = [
    (random.uniform(min_value, max_value), random.uniform(min_value, max_value))
    for _ in range(amount)
]

# Flatten to x0, y0, x1, y1, ... and convert to numpy arrays up front
flat_units = [coord for unit in units for coord in unit]
units_np = np.fromiter(flat_units, dtype=float, count=len(flat_units))
units_np = units_np.reshape((len(units), 2))
point_np = np.fromiter(point, dtype=float, count=len(point)).reshape((1, 2))

# Run each distance implementation on the same input
r1 = distance_matrix_scipy_cdist_squared(units, point).flatten()
r2 = distance_numpy_basic_1(units, point)
r3 = distance_numpy_basic_2(units, point)
r4 = distance_numpy_einsum(units, point)
# The numba variants (r5, r6, r10) are currently disabled:
# r5 = distance_numpy_basic_1_numba(units_np, point_np)
# r6 = distance_numpy_basic_2_numba(units_np, point_np)
# r10 = distance_numba(flat_units, point_np, len(flat_units) // 2)

# Every implementation must reproduce the scipy reference exactly
assert all(np.array_equal(r1, other) for other in (r2, r3, r4))
# assert np.array_equal(r1, r5)
# assert np.array_equal(r1, r6)
def num_array(iterable):
    """Return a 1-D integer array built from the items of ``iterable``."""
    numbers = np.fromiter(iterable, dtype=int)
    return numbers
def _unpack_contents(self, raw_string, version):
    """Unpack a raw configuration datagram into a dict.

    The fixed header is unpacked with ``self.header_fmt(version)``.  For
    ``version == 0`` the per-transceiver headers that follow are unpacked
    into ``data['transceivers']`` (keyed from 1); for ``version == 1`` the
    remainder of ``raw_string`` is stored as ``data['beam_config']``.

    :param raw_string: packed datagram contents.
    :param version: datagram version; 0 and 1 are handled.
    :return: dict of decoded fields.
    :raises RuntimeError: if the selected sounder name is not one of
        ER60, ES60 or MBES (guarded as "should not happen").
    """
    data = {}
    # Table values are rounded to 6 decimal places.
    round6 = lambda x: round(x, ndigits=6)

    header_values = struct.unpack(self.header_fmt(version),
                                  raw_string[:self.header_size(version)])
    for indx, field in enumerate(self.header_fields(version)):
        data[field] = header_values[indx]

        # handle Python 3 strings
        if (sys.version_info.major > 2) and isinstance(data[field], bytes):
            data[field] = data[field].decode('latin_1')

    data['timestamp'] = nt_to_unix((data['low_date'], data['high_date']))

    if version == 0:
        data['transceivers'] = {}

        # Strip NUL padding from the fixed-width text fields.
        for field in ['transect_name', 'version', 'survey_name', 'sounder_name']:
            data[field] = data[field].strip('\x00')

        sounder_name = data['sounder_name']
        if sounder_name == 'MBES':
            # The first 14 bytes of spare0 carry four extra values
            # ('=hLff' is 2 + 4 + 4 + 4 bytes).
            # NOTE(review): on Python 3 spare0 has already been decoded to
            # str above if it was bytes, and struct.unpack needs a
            # bytes-like object - confirm this branch works on Python 3.
            _me70_extra_values = struct.unpack('=hLff', data['spare0'][:14])
            data['multiplexing'] = _me70_extra_values[0]
            data['time_bias'] = _me70_extra_values[1]
            data['sound_velocity_avg'] = _me70_extra_values[2]
            data['sound_velocity_transducer'] = _me70_extra_values[3]
            data['spare0'] = data['spare0'][:14] + data['spare0'][14:].strip('\x00')
        else:
            data['spare0'] = data['spare0'].strip('\x00')

        # Transceiver headers start right after the fixed header.
        buf_indx = self.header_size(version)

        try:
            transducer_header = self._transducer_headers[sounder_name]
            _sounder_name_used = sounder_name
        except KeyError:
            # Unknown sounder: fall back to the ER60 field layout.
            log.warning('Unknown sounder_name:  %s, (no one of %s)', sounder_name,
                        list(self._transducer_headers.keys()))
            log.warning('Will use ER60 transducer config fields as default')
            transducer_header = self._transducer_headers['ER60']
            _sounder_name_used = 'ER60'

        # Each entry of transducer_header is indexed as (name, fmt, ...).
        txcvr_header_fields = [x[0] for x in transducer_header]
        txcvr_header_fmt = '=' + ''.join([x[1] for x in transducer_header])
        txcvr_header_size = struct.calcsize(txcvr_header_fmt)

        for txcvr_indx in range(1, data['transceiver_count'] + 1):
            txcvr_header_values_encoded = struct.unpack(
                txcvr_header_fmt,
                raw_string[buf_indx:buf_indx + txcvr_header_size])
            txcvr_header_values = list(txcvr_header_values_encoded)
            # NOTE(review): bytes are decoded with the default codec here,
            # whereas the main header fields use 'latin_1' - confirm.
            for tx_idx, tx_val in enumerate(txcvr_header_values_encoded):
                if isinstance(tx_val, bytes):
                    txcvr_header_values[tx_idx] = tx_val.decode()

            txcvr = data['transceivers'].setdefault(txcvr_indx, {})

            if _sounder_name_used in ['ER60', 'ES60']:
                # The first 17 values map one-to-one onto the field names;
                # the rest are three 5-value tables separated by spare
                # fields, followed by the software version and a last spare.
                for txcvr_field_indx, field in enumerate(txcvr_header_fields[:17]):
                    txcvr[field] = txcvr_header_values[txcvr_field_indx]

                txcvr['pulse_length_table'] = np.fromiter(
                    list(map(round6, txcvr_header_values[17:22])), 'float')
                txcvr['spare1'] = txcvr_header_values[22]
                txcvr['gain_table'] = np.fromiter(
                    list(map(round6, txcvr_header_values[23:28])), 'float')
                txcvr['spare2'] = txcvr_header_values[28]
                txcvr['sa_correction_table'] = np.fromiter(
                    list(map(round6, txcvr_header_values[29:34])), 'float')
                txcvr['spare3'] = txcvr_header_values[34]
                txcvr['gpt_software_version'] = txcvr_header_values[35]
                txcvr['spare4'] = txcvr_header_values[36]

            elif _sounder_name_used == 'MBES':
                for txcvr_field_indx, field in enumerate(txcvr_header_fields):
                    txcvr[field] = txcvr_header_values[txcvr_field_indx]

            else:
                raise RuntimeError('Unknown _sounder_name_used (Should not happen, this is a bug!)')

            # Strip NUL padding from the text fields of this transceiver.
            txcvr['channel_id'] = txcvr['channel_id'].strip('\x00')
            txcvr['spare1'] = txcvr['spare1'].strip('\x00')
            txcvr['spare2'] = txcvr['spare2'].strip('\x00')
            txcvr['spare3'] = txcvr['spare3'].strip('\x00')
            txcvr['spare4'] = txcvr['spare4'].strip('\x00')
            txcvr['gpt_software_version'] = txcvr['gpt_software_version'].strip('\x00')

            buf_indx += txcvr_header_size

    elif version == 1:
        #CON1 only has a single data field:  beam_config, holding an xml string
        # NOTE(review): if raw_string is bytes (Python 3), bytes.strip with a
        # str argument raises TypeError - confirm the type passed in.
        data['beam_config'] = raw_string[self.header_size(version):].strip('\x00')

    return data
def create_networkx_graph(self, graph_name, graph_attributes):
    """Load edge and vertex collections via AQL and build a DGL heterograph.

    :param graph_name: not used in the body.
    :param graph_attributes: dict with ``'edgeCollections'`` and
        ``'vertexCollections'`` entries, each mapping a collection name to
        the list of document attributes to fetch.
    :return: ``(g, labels)`` where ``g`` is the ``dgl.heterograph`` and
        ``labels`` collects ``doc['reassigned']`` for every document of the
        ``'incident'`` vertex collection.

    NOTE(review): the source formatting was lost; the final ``return`` is
    placed inside the validity check here, so invalid attributes yield
    ``None`` - confirm against the original layout.
    """
    if self.is_valid_graph_attributes(graph_attributes):
        # Edge collection names are split on the first '_' into two parts;
        # the reverse edge name swaps them.
        edge_names = []
        redge_names = []
        for k, v in graph_attributes['edgeCollections'].items():
            edge_names.append(k)
            ens = k.split('_', 1)
            redge = ens[1] + '_' + ens[0]
            redge_names.append(redge)

        # One directed graph per edge type and per reverse edge type.
        sgdata = {ename: nx.DiGraph() for ename in edge_names}
        rsgdata = {ename: nx.DiGraph() for ename in redge_names}
        nxg = nx.DiGraph()
        labels = []
        node_data = {}

        print("Loading edge data...")
        for k, v in graph_attributes['edgeCollections'].items():
            # Build "FOR doc in <coll> RETURN { attr:doc.attr, ..., _id: doc._id}"
            query = "FOR doc in %s " % (k)
            cspl = [s + ':' + 'doc.' + s for s in v]
            cspl.append('_id: doc._id')
            csps = ','.join(cspl)
            query = query + "RETURN { " + csps + "}"
            sgraph = sgdata[k]
            ens = k.split('_', 1)
            redge = ens[1] + '_' + ens[0]
            rsgraph = rsgdata[redge]
            cursor = self.db.aql.execute(query)
            for doc in cursor:
                # '_from' and '_to' must be among the requested attributes.
                nfrom = doc['_from']
                nto = doc['_to']
                sgraph.add_edge(nfrom, nto)
                sgraph.nodes[nfrom]['bipartite'] = 0
                sgraph.nodes[nto]['bipartite'] = 1
                # Mirror edge with the bipartite sides swapped.
                rsgraph.add_edge(nto, nfrom)
                rsgraph.nodes[nfrom]['bipartite'] = 1
                rsgraph.nodes[nto]['bipartite'] = 0

        print("Loading vertex data...")
        vnames = []
        for k, v in graph_attributes['vertexCollections'].items():
            vnames.append(k)
            node_data[k] = list()
            query = "FOR doc in %s " % (k)
            cspl = [s + ':' + 'doc.' + s for s in v]
            cspl.append('_id: doc._id')
            csps = ','.join(cspl)
            query = query + "RETURN { " + csps + "}"
            cursor = self.db.aql.execute(query)
            for doc in cursor:
                exclude_attr = ['_id', '_key', 'node_id']
                if k == 'incident':
                    # 'reassigned' is kept as a label, not as a feature.
                    exclude_attr.append('reassigned')
                    labels.append(doc['reassigned'])
                # The comprehension's k, v do not leak into the enclosing
                # scope on Python 3.
                sdata = {
                    k: v
                    for k, v in doc.items() if k not in exclude_attr
                }
                # Remaining attribute values become an integer feature vector.
                ndvalues = np.fromiter(sdata.values(), dtype=int)
                #rndata = np.asarray(ndvalues, dtype = int)
                #v_data = th.from_numpy(rndata)
                node_data[k].append(ndvalues)

        print("Creating DGL Heterograph...")
        dict_desc = dict()
        for ename in edge_names:
            ens = ename.split('_', 1)
            redge = ens[1] + '_' + ens[0]
            fgk = (ens[0], ename, ens[1])
            # This assignment is overwritten two lines below.
            dict_desc[fgk] = nxg
            rgk = (ens[1], redge, ens[0])
            dict_desc[fgk] = sgdata[ename]
            dict_desc[rgk] = rsgdata[redge]

        g = dgl.heterograph(dict_desc)

        # Attach the stacked feature vectors as node data 'f' per vertex type.
        for v in vnames:
            rndata = np.asarray(node_data[v], dtype=int)
            v_data = th.from_numpy(rndata)
            g.nodes[v].data['f'] = v_data

        return g, labels
def generate_plots(self, store_figs=True):
    """Generate all the plots and save them.

    Args:
        store_figs (bool, optional): forwarded as ``store`` to the grapher
            calls; when true the images are also logged via
            ``self.log_image``. Defaults to True.

    Returns:
        list: the created ``Grapher`` objects - three (accuracy, second
        accuracy metric, loss) for the parameter plots, three for the FLOP
        plots, then the two layer-wise graphers.
    """
    # correct layout of samples_per_layer:
    # [numLayers, numNets, numIntervals, numRepetitions=1, numAlgorithms]
    # new layout of samplesPlot:
    # [numNets, numLayers, numIntervals, numAlgorithms]
    samples_plot = copy.deepcopy(self.samples_per_layer)
    samples_plot = np.squeeze(samples_plot, axis=3)
    samples_plot = np.swapaxes(samples_plot, 0, 1)

    # get keep_ratio per layer
    ref_idx = self.names.index("ReferenceNet")
    kr_per_layer = (self._samples[None, None, :, None] * samples_plot /
                    samples_plot[:, :, :, ref_idx:ref_idx + 1])

    # grapher stats
    num_layers = int(samples_plot.shape[1])
    # 1-based layer indices (the ``np.int`` alias previously used here was
    # removed in NumPy 1.24).
    layers = np.arange(1, num_layers + 1)

    # data for sample sizes per layer plot with standard convention
    layers = np.tile(layers, (self._num_algorithms, 1)).transpose()
    layers = layers[np.newaxis, :, np.newaxis, :]

    # grapher labels
    y_label_error = f"{self.names_metrics[0]} Test Accuracy"
    y_label_error5 = f"{self.names_metrics[1]} Test Accuracy"
    y_label_loss = "Test Loss"

    # grapher stuff
    legend = copy.deepcopy(np.array(self.names)).tolist()
    colors = copy.deepcopy(np.array(self._colors)).tolist()
    title = ", ".join([
        self.param["network"]["name"],
        self.param["generated"]["datasetTest"].replace("_", "-"),
    ])

    def _do_graphs(x_label, x_data, tag):
        # Axis limits in "prune ratio" terms (1 - keep ratio).
        x_min = 1.0 - min(self._x_max, max(self._samples))
        x_max = 1.0 - max(self._x_min, min(self._samples))

        def _flip_data(arr):
            return 1.0 - arr

        # modify the xData to represent Prune Ratio...
        x_data = _flip_data(x_data)

        # y values ...
        acc = _flip_data(self.error)
        acc5 = _flip_data(self.error5)

        # global tag with test dataset
        global_tag_test = f"{self.global_tag}_{self.dataset_test}"

        # grapher initialization + plotting
        grapher_error = Grapher(
            x_values=x_data,
            y_values=acc,
            folder=self._plot_dir,
            file_name=global_tag_test + "_acc_" + tag + ".pdf",
            ref_idx=ref_idx,
            x_min=x_min,
            x_max=x_max,
            legend=legend,
            colors=colors,
            xlabel=x_label,
            ylabel=y_label_error,
            title=title,
        )
        img_err = grapher_error.graph(percentage_x=True,
                                      percentage_y=True,
                                      store=store_figs)

        grapher_error5 = Grapher(
            x_values=x_data,
            y_values=acc5,
            folder=self._plot_dir,
            file_name=global_tag_test + "_acc5_" + tag + ".pdf",
            ref_idx=ref_idx,
            x_min=x_min,
            x_max=x_max,
            legend=legend,
            colors=colors,
            xlabel=x_label,
            ylabel=y_label_error5,
            title=title,
        )
        img_err5 = grapher_error5.graph(percentage_x=True,
                                        percentage_y=True,
                                        store=store_figs)

        grapher_loss = Grapher(
            x_values=x_data,
            y_values=self.loss,
            folder=self._plot_dir,
            file_name=global_tag_test + "_loss_" + tag + ".pdf",
            ref_idx=ref_idx,
            x_min=x_min,
            x_max=x_max,
            legend=legend,
            colors=colors,
            xlabel=x_label,
            ylabel=y_label_loss,
            title=title,
        )
        img_loss = grapher_loss.graph(percentage_x=True, store=store_figs)

        # also write images to Tensorboard
        if store_figs:
            self.log_image(
                self._writer_general,
                f"{self.dataset_test} Test {self.names_metrics[0]} {tag}",
                img_err,
                0,
            )
            self.log_image(
                self._writer_general,
                f"{self.dataset_test} Test {self.names_metrics[1]} {tag}",
                img_err5,
                0,
            )
            # NOTE(review): unlike the two tags above, this one is built
            # without separating spaces; left unchanged so existing log
            # tags keep their names.
            self.log_image(
                self._writer_general,
                self.dataset_test + "Test Loss" + tag,
                img_loss,
                0,
            )

        return grapher_error, grapher_error5, grapher_loss

    # keep a list of figures around
    graphers = []

    # do parameter and flop plots
    graphers.extend(_do_graphs("Pruned Parameters", self.sizes, "param"))
    graphers.extend(_do_graphs("Pruned FLOPs", self.flops, "flops"))

    # do some layer-wise graphs
    title_layer = ", ".join([
        self.param["network"]["name"],
        self.param["network"]["dataset"].replace("_", "-"),
    ])

    def _do_layer_graph(x_label, y_label, y_data, tag, ref_idx=None):
        grapher_layer = Grapher(
            x_values=layers,
            y_values=y_data,
            folder=self._plot_dir,
            file_name=self.global_tag + f"_{tag}.pdf",
            ref_idx=ref_idx,
            x_min=np.min(layers),
            x_max=np.max(layers),
            legend=legend,
            colors=colors,
            xlabel=x_label,
            ylabel=y_label,
            title=title_layer,
        )
        # Deltas are shown only when a reference index is given.
        img_layer = grapher_layer.graph_histo(show_delta=ref_idx is not None,
                                              store=store_figs)
        if store_figs:
            self.log_image(self._writer_general, tag, img_layer, 0)

        return grapher_layer

    graphers.append(
        _do_layer_graph(
            "Budget Allocation over Layers",
            "Percentage of Budget",
            samples_plot,
            "samples",
        ))
    graphers.append(
        _do_layer_graph(
            "Prune Ratio per Layer",
            "Prune Ratio",
            1 - kr_per_layer,
            "layer_pr",
            ref_idx,
        ))

    return graphers
def to_junction_tree_model(model, algorithm) -> JunctionizedModel:
    """Builds an equivalent model whose graph is a junction tree.

    A tree decomposition of the model's graph is computed with the requested
    NetworkX heuristic. Every node of the junction tree becomes one
    "supervariable" of the new model, whose value encodes the values of the
    original variables it contains. The new alphabet size is the original
    alphabet size to the power of the maximal junction size; encodings not
    used by smaller supervariables get log-probability ``-inf``.

    Fields of the new model combine the field and interaction factors of the
    original variables inside each supervariable, and every original factor
    is counted exactly once: variables already accounted for in an earlier
    supervariable are skipped in later ones.

    Interactions of the new model enforce consistency: an original variable
    that appears in two adjacent supervariables must take the same value in
    both. In log space this is 0 for allowed state pairs and ``-inf`` for
    forbidden ones.

    See https://en.wikipedia.org/wiki/Tree_decomposition.

    :param model: original model.
    :param algorithm: decomposition algorithm: ``'min_fill_in'``,
        ``'min_degree'`` or ``'auto'`` (run both, keep the smaller width).
    :return: JunctionizedModel object, which contains junction tree and the
        new model, which is equivalent to original model, but whose graph is
        a tree.
    """
    # Tree decomposition.
    graph = model.get_graph()
    if algorithm == 'min_fill_in':
        tree_width, junc_tree = treewidth_min_fill_in(graph)
    elif algorithm == 'min_degree':
        tree_width, junc_tree = treewidth_min_degree(graph)
    elif algorithm == 'auto':
        width_fill, tree_fill = treewidth_min_fill_in(graph)
        width_deg, tree_deg = treewidth_min_degree(graph)
        # Ties go to the min-degree decomposition.
        if width_fill < width_deg:
            tree_width, junc_tree = width_fill, tree_fill
        else:
            tree_width, junc_tree = width_deg, tree_deg
    else:
        raise ValueError('Unknown treewidth decomposition algorithm %s' %
                         algorithm)

    jt_nodes = list(junc_tree.nodes())
    sv_size = tree_width + 1  # Variables per supervariable.
    new_gr_size = len(jt_nodes)  # Number of supervariables.
    new_al_size = model.al_size**sv_size  # States per supervariable.
    if new_al_size > 1e6:
        raise TooMuchStatesError("New domain size is too large: %d." %
                                 new_al_size)

    # Edges of the junction tree expressed through supervariable indices.
    nodes_lookup = {node: idx for idx, node in enumerate(jt_nodes)}
    new_edges = np.array([[nodes_lookup[a], nodes_lookup[b]]
                          for a, b in junc_tree.edges()])

    # Each supervariable as an array of original variable ids.
    jt_nodes = [np.fromiter(node, dtype=np.int32) for node in jt_nodes]

    # Fields of the supervariables. Unused encodings stay at -inf; variables
    # already covered by an earlier supervariable are skipped so that each
    # original factor enters exactly one new factor.
    new_field = np.full((new_gr_size, new_al_size), -np.inf, dtype=np.float64)
    used_node_fields = set()
    for new_node_id, old_nodes in enumerate(jt_nodes):
        node_field = model.get_subgraph_factor_values(
            old_nodes, vars_skip=used_node_fields)
        new_field[new_node_id, 0:len(node_field)] = node_field
        used_node_fields.update(old_nodes)

    # Consistency interactions (Kronecker delta in log space): 0 where shared
    # original variables agree in both supervariables, -inf elsewhere.
    new_interactions = np.zeros((len(new_edges), new_al_size, new_al_size))
    for edge_id, (u, v) in enumerate(new_edges):
        allowed = build_multi_delta(sv_size, model.al_size, jt_nodes[u],
                                    jt_nodes[v])
        new_interactions[edge_id, np.logical_not(allowed)] = -np.inf

    from inferlo.pairwise.pwf_model import PairWiseFiniteModel
    new_model = PairWiseFiniteModel.create(new_field, new_edges,
                                           new_interactions)
    return JunctionizedModel(new_model, jt_nodes, model.gr_size,
                             model.al_size)
def select_mesh_elems(context, mode, tool, tool_co, select_all_edges, select_all_faces):
    """Select vertices, edges and faces of the edited meshes with a 2D tool shape.

    For every mesh object in ``context.selected_objects`` (or
    ``context.object`` when nothing is selected) the visible elements are
    projected to 2D with ``get_co_2d`` and tested against the selection
    shape. Which passes run depends on
    ``context.tool_settings.mesh_select_mode`` (index 0 = vertices,
    1 = edges, 2 = faces, as used below).

    Parameters
    ----------
    context
        Blender context; ``region``, ``region_data``, ``selected_objects``,
        ``object`` and ``tool_settings.mesh_select_mode`` are read from it.
    mode
        Forwarded unchanged to ``get_mesh_selection_mask``.
        NOTE(review): presumably the set/extend/subtract selection mode -
        confirm against that helper.
    tool
        ``0`` for box, ``1`` for circle, any other value for lasso.
    tool_co
        Shape description matching ``tool``:
        ``(xmin, xmax, ymin, ymax)`` for box, ``(center, radius)`` for
        circle, otherwise the lasso polygon itself.
    select_all_edges
        When true, edges that merely intersect the shape are selected too,
        not only edges whose two vertices are inside it.
    select_all_faces
        When true, faces are derived from the selected edges (plus the face
        under the cursor); otherwise faces are selected by their centers.

    Returns
    -------
    None
        The selection is written to the bmesh elements and pushed back with
        ``bmesh.update_edit_mesh``.
    """
    # Every shape parameter starts as None; only the ones matching `tool`
    # are filled in below.
    box_xmin = box_xmax = box_ymin = box_ymax = center = radius = lasso = None
    # Results of earlier passes that later passes read.
    vert_co = verts_mask_visin = vis_edges_mask_in = edge_count = edges_mask_visin = None

    if tool == 0:
        box_xmin, box_xmax, box_ymin, box_ymax = tool_co
    elif tool == 1:
        center, radius = tool_co
    else:
        # tool == 2: lasso
        lasso = tool_co

    region = context.region
    rv3d = context.region_data

    # Fall back to the active object when nothing is selected.
    sel_obs = context.selected_objects if context.selected_objects else [
        context.object
    ]
    for ob in sel_obs:
        if ob.type == 'MESH':
            mesh_select_mode = context.tool_settings.mesh_select_mode
            # Sync edit-mode changes into ob.data so the foreach_get calls
            # below read current values.
            ob.update_from_editmode()
            me = ob.data
            bm = bmesh.from_edit_mesh(me)

            # VERT PASS ####
            # Also needed for edge mode and for "all faces" face mode,
            # because both reuse the per-vertex results.
            if mesh_select_mode[0] or mesh_select_mode[
                    1] or mesh_select_mode[2] and select_all_faces:
                verts = me.vertices
                vert_count = len(verts)

                # local coordinates of vertices
                vert_co_local = np.empty(vert_count * 3, "f")
                verts.foreach_get("co", vert_co_local)
                vert_co_local.shape = (vert_count, 3)

                # mask of visible vertices ("hide" is inverted)
                verts_mask_vis = np.empty(vert_count, "?")
                verts.foreach_get("hide", verts_mask_vis)
                verts_mask_vis = ~verts_mask_vis

                # local coordinates of visible vertices
                vis_vert_co_local = vert_co_local[verts_mask_vis]
                # world coordinates of visible vertices
                vis_vert_co_world = get_co_world_of_ob(ob, vis_vert_co_local)
                # 2d coordinates of visible vertices; rows of hidden vertices
                # in vert_co stay uninitialised (np.empty)
                vert_co = np.empty((vert_count, 2), "f")
                vert_co[verts_mask_vis] = vis_vert_co = get_co_2d(
                    region, rv3d, vis_vert_co_world)

                # mask of vertices inside the selection polygon from visible vertices
                # box select
                if tool == 0:
                    vis_verts_mask_in = points_inside_rectangle(
                        vis_vert_co, box_xmin, box_xmax, box_ymin, box_ymax)
                # circle select
                elif tool == 1:
                    vis_verts_mask_in = points_inside_circle(
                        vis_vert_co, center, radius)
                # lasso select
                else:
                    vis_verts_mask_in = points_inside_polygon(vis_vert_co,
                                                              lasso,
                                                              prefilter=True)

                # mask of visible vertices inside the selection polygon from all vertices
                verts_mask_visin = np.full(vert_count, False, "?")
                verts_mask_visin[verts_mask_vis] = vis_verts_mask_in

                # do selection
                if mesh_select_mode[0]:
                    select = get_mesh_selection_mask(verts, vert_count,
                                                     verts_mask_visin, mode)
                    select_list = select.tolist()
                    for i, v in enumerate(bm.verts):
                        v.select = select_list[i]

            # EDGE PASS ####
            # Also needed for "all faces" face mode, which derives faces
            # from edges_mask_visin.
            if mesh_select_mode[1] or mesh_select_mode[2] and select_all_faces:
                edges = me.edges
                edge_count = len(edges)

                # for each edge get 2 indices of its vertices
                edge_vert_indices = np.empty(edge_count * 2, "i")
                edges.foreach_get("vertices", edge_vert_indices)
                edge_vert_indices.shape = (edge_count, 2)

                # mask of visible edges
                edges_mask_vis = np.empty(edge_count, "?")
                edges.foreach_get("hide", edges_mask_vis)
                edges_mask_vis = ~edges_mask_vis

                # for each visible edge get 2 vertex indices
                vis_edge_vert_indices = edge_vert_indices[edges_mask_vis]
                # for each visible edge get mask of vertices in the selection polygon
                vis_edge_verts_mask_in = verts_mask_visin[
                    vis_edge_vert_indices]

                # try to select edges that are completely inside the selection polygon
                if not select_all_edges:
                    # mask of edges inside the selection polygon from visible edges
                    vis_edges_mask_in = vis_edge_verts_mask_in[:, 0] & vis_edge_verts_mask_in[:, 1]

                # if select_all_edges enabled or no inner edges found
                # then select edges that intersect the selection polygon
                if select_all_edges or (not select_all_edges and not np.any(vis_edges_mask_in)) or \
                        (mesh_select_mode[2] and select_all_faces):
                    # coordinates of vertices of visible edges
                    vis_edge_vert_co = vert_co[vis_edge_vert_indices]

                    # mask of edges from visible edges that have vertex inside the selection polygon and
                    # should be selected
                    vis_edges_mask_vert_in = vis_edge_verts_mask_in[:, 0] | vis_edge_verts_mask_in[:, 1]

                    # selection polygon bbox
                    # box select
                    if tool == 0:
                        xmin, xmax, ymin, ymax = box_xmin, box_xmax, box_ymin, box_ymax
                    # circle select
                    elif tool == 1:
                        xmin, xmax, ymin, ymax = circle_bbox(center, radius)
                    # lasso select
                    else:
                        xmin, xmax, ymin, ymax = polygon_bbox(lasso)

                    # mask of edges from visible edges that have verts both laying outside of one of sides
                    # of selection polygon bbox, so they can't intersect the selection polygon and
                    # shouldn't be selected
                    vis_edges_mask_cant_isect = segments_completely_outside_rectangle(
                        vis_edge_vert_co, xmin, xmax, ymin, ymax)

                    # mask of edges from visible edges that may intersect selection polygon and
                    # should be tested for intersection
                    vis_edges_mask_may_isect = ~vis_edges_mask_vert_in & ~vis_edges_mask_cant_isect

                    # skip if there is no edges that need to be tested for intersection
                    if np.any(vis_edges_mask_may_isect):
                        # get coordinates of verts of visible edges that may intersect the selection polygon
                        may_isect_vis_edge_co = vis_edge_vert_co[
                            vis_edges_mask_may_isect]

                        # mask of edges that intersect the selection polygon from edges that may intersect it
                        # box select
                        if tool == 0:
                            may_isect_vis_edges_mask_isect = segments_intersect_rectangle(
                                may_isect_vis_edge_co, box_xmin, box_xmax,
                                box_ymin, box_ymax)
                        # circle select
                        elif tool == 1:
                            may_isect_vis_edges_mask_isect = segments_inside_or_intersect_circle(
                                may_isect_vis_edge_co, center, radius)
                        # lasso select
                        else:
                            may_isect_vis_edges_mask_isect = segments_intersect_polygon(
                                may_isect_vis_edge_co, lasso)

                        # mask of edges that intersect the selection polygon from visible edges
                        # (this is an alias: the assignment below writes into
                        # vis_edges_mask_vert_in as well)
                        vis_edges_mask_in = vis_edges_mask_vert_in
                        vis_edges_mask_in[
                            vis_edges_mask_may_isect] = may_isect_vis_edges_mask_isect
                    else:
                        vis_edges_mask_in = vis_edges_mask_vert_in

                # mask of visible edges inside the selection polygon from all edges
                edges_mask_visin = np.full(edge_count, False, "?")
                edges_mask_visin[edges_mask_vis] = vis_edges_mask_in

                # do selection
                if mesh_select_mode[1]:
                    select = get_mesh_selection_mask(edges, edge_count,
                                                     edges_mask_visin, mode)
                    select_list = select.tolist()
                    for i, e in enumerate(bm.edges):
                        e.select = select_list[i]

            # FACE PASS #####
            if mesh_select_mode[2]:
                faces = me.polygons
                face_count = len(faces)

                # get mask of visible faces
                faces_mask_vis = np.empty(face_count, "?")
                faces.foreach_get("hide", faces_mask_vis)
                faces_mask_vis = ~faces_mask_vis

                # select faces which centers are inside the selection rectangle
                if not select_all_faces:
                    # local coordinates of face centers
                    face_center_co_local = np.empty(face_count * 3, "f")
                    faces.foreach_get("center", face_center_co_local)
                    face_center_co_local.shape = (face_count, 3)

                    # local coordinates of visible face centers
                    vis_face_center_co_local = face_center_co_local[
                        faces_mask_vis]
                    # world coordinates of visible face centers
                    vis_vert_co_world = get_co_world_of_ob(
                        ob, vis_face_center_co_local)
                    # 2d coordinates of visible face centers
                    vis_face_center_co = get_co_2d(region, rv3d,
                                                   vis_vert_co_world)

                    # mask of face centers inside the selection polygon from visible faces
                    # box select
                    if tool == 0:
                        vis_faces_mask_in = points_inside_rectangle(
                            vis_face_center_co, box_xmin, box_xmax, box_ymin,
                            box_ymax)
                    # circle select
                    elif tool == 1:
                        vis_faces_mask_in = points_inside_circle(
                            vis_face_center_co, center, radius)
                    # lasso select
                    else:
                        vis_faces_mask_in = points_inside_polygon(
                            vis_face_center_co, lasso, prefilter=True)

                    # mask of visible faces inside the selection polygon from all faces
                    faces_mask_visin = np.full(face_count, False, "?")
                    faces_mask_visin[faces_mask_vis] = vis_faces_mask_in
                else:
                    # mesh loops - edges that forms face polygons, sorted by face indices
                    loops = me.loops
                    loop_count = len(loops)

                    # number of vertices for each face
                    face_loop_totals = np.empty(face_count, "i")
                    faces.foreach_get("loop_total", face_loop_totals)

                    # skip getting faces from edges if there is no edges inside selection border
                    in_edge_count = np.count_nonzero(edges_mask_visin)
                    if in_edge_count:
                        # getting faces from bmesh is faster when a low number of faces need to be
                        # selected from a large number of total faces, otherwise numpy is faster
                        ratio = edge_count / in_edge_count
                        if ratio > 20:
                            # bmesh pass
                            # NOTE(review): bm.edges is indexed here without a
                            # visible ensure_lookup_table() call - confirm the
                            # lookup table is valid at this point.
                            visin_edge_indices = tuple(
                                np.nonzero(edges_mask_visin)[0])
                            in_face_indices = [[
                                face.index
                                for face in bm.edges[index].link_faces
                            ] for index in visin_edge_indices]

                            from itertools import chain
                            # de-duplicate the face indices shared by several edges
                            in_face_indices = set(
                                chain.from_iterable(in_face_indices))
                            c = len(in_face_indices)
                            in_face_indices = np.fromiter(
                                in_face_indices, "i", c)
                        else:
                            # numpy pass
                            # indices of face edges
                            loop_edge_indices = np.empty(loop_count, "i")
                            loops.foreach_get("edge_index", loop_edge_indices)

                            # index of face for each edge in mesh loop
                            face_indices = np.arange(face_count)
                            loop_face_indices = np.repeat(
                                face_indices, face_loop_totals)

                            # mask of visible edges in the selection polygon that are in mesh loops,
                            # therefore forming face polygons in the selection border
                            loop_edges_mask_visin = lookup_isin(
                                loop_edge_indices, edges_mask_visin)

                            # indices of faces inside the selection polygon
                            in_face_indices = np.unique(
                                loop_face_indices[loop_edges_mask_visin])

                        # mask of all faces in the selection polygon
                        faces_mask_in = np.full(face_count, False, "?")
                        faces_mask_in[in_face_indices] = np.True_
                        # mask of visible faces in the selection polygon
                        faces_mask_visin = faces_mask_vis & faces_mask_in
                    else:
                        # both names refer to the same all-False array
                        faces_mask_in = faces_mask_visin = np.full(
                            face_count, False, "?")

                    # FACE POLY PASS ####
                    # select faces under cursor (faces that have the selection polygon inside their area)

                    # visible faces not in the selection polygon
                    faces_mask_visnoin = ~faces_mask_in & faces_mask_vis

                    # number of vertices of each visible face not in the selection polygon
                    visnoin_face_loop_totals = face_loop_totals[
                        faces_mask_visnoin]

                    # skip if all faces are already selected
                    if visnoin_face_loop_totals.size > 0:
                        # box select
                        if tool == 0:
                            cursor_co = (box_xmax, box_ymin
                                         )  # bottom right box corner
                        # circle select
                        elif tool == 1:
                            cursor_co = center
                        # lasso select
                        else:
                            cursor_co = lasso[0]

                        # indices of vertices of all faces
                        face_vert_indices = np.empty(loop_count, "i")
                        faces.foreach_get("vertices", face_vert_indices)

                        # mask of vertices not in the selection polygon from face vertices
                        face_verts_mask_visnoin = np.repeat(
                            faces_mask_visnoin, face_loop_totals)
                        # indices of vertices of visible faces not in the selection polygon
                        visnoin_face_vert_indices = face_vert_indices[
                            face_verts_mask_visnoin]
                        # coordinates of vertices of visible faces not in the selection polygon
                        visnoin_face_vert_co = vert_co[
                            visnoin_face_vert_indices]
                        # index of first face vertex in face vertex sequence
                        visnoin_face_cell_starts = np.insert(
                            visnoin_face_loop_totals[:-1].cumsum(), 0, 0)

                        # mask of faces that have cursor inside their polygon area
                        # from visible faces not in the selection polygon
                        visnoin_faces_mask_under = point_inside_polygons(
                            cursor_co,
                            visnoin_face_vert_co,
                            visnoin_face_cell_starts,
                            None,
                            visnoin_face_loop_totals,
                            prefilter=True)

                        # mask of visible faces under cursor from all faces
                        faces_mask_visunder = np.full(face_count, False, "?")
                        faces_mask_visunder[
                            faces_mask_visnoin] = visnoin_faces_mask_under

                        # mask of visible faces in the selection polygon and under cursor
                        faces_mask_visin[faces_mask_visunder] = np.True_

                # do selection
                select = get_mesh_selection_mask(faces, face_count,
                                                 faces_mask_visin, mode)
                select_list = select.tolist()
                for i, f in enumerate(bm.faces):
                    f.select = select_list[i]

            # flush face selection after selecting/deselecting edges and vertices
            bm.select_flush_mode()
            bmesh.update_edit_mesh(me,
                                   loop_triangles=False,
                                   destructive=False)
def read_buffer(f):
    """Parse an SU2 mesh from the open text file *f* and return a ``Mesh``.

    The file is read as a sequence of ``KEY= value`` header lines, each
    optionally followed by a data block:

    * ``NDIME``: spatial dimension (2 or 3).
    * ``NPOIN``: number of points, followed by the coordinate rows. Extra
      trailing columns in the coordinate rows are detected and dropped.
    * ``NELEM`` / ``MARKER_ELEMS``: number of elements, followed by the
      connectivity rows. Volume elements get tag ``0``, marker elements get
      the id of the preceding ``MARKER_TAG``.
    * ``NMARK``: announced number of markers (only used for a warning).
    * ``MARKER_TAG``: integer tag; a non-integer tag is replaced by the
      previous id plus one and a warning is logged.

    Blank lines, lines starting with ``%`` and lines that do not split into
    exactly one key and one value around ``=`` are skipped.

    After parsing, boundary cell blocks of the same type (``line`` in 2D,
    ``triangle``/``quad`` otherwise) are merged into a single block each,
    together with their ``"su2:tag"`` arrays.

    Raises
    ------
    ReadError
        If ``NDIME`` is not 2 or 3, if an element row has an unexpected
        number of columns, or if the file contains no ``NPOIN`` section.
    """
    cells = []
    cell_data = {"su2:tag": []}

    itype = "i8"
    ftype = "f8"
    dim = 0
    # Stays None until an NPOIN section has been read.
    points = None

    next_tag_id = 0
    expected_nmarkers = 0
    markers_found = 0
    while True:
        line = f.readline()
        if not line:
            # EOF
            break

        line = line.strip()
        if len(line) == 0:
            continue
        if line[0] == "%":
            continue

        try:
            name, rest_of_line = line.split("=")
        except ValueError:
            logging.warning(
                f"meshio could not parse line\n {line}\n skipping.....")
            continue
        # Accept "KEY = value" as well as "KEY= value".
        name = name.strip()

        if name == "NDIME":
            dim = int(rest_of_line)
            if dim != 2 and dim != 3:
                raise ReadError(f"Invalid dimension value {line}")

        elif name == "NPOIN":
            # According to the documentation rest_of_line should just be an
            # int, and the next block should be just the coordinates of the
            # points. However, some files have one or two extra indices not
            # related to the actual coordinates. So read the next line
            # separately to find the actual number of columns.
            first_line = f.readline()
            first_line = first_line.split()
            first_line = np.array(first_line, dtype=ftype)

            extra_columns = first_line.shape[0] - dim

            # One row has already been consumed above.
            num_verts = int(rest_of_line.split()[0]) - 1
            points = np.fromfile(f,
                                 count=num_verts * (dim + extra_columns),
                                 dtype=ftype,
                                 sep=" ").reshape(num_verts,
                                                  dim + extra_columns)

            # drop any extra info
            if extra_columns > 0:
                first_line = first_line[:-extra_columns]
                points = points[:, :-extra_columns]

            # add the first line we read separately
            points = np.vstack([first_line, points])

        elif name == "NELEM" or name == "MARKER_ELEMS":
            # The block cannot be read in one numpy call because its total
            # size is not known up front (rows differ in length per element
            # type). Read the next num_elems lines as they are and hand the
            # flat integer array to _translate_cells.
            num_elems = int(rest_of_line)
            if num_elems == 0:
                # An empty section has no rows to inspect; calling next()
                # on an empty slice would raise StopIteration.
                continue
            gen = islice(f, num_elems)

            # Some files have an extra int column while others do not.
            # Inspect the first row to find out which case applies.
            first_line_str = next(gen)
            first_line = first_line_str.split()
            nnodes = su2_type_to_numnodes[int(first_line[0])]
            if nnodes + 1 == len(first_line):
                has_extra_column = False
            elif nnodes + 2 == len(first_line):
                has_extra_column = True
            else:
                raise ReadError(f"Invalid number of columns for {name} field")

            # put the inspected row back in front of the remaining ones
            gen = chain([first_line_str], gen)
            cell_array = " ".join([row.rstrip("\n") for row in gen])
            cell_array = np.fromiter(cell_array.split(), dtype=itype)

            cells_, _ = _translate_cells(cell_array, has_extra_column)

            for eltype, data in cells_.items():
                cells.append(CellBlock(eltype, data))
                num_block_elems = len(data)
                if name == "NELEM":
                    cell_data["su2:tag"].append(
                        np.full(num_block_elems, 0, dtype=np.int32))
                else:
                    tags = np.full(num_block_elems,
                                   next_tag_id,
                                   dtype=np.int32)
                    cell_data["su2:tag"].append(tags)

        elif name == "NMARK":
            expected_nmarkers = int(rest_of_line)

        elif name == "MARKER_TAG":
            next_tag = rest_of_line
            try:
                next_tag_id = int(next_tag)
            except ValueError:
                next_tag_id += 1
                logging.warning(
                    "meshio does not support tags of string type.\n"
                    "    Surface tag {} will be replaced by {}".format(
                        rest_of_line, next_tag_id))
            markers_found += 1

    if points is None:
        raise ReadError("SU2 file contains no NPOIN section")

    if markers_found != expected_nmarkers:
        logging.warning(
            f"expected {expected_nmarkers} markes according to NMARK value "
            f"but found only {markers_found}")

    # merge boundary elements in a single cellblock per cell type
    if dim == 2:
        types = ["line"]
    else:
        types = ["triangle", "quad"]

    indices_to_merge = {t: [] for t in types}
    for index, cell_block in enumerate(cells):
        if cell_block.type in types:
            indices_to_merge[cell_block.type].append(index)

    cdata = cell_data["su2:tag"]
    for cell_type, indices in indices_to_merge.items():
        if len(indices) > 1:
            # the first block of each type absorbs all the others
            cells[indices[0]] = CellBlock(
                cell_type, np.concatenate([cells[i].data for i in indices]))
            cdata[indices[0]] = np.concatenate([cdata[i] for i in indices])

    # delete merged blocks, highest index first so that the remaining
    # indices stay valid
    idelete = []
    for indices in indices_to_merge.values():
        idelete += indices[1:]

    for i in sorted(idelete, reverse=True):
        del cells[i]
        del cdata[i]

    cell_data["su2:tag"] = cdata
    return Mesh(points, cells, cell_data=cell_data)
def generate_cooccurrence(segments_filename, labels_to_idx, rootdir='.'):
    """Build a sparse label co-occurrence matrix from a segments CSV file.

    The file format is::

        # Segments csv created Sun Mar  5 10:54:31 2017
        # num_ytids=22160, num_segs=22160, num_unique_labels=527, ...
        # YTID, start_seconds, end_seconds, positive_labels
        --PJHxphWEs, 30.000, 40.000, "/m/09x0r,/t/dd00088"
        --ZhevVpy1s, 50.000, 60.000, "/m/012xff"

    Rows starting with ``#`` and empty rows are skipped. Column 0 is the
    segment id; every column from index 3 onwards is one label (the quoted
    label list is preceded by a space, so ``csv.reader`` splits it at its
    inner commas and leaves the quote characters in place).

    For every pair of distinct labels found on the same segment, the entries
    ``(a, b)`` and ``(b, a)`` are each incremented by one.

    Parameters
    ----------
    segments_filename : str
        Name of the CSV file, relative to ``rootdir``.
    labels_to_idx : dict
        Maps a label string to its row/column index.
    rootdir : str
        Directory containing the file.

    Returns
    -------
    torch.sparse.FloatTensor
        Sparse ``(len(labels_to_idx), len(labels_to_idx))`` matrix of
        co-occurrence counts.

    Raises
    ------
    KeyError
        If the file contains a label missing from ``labels_to_idx``.
    """
    fn = os.path.join(rootdir, segments_filename)

    # keys: segment id, values: label indices seen so far for that segment
    segmentlabels = {}
    # keys: (label index, other label index), values: co-occurrence count
    coo = {}

    score = 1  # every label found in the file counts once

    with open(fn, 'r') as fh:
        csvreader = csv.reader(fh, delimiter=',', quotechar='"')
        for vals in csvreader:
            # csv.reader yields [] for blank lines
            if not vals or vals[0].startswith("#"):
                continue

            # fields we care about are:
            #   YTID (position 0)
            #   positive labels (position 3 onwards)
            segmentname = vals[0].strip()
            labels = vals[3:]  # there is potentially more than 1 label

            # keep track of what other labels are in this segment
            seen = segmentlabels.setdefault(segmentname, set())
            for label in labels:
                # labels may be padded with spaces or have a quote character
                label = label.strip().replace('"', "").strip()
                labelidx = labels_to_idx[label]
                seen.add(labelidx)
                for otherlabel in seen:
                    if labelidx == otherlabel:
                        continue
                    # keep the matrix symmetric
                    for key in ((labelidx, otherlabel),
                                (otherlabel, labelidx)):
                        coo[key] = coo.get(key, 0) + score

    sorted_keys = sorted(coo.keys())
    data = np.fromiter((coo[k] for k in sorted_keys),
                       dtype=float,
                       count=len(sorted_keys))
    # shape (2, nnz): first row holds row indices, second row column indices
    skarr = np.array(sorted_keys).T
    i = torch.LongTensor(skarr)
    v = torch.FloatTensor(data)
    nlabels = len(labels_to_idx)
    coo_torch = torch.sparse.FloatTensor(i, v, (nlabels, nlabels))
    return coo_torch
def batch_sample(n, m, seed=0):
    """Yield integer arrays of `m` samples each, drawn from `sample(n, seed)`.

    A single stream is created once and consumed batch by batch, so
    successive arrays continue where the previous one stopped. The generator
    itself never terminates; ``np.fromiter`` raises ``ValueError`` if the
    underlying stream runs out before `m` items were read.
    """
    stream = sample(n, seed)
    while True:
        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy; the builtin selects the same dtype.
        yield np.fromiter(stream, int, m)
def sort_param(
    param: npt.ArrayLike,
    seperator: str = " ",
    check_duplicates: bool = True,
) -> NDArray[np.str_]:
    """Return *param* with the atoms inside every parameter put in order.

    Each parameter is a string of atoms joined by *seperator*:

    * one atom: the parameter is left as it is;
    * two atoms: both atoms are ordered alphabetically;
    * three atoms: only the first and the last atom are ordered
      alphabetically, the middle one stays in place;
    * four or more atoms: not supported.

    Examples
    --------
    .. code-block ::

        >>> from FOX.functions.sorting import sort_param

        >>> param1 = [
        ...     "Cd Cd",
        ...     "Se Cd",
        ...     "Se Se",
        ... ]
        >>> param2 = [
        ...     "Cd Cd Cd",
        ...     "Se Cd Cd",
        ...     "Se Se Se",
        ... ]

        >>> sort_param(param1)
        array(['Cd Cd', 'Cd Se', 'Se Se'], dtype='<U5')

        >>> sort_param(param2)
        array(['Cd Cd Cd', 'Cd Cd Se', 'Se Se Se'], dtype='<U8')

    Parameters
    ----------
    param : array-like
        The parameters that are to be sorted.
    seperator : :class:`str`
        The string on which every parameter is split into atoms.
    check_duplicates : :class:`bool`
        If true, verify that the result holds no parameter twice.

    Returns
    -------
    :class:`np.ndarray[np.str_] <numpy.ndarray>`
        A new array, of the same shape as the input, holding the sorted
        parameters.

    Raises
    ------
    :exc:`TypeError`
        Raised when the input does not convert to a unicode string array.
    :exc:`NotImplementedError`
        Raised when the parameters consist of four or more atoms.
    :exc:`ValueError`
        Raised when ``check_duplicates is True`` and the result contains
        duplicate parameters.

    """
    arr: NDArray[np.str_] = np.asarray(param)
    if arr.dtype.kind != "U":
        raise TypeError(
            f"Expected a string array; observed dtype: {arr.dtype}")

    # ``np.asarray`` hands back its argument when that already is an array;
    # copy in that case so the caller never receives its own object.
    if arr.size == 0:
        return arr.copy() if arr is param else arr

    tokens = np.array(np.char.split(arr, seperator).tolist())
    width = tokens.shape[-1]

    if width == 1:
        result = arr.copy() if arr is param else arr
    elif width in (2, 3):
        # A stride of 1 covers both atoms of a pair; a stride of 2 picks
        # the outer atoms of a triplet. The slice is a view, so sorting it
        # reorders ``tokens`` itself.
        tokens[..., ::width - 1].sort(axis=-1)
        rows = tokens.reshape(-1, width)
        result = np.fromiter(
            map(seperator.join, rows), dtype=arr.dtype, count=arr.size)
        result.shape = arr.shape
    else:
        raise NotImplementedError(
            f"Sorting parameters consisting of {width} atoms is not supported")

    if check_duplicates:
        uniq, n_seen = np.unique(result, return_counts=True)
        repeated = uniq[n_seen != 1]
        if repeated.size:
            raise ValueError(f"Duplicate parameters: {repeated}")
    return result
def from_iterable(itr):
    """Collect the items of *itr* into a one-dimensional float NumPy array."""
    values = np.fromiter(itr, dtype=float)
    return values
def _recombine(self, dpack, spacks):
    """Join sentences by parsing their heads.

    The sentence-level predictions found in ``spacks`` are combined with
    the predictions of the inter-sentential parser, which is run on the
    pairings picked by ``self._select_heads``.

    Parameters
    ----------
    dpack : DataPack
        Datapack for the whole document.
    spacks : list of DataPack
        Datapacks for the subgroups (sentences) of the document.

    Returns
    -------
    dpack : DataPack
        Datapack for the whole document, whose graph carries the merged
        prediction.
    """
    unrelated_lbl = dpack.label_number(UNRELATED)
    # intra-sentential predictions
    sent_lbl = self._mk_get_lbl(dpack, spacks)

    if self._verbose:
        # check for lost and hallucinated intra- edges
        self._check_intra_edges(dpack, spacks)

    # call inter-sentential parser
    dpack_inter = self._select_heads(dpack, spacks)
    has_inter = len(dpack_inter) > 0
    if has_inter:
        dpack_inter = self._parsers.inter.transform(dpack_inter)
    doc_lbl = self._mk_get_lbl(dpack, [dpack_inter])

    def merged_lbl(i):
        """Doc label where relevant else sentence label.

        Returns
        -------
        lbl: string
            Predicted document-level label, else sentence-level label ;
            UNRELATED for missing values.
        """
        lbl = doc_lbl(i) if has_inter else None
        # missing document-level prediction: use sentence-level
        # prediction
        if lbl is None:
            lbl = sent_lbl(i)
        # fallback: it may have fallen through the cracks
        # (ie. may be neither in a sentence nor a head)
        if lbl is None:
            lbl = unrelated_lbl
        return lbl

    # merge results
    prediction = np.fromiter((merged_lbl(i) for i in range(len(dpack))),
                             dtype=np.dtype(np.int16))
    graph = dpack.graph.tweak(prediction=prediction)
    dpack = dpack.set_graph(graph)

    if self._verbose:
        # check for hallucinated and lost inter edges
        # The predicted label of an inter-sentential edge is the merged
        # label: the sentence-level lookup alone misses whatever the
        # inter-sentential parser predicted for that pairing.
        inter_edges_pred = [(edu1.id, edu2.id, merged_lbl(i))
                            for i, (edu1, edu2) in enumerate(dpack.pairings)
                            if (edu1.subgrouping != edu2.subgrouping
                                and merged_lbl(i) != unrelated_lbl)]
        inter_edges_true = [(edu1.id, edu2.id, dpack.target[i])
                            for i, (edu1, edu2) in enumerate(dpack.pairings)
                            if (edu1.subgrouping != edu2.subgrouping
                                and dpack.target[i] != unrelated_lbl)]
        if set(inter_edges_true) != set(inter_edges_pred):
            print('Lost inter edges: {}'.format(
                sorted(set(inter_edges_true) - set(inter_edges_pred))))
            print()
            print('Hallucinated inter edges: {}'.format(
                sorted(set(inter_edges_pred) - set(inter_edges_true))))

    return dpack
def GridSplineToMesh2d(x, y, data, xi, yi, default_value=np.nan, plotonly=False, fill_nans=False):  #{{{
    '''
    python analog to InterpFromGridToMesh.  This routine uses
    scipy.interpolate.CloughTocher2DInterpolator to create a bivariate spline
    interpolation of the input data and then return values of the spline
    on the x,y coordinates of the model mesh.  The interpolant is piece-wise
    cubic, C1 smooth (continuously differentiable) and has approximately
    minimized curvature.  See "help(scipy.interpolate.CloughTocher2DInterpolator)"
    for more information on the routine.

    NOTE: this routine will not be appropriate if there are large holes (nan's) in
    the input data.  A non-spline interpolation scheme should be used in that case.

    x,y:            vectors defining the coordinates of the input data, given
                    either at the cell centers (length ncols/nrows of data) or
                    at the cell corners (length ncols+1/nrows+1 of data)
    data:           2D array of input data
    xi,yi:          x and y coordinates to be interpolated onto
    default_value:  default value if points lie outside the convex hull of input
                    points (defaults to nan if not specified)
    plotonly:       plot the sub-grid of data that would be interpolated using
                    imshow, then return None without interpolating
    fill_nans:      fill nan's (holes) in data using the spline fit?  When False,
                    every output point whose nearest data point is nan is set
                    to nan.

    Usage:
        interpdata=GridSplineToMesh2d(x,y,data,xi,yi,default_value=np.nan,plotonly=False,fill_nans=False)

    Examples:
        interpdata=GridSplineToMesh2d(x_m,y_m,data,md.mesh.x,md.mesh.y,0)
    '''

    if np.ndim(x) == 2:
        x = x.reshape(-1, )
    if np.ndim(y) == 2:
        y = y.reshape(-1, )
    if len(x) != data.shape[1] + 1 and len(x) != data.shape[1]:
        raise ValueError(
            'x should have same length as ncols(data) or ncols(data)+1')
    if len(y) != data.shape[0] + 1 and len(y) != data.shape[0]:
        raise ValueError(
            'y should have same length as nrows(data) or nrows(data)+1')

    # create sub-grid that just covers the limits of xi and yi,
    # padded by one grid spacing on each side
    dx = x[1] - x[0]
    dy = y[1] - y[0]
    xlim = [min(xi) - dx, max(xi) + dx]
    ylim = [min(yi) - dy, max(yi) + dy]

    # create points array and flattened data array
    if len(x) == data.shape[1] and len(y) == data.shape[0]:
        print(' x,y taken to define the center of data grid cells')
        xcoord = x
        ycoord = y
    elif len(x) == data.shape[1] + 1 and len(y) == data.shape[0] + 1:
        print(' x,y taken to define the corners of data grid cells')
        # cell centers are the midpoints of consecutive corners
        xcorner = np.asarray(x, dtype=float)
        ycorner = np.asarray(y, dtype=float)
        xcoord = (xcorner[:-1] + xcorner[1:]) / 2
        ycoord = (ycorner[:-1] + ycorner[1:]) / 2
    else:
        raise ValueError(
            'x and y have inconsistent sizes: both should have length ncols(data)/nrows(data) or ncols(data)+1/nrows(data)+1'
        )

    xind = np.nonzero(np.logical_and(xcoord > xlim[0], xcoord < xlim[1]))[0]
    yind = np.nonzero(np.logical_and(ycoord > ylim[0], ycoord < ylim[1]))[0]
    xg, yg = np.meshgrid(xcoord[xind], ycoord[yind])
    subdata = data[yind[0]:yind[-1] + 1, xind[0]:xind[-1] + 1]

    points = np.array([xg.ravel(), yg.ravel()]).T
    flatsubdata = subdata.ravel()

    if plotonly:
        plt.imshow(np.flipud(subdata), origin='upper')
        plt.show()
        return

    # mask out any nan's in the data and corresponding coordinate points
    mask = np.isnan(flatsubdata)
    ind = np.nonzero(mask)[0]
    if len(ind) and fill_nans:
        print(" WARNING: filling nans using spline fit through good data points, which may or may not be appropriate. Check results carefully.")
    goodsubdata = np.delete(flatsubdata, ind)
    goodpoints = np.delete(points, ind, axis=0)

    # create spline and index spline at mesh points; default_value is used
    # for points outside the convex hull of the good data points
    spline = CloughTocher2DInterpolator(goodpoints, goodsubdata,
                                        fill_value=default_value)
    interpdata = spline(xi, yi)

    if not fill_nans:
        # identify nan's in xi,yi using nearest neighbors
        xyinterp = np.dstack([xi, yi])[0]
        xydata = np.dstack([xg.ravel(), yg.ravel()])[0]
        tree = cKDTree(xydata)
        nearest = tree.query(xyinterp)[1]
        pos = np.nonzero(np.isnan(flatsubdata[nearest]))
        interpdata[pos] = flatsubdata[nearest][pos]

    return interpdata
def ebrisk(rupgetter, srcfilter, param, monitor):
    """
    Compute the losses of the events of one rupture getter and aggregate
    them per event, loss index and (optionally) tag.

    :param rupgetter:
        a RuptureGetter instance
    :param srcfilter:
        a SourceFilter instance
    :param param:
        a dictionary of parameters; the keys read here are 'crmodel', 'lba',
        'oqparam', 'aggregate_by', 'asset_loss_table', 'avg_losses',
        'epspath' and 'ses_ratio'
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        a dictionary with keys 'elt' (event loss table, one record per event
        with nonzero total loss), 'agg_losses' (losses accumulated per
        realization index, scaled by ses_ratio), 'times' (seconds spent per
        site id), 'events_per_sid' (mean number of hazard rows per site),
        'gmf_nbytes', 'losses_by_A' and, if param['asset_loss_table'] is set,
        'alt_eidx' (the pair asset loss table, event indices)
    """
    crmodel = param['crmodel']
    lba = param['lba']
    E = rupgetter.num_events
    L = len(lba.loss_names)
    N = len(srcfilter.sitecol.complete)
    e1 = rupgetter.first_event
    with monitor('getting assets', measuremem=False):
        with datastore.read(srcfilter.filename) as dstore:
            assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
    A = len(assetcol)
    getter = getters.GmfGetter(rupgetter, srcfilter, param['oqparam'])
    with monitor('getting hazard'):
        getter.init()  # instantiate the computers
        hazard = getter.get_hazard_by_sid()  # sid -> (sid, eid, gmv)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    events = rupgetter.get_eid_rlz()
    # numpy.testing.assert_equal(events['eid'], sorted(events['eid']))
    # map every event ID to a consecutive index starting at e1
    eid2idx = dict(zip(events['eid'], range(e1, e1 + E)))
    tagnames = param['aggregate_by']
    shape = assetcol.tagcol.agg_shape((E, L), tagnames)
    elt_dt = [('event_id', U64), ('rlzi', U16), ('loss', (F32, shape[1:]))]
    if param['asset_loss_table']:
        # `alt` exists only when the asset loss table is requested
        alt = numpy.zeros((A, E, L), F32)
    acc = numpy.zeros(shape, F32)  # shape (E, L, T...)
    # NB: IMT-dependent weights are not supported in ebrisk
    times = numpy.zeros(N)  # risk time per site_id
    num_events_per_sid = 0
    epspath = param['epspath']
    gmf_nbytes = 0
    for sid, haz in hazard.items():
        gmf_nbytes += haz.nbytes
        t0 = time.time()
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        num_events_per_sid += len(haz)
        if param['avg_losses']:
            # one weight per hazard row, looked up through the realization
            # of its event
            weights = getter.weights[[
                getter.eid2rlz[eid] for eid in haz['eid']
            ]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, epspath)
        # indices of the events of this site, relative to e1
        eidx = numpy.array([eid2idx[eid] for eid in haz['eid']]) - e1
        # the event IDs in `haz` are overwritten with the indices e1 + eidx
        haz['eid'] = eidx + e1
        with mon_risk:
            out = get_output(crmodel, assets_by_taxo, haz)
        with mon_agg:
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                tagi = asset[tagnames] if tagnames else ()
                # the tag indices stored on the asset are shifted down by one
                tagidxs = tuple(idx - 1 for idx in tagi)
                losses_by_lt = {}
                for lti, lt in enumerate(crmodel.loss_types):
                    lratios = out[lt][a]
                    # loss ratios are scaled by the exposed value of the asset
                    if lt == 'occupants':
                        losses = lratios * asset['occupants_None']
                    else:
                        losses = lratios * asset['value-' + lt]
                    if param['asset_loss_table']:
                        alt[aid, eidx, lti] = losses
                    losses_by_lt[lt] = losses
                for loss_idx, losses in lba.compute(asset, losses_by_lt):
                    acc[(eidx, loss_idx) + tagidxs] += losses
                    if param['avg_losses']:
                        lba.losses_by_A[aid, loss_idx] += (
                            losses @ weights * param['ses_ratio'])
        times[sid] = time.time() - t0
    if hazard:
        # turn the total into a mean over the sites that have hazard
        num_events_per_sid /= len(hazard)
    with monitor('building event loss table'):
        # events whose losses sum to zero are left out of the table
        elt = numpy.fromiter(
            (
                (event['eid'], event['rlz'], losses)  # losses (L, T...)
                for event, losses in zip(events, acc) if losses.sum()),
            elt_dt)
        agg = general.AccumDict(accum=numpy.zeros(shape[1:], F32))  # rlz->agg
        for rec in elt:
            agg[rec['rlzi']] += rec['loss'] * param['ses_ratio']
    res = {
        'elt': elt,
        'agg_losses': agg,
        'times': times,
        'events_per_sid': num_events_per_sid,
        'gmf_nbytes': gmf_nbytes
    }
    res['losses_by_A'] = lba.losses_by_A
    if param['asset_loss_table']:
        eidx = numpy.array([eid2idx[eid] for eid in events['eid']])
        res['alt_eidx'] = alt, eidx
    return res
def _recombine(self, dpack, spacks):
    """Build the document parse out of the partial parses of its subgroups.

    As implemented now this works like the SoftParser: one global model
    has to score the (almost fixed) intra edges as well as the inter
    edges.

    Parameters
    ----------
    dpack : DataPack
        The datapack of the complete document.
    spacks : list of DataPack
        The datapacks of the subgroups (sentences), one for each.

    Returns
    -------
    dpack : DataPack
        The datapack of the complete document, with a parse filled in.
    """
    no_relation = dpack.label_number(UNRELATED)
    # label lookup for the intra-sentential predictions
    intra_label_of = self._mk_get_lbl(dpack, spacks)

    if self._verbose:
        # look for intra- edges that were lost or hallucinated
        print('>>> check intra 1 >>>')
        self._check_intra_edges(dpack, spacks)
        print('<<< end check intra 1 <<<')

    # the intra-sentential decisions are fixed before going inter-sentential
    dpack = self._fix_intra_edges(dpack, spacks)

    # inter-sentential phase
    frontier_pack = self._select_frontiers(dpack, spacks)
    has_inter = len(frontier_pack) > 0
    if has_inter:
        # the inter parser is only allowed to touch the inter pairings,
        # so hand it their indices
        free_pairs = idxes_inter(frontier_pack, include_fake_root=True)
        frontier_pack = self._parsers.inter.transform(
            frontier_pack, nonfixed_pairs=free_pairs)
    doc_label_of = self._mk_get_lbl(dpack, [frontier_pack])

    def resolve(i):
        """Pick the label of pairing `i`.

        Returns
        -------
        lbl: string
            The document-level prediction when there is one, otherwise
            the sentence-level prediction, otherwise UNRELATED.
        """
        if has_inter:
            chosen = doc_label_of(i)
            if chosen is not None:
                return chosen
        # no document-level prediction: try the sentence level
        chosen = intra_label_of(i)
        if chosen is not None:
            return chosen
        # the pairing was covered by neither of the two
        return no_relation

    # merge results
    prediction = np.fromiter(map(resolve, range(len(dpack))),
                             dtype=np.dtype(np.int16))
    dpack = dpack.set_graph(dpack.graph.tweak(prediction=prediction))

    if self._verbose:
        # second look for lost and hallucinated intra- edges
        print('>>> check intra 2 >>>')
        self._check_intra_edges(dpack, spacks)
        print('<<< end check intra 2 <<<')

        # look for lost and hallucinated inter- edges
        # TODO turn into _check_inter_edges
        predicted = []
        expected = []
        for i, (edu1, edu2) in enumerate(dpack.pairings):
            if edu1.subgrouping == edu2.subgrouping:
                # same subgroup: not an inter edge
                continue
            pred_lbl = resolve(i)
            if pred_lbl != no_relation:
                predicted.append((edu1.id, edu2.id, pred_lbl))
            true_lbl = dpack.target[i]
            if true_lbl != no_relation:
                expected.append((edu1.id, edu2.id, true_lbl))
        predicted = set(predicted)
        expected = set(expected)
        if expected != predicted:
            print('Lost inter edges: {}'.format(
                sorted(expected - predicted)))
            print()
            print('Hallucinated inter edges: {}'.format(
                sorted(predicted - expected)))

    return dpack
import numpy as np


def readwords():
    """Yield every line of ``input/day1.txt`` converted to an int."""
    with open('input/day1.txt') as handle:
        for raw_line in handle.read().splitlines():
            stripped = raw_line.strip()
            yield int(stripped)


inp = np.fromiter(readwords(), int)

# part one: count the entries that are larger than their predecessor
increased = inp[:-1] < inp[1:]
print(sum(increased))

# part two: same count, over sums of three consecutive entries
sums = inp[2:] + inp[1:-1] + inp[:-2]
increased = sums[:-1] < sums[1:]
print(sum(increased))
def _repeated_field_to_ndarray(values, dtype, num_elements, shape):
    """Turn a repeated scalar field of a TensorProto into an array of `shape`.

    A field holding exactly one value is expanded by repeating that value
    `num_elements` times; otherwise the values are copied one for one.
    """
    if len(values) == 1:
        return np.repeat(np.array(values[0], dtype=dtype),
                         num_elements).reshape(shape)
    return np.fromiter(values, dtype=dtype).reshape(shape)


def _complex_field_to_ndarray(values, dtype, num_elements, shape):
    """Turn a flat (real, imag, real, imag, ...) field into a complex array.

    A field holding exactly one (real, imag) pair is expanded by repeating
    that complex value `num_elements` times.
    """
    if len(values) == 2:
        return np.repeat(np.array(complex(values[0], values[1]), dtype=dtype),
                         num_elements).reshape(shape)
    # Zipping one iterator with itself consumes the values two at a time.
    it = iter(values)
    return np.array([complex(re, im) for re, im in zip(it, it)],
                    dtype=dtype).reshape(shape)


def MakeNdarray(tensor):
    """Create a numpy ndarray from a tensor.

    Create a numpy ndarray with the same shape and data as the tensor.

    Args:
      tensor: A TensorProto.

    Returns:
      A numpy array with the tensor contents.

    Raises:
      TypeError: if tensor has unsupported type.
    """
    shape = [d.size for d in tensor.tensor_shape.dim]
    # np.prod([]) is the float 1.0, which recent NumPy releases refuse as a
    # repeat count; an integer dtype keeps rank-0 (scalar) tensors working.
    num_elements = np.prod(shape, dtype=np.int64)
    tensor_dtype = dtypes.as_dtype(tensor.dtype)
    dtype = tensor_dtype.as_numpy_dtype

    if tensor.tensor_content:
        # np.fromstring is deprecated for binary input.  np.frombuffer gives
        # a read-only view over the bytes, so copy to keep handing callers a
        # writable array that owns its data.
        return np.frombuffer(tensor.tensor_content,
                             dtype=dtype).copy().reshape(shape)

    if tensor_dtype == dtypes.float16:
        # the half_val field of the TensorProto stores the binary
        # representation of the fp16: reinterpret the uint16 bits as float16
        if len(tensor.half_val) == 1:
            tmp = np.array(tensor.half_val[0],
                           dtype=np.uint16).view(np.float16)
            return np.repeat(tmp, num_elements).reshape(shape)
        tmp = np.fromiter(tensor.half_val, dtype=np.uint16).view(np.float16)
        return tmp.reshape(shape)

    if tensor_dtype == dtypes.float32:
        return _repeated_field_to_ndarray(
            tensor.float_val, dtype, num_elements, shape)

    if tensor_dtype == dtypes.float64:
        return _repeated_field_to_ndarray(
            tensor.double_val, dtype, num_elements, shape)

    if tensor_dtype in [
        dtypes.int32, dtypes.uint8, dtypes.uint16, dtypes.int16, dtypes.int8,
        dtypes.qint32, dtypes.quint8, dtypes.qint8, dtypes.qint16,
        dtypes.quint16, dtypes.bfloat16
    ]:
        # all of these types are read from the int_val field
        return _repeated_field_to_ndarray(
            tensor.int_val, dtype, num_elements, shape)

    if tensor_dtype == dtypes.int64:
        return _repeated_field_to_ndarray(
            tensor.int64_val, dtype, num_elements, shape)

    if tensor_dtype == dtypes.string:
        if len(tensor.string_val) == 1:
            return np.repeat(np.array(tensor.string_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        return np.array(list(tensor.string_val), dtype=dtype).reshape(shape)

    if tensor_dtype == dtypes.complex64:
        return _complex_field_to_ndarray(
            tensor.scomplex_val, dtype, num_elements, shape)

    if tensor_dtype == dtypes.complex128:
        return _complex_field_to_ndarray(
            tensor.dcomplex_val, dtype, num_elements, shape)

    if tensor_dtype == dtypes.bool:
        return _repeated_field_to_ndarray(
            tensor.bool_val, dtype, num_elements, shape)

    raise TypeError("Unsupported tensor type: %s" % tensor.dtype)
# Importing Axes3D registers the "3d" projection on older Matplotlib releases.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

try:
    import numpy as np
except ImportError:
    # NumPy is needed to build the surface; leave quietly when it is missing.
    raise SystemExit

from deap import benchmarks

# NOTE(review): `plt` and `cm` are used below but are not imported in this
# snippet -- confirm that `matplotlib.pyplot as plt` and `matplotlib.cm` are
# imported elsewhere in the file.


def schwefel_arg0(sol):
    """Return the first component of ``benchmarks.schwefel(sol)``."""
    return benchmarks.schwefel(sol)[0]


fig = plt.figure()
# Axes3D(fig) no longer attaches itself to the figure on current Matplotlib,
# so request the 3D axes from the figure instead.
# Alternative viewpoint: ax.view_init(elev=50, azim=-29)
ax = fig.add_subplot(111, projection="3d")
X = np.arange(-500, 500, 10)
Y = np.arange(-500, 500, 10)
X, Y = np.meshgrid(X, Y)
# np.float was removed in NumPy 1.24; the builtin float is the same dtype.
Z = np.fromiter(map(schwefel_arg0, zip(X.flat, Y.flat)),
                dtype=float, count=X.size).reshape(X.shape)

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet, linewidth=0.2)

plt.xlabel("x")
plt.ylabel("y")

plt.show()
def k_array_constructor(list_of_parameters):
    """Construct Δy (k_array) for every component.

    Each function in ``derivatives`` is called with ``list_of_parameters``
    and its result is multiplied by ``h``.  ``derivatives``, ``h`` and ``n``
    are not parameters: they are looked up in the enclosing scope.

    Args:
        list_of_parameters: The single argument passed to every function in
            ``derivatives``.

    Returns:
        A one-dimensional float array of length ``n`` holding
        ``h * f(list_of_parameters)`` for each ``f`` in ``derivatives``.

    Raises:
        ValueError: Raised by ``np.fromiter`` when ``derivatives`` yields
            fewer than ``n`` items.
    """
    # np.float was a plain alias of the builtin float and was removed in
    # NumPy 1.24, so the builtin is used directly.
    return np.fromiter(
        (h * f(list_of_parameters) for f in derivatives),
        dtype=float,
        count=n,
    )