Example #1
def nn_coll_adjusted(person, jokeId):
    N = 10
    nearestNeighbors = nNN_users(N, person, jokeId)
    avg = nn_item_average(person, jokeId)
    k = computeK(person, jokeId, avg)

    adjWeightGen = ( rawRatings[ nearestNeighbors[n][1], jokeId - 1 ] - avg for n in range(N) )
    adjWeights = np.fromiter(adjWeightGen, dtype='float_', count=N)

    nnGen = ( nearestNeighbors[n][0] for n in range(N) )
    ratings = np.fromiter(nnGen, dtype='float_', count=N)
    vals = ratings * adjWeights

    return avg + k * float(np.sum(vals))
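The pattern above feeds generators straight into np.fromiter with an explicit count so NumPy can preallocate the output. A minimal, self-contained sketch of the same idiom, using made-up neighbour data rather than the rawRatings/nNN_users helpers assumed above:

import numpy as np

# Illustrative (similarity, rating) pairs standing in for the nearest-neighbour data.
neighbours = [(0.9, 4.0), (0.7, 2.5), (0.4, 5.0)]
avg = 3.0

# Passing count lets np.fromiter allocate the result once instead of growing it.
sims = np.fromiter((s for s, _ in neighbours), dtype=np.float64, count=len(neighbours))
devs = np.fromiter((r - avg for _, r in neighbours), dtype=np.float64, count=len(neighbours))

k = 1.0 / sims.sum()
prediction = avg + k * float(np.sum(sims * devs))
print(prediction)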
Example #2
    def function(self, simulation, period):
        period = period.start.period(u'month').offset('first-of')
        depcom_entreprise = simulation.calculate('depcom_entreprise', period)
        effectif_entreprise = simulation.calculate('effectif_entreprise', period)
        categorie_salarie = simulation.calculate('categorie_salarie', period)

        seuil_effectif = simulation.legislation_at(period.start).cotsoc.versement_transport.seuil_effectif

        preload_taux_versement_transport()
        public = (categorie_salarie >= 2)
        default_value = 0.0
        taux_aot = fromiter(
            (
                taux_aot_by_depcom.get(depcom_cell, default_value)
                for depcom_cell in depcom_entreprise
                ),
            dtype = 'float',
            )
        taux_smt = fromiter(
            (
                taux_smt_by_depcom.get(depcom_cell, default_value)
                for depcom_cell in depcom_entreprise
                ),
            dtype = 'float',
            )
        # "L'entreprise emploie-t-elle plus de 9 salariés  dans le périmètre de l'Autorité organisatrice de transport
        # (AOT) suivante ou syndicat mixte de transport (SMT)"
        return period, (taux_aot + taux_smt) * or_(effectif_entreprise >= seuil_effectif, public) / 100
Example #3
def test_sqlite():
    enter()
    sqlquery = "(f2>.9) and ((f8>.3) and (f8<.4))"  # the query

    con = sqlite3.connect(":memory:")

    # Create table
    fields = "(%s)" % ",".join(["f%d real"%i for i in range(NC)])
    con.execute("create table bench %s" % fields)

    # Insert NR rows of data
    vals = "(%s)" % ",".join(["?" for i in range(NC)])
    with con:
        con.executemany("insert into bench values %s" % vals,
                        (mv+np.random.rand(NC)-mv for i in xrange(int(NR))))
    after_create()

    out = np.fromiter(
        (row for row in con.execute(
        "select f1, f3 from bench where %s" % sqlquery)),
        dtype="f8,f8")
    after_query("non-indexed")

    # Create indexes
    con.execute("create index f1idx on bench (f1)")
    con.execute("create index f2idx on bench (f8)")
    after_create("index")

    out = np.fromiter(
        (row for row in con.execute(
        "select f1, f3 from bench where %s" % sqlquery)),
        dtype="f8,f8")
    after_query("indexed")

    return out
Example #4
def decode4js(obj):
    """
    return decoded Python object from encoded object.
    """
    out = obj
    if isinstance(obj, dict):
        classname = obj.pop('__class__', None)
        if classname is None:
            return obj
        elif classname == 'Complex':
            out = obj['value'][0] + 1j*obj['value'][1]
        elif classname in ('List', 'Tuple'):
            out = []
            for item in obj['value']:
                out.append(decode4js(item))
            if classname == 'Tuple':
                out = tuple(out)
        elif classname == 'Array':
            if obj['__dtype__'].startswith('complex'):
                re = np.fromiter(obj['value'][0], dtype='double')
                im = np.fromiter(obj['value'][1], dtype='double')
                out = re + 1j*im
            else:
                out = np.fromiter(obj['value'], dtype=obj['__dtype__'])
            out.shape = obj['__shape__']
        elif classname in ('Dict', 'Parameter', 'Group'):
            out = {}
            for key, val in obj.items():
                out[key] = decode4js(val)
            if classname == 'Parameter':
                out = Parameter(**out)
            elif classname == 'Group':
                out = Group(**out)
    return out
Example #5
def _stats_to_movie_results(bam_stats, movie_names):
    """
    Separate out per-movie results from process stats.
    """
    results = []
    movies = sorted(list(movie_names))
    for movie_name in movies:
        def _base_calls():
            for r in bam_stats:
                if r.movieName == movie_name:
                    yield r.qLen

        def _num_passes():
            for r in bam_stats:
                if r.movieName == movie_name:
                    yield r.numPasses

        def _accuracy():
            for r in bam_stats:
                if r.movieName == movie_name:
                    yield r.readScore

        read_lengths = np.fromiter(_base_calls(), dtype=np.int64, count=-1)
        num_passes = np.fromiter(_num_passes(), dtype=np.int64, count=-1)
        accuracy = np.fromiter(_accuracy(), dtype=np.float, count=-1)

        results.append(MovieResult(
            movie_name, read_lengths, accuracy, num_passes))
    return results
Example #6
def _read_outcomes(match):
    """Read gambit outcome format"""
    role_names = _string_list(match.group('roles'))
    num_roles = len(role_names)
    strat_names = [_string_list(m.group()) for m
                   in _RE_STRATS.finditer(match.group('strats')[1:-1])]
    utils.check(
        len(strat_names) == num_roles,
        'player names and strategies differed in length')
    num_strats = np.fromiter(map(len, strat_names), int, num_roles)

    outcomes = [np.zeros(num_roles)]
    for omatch in _RE_OUTCOME.finditer(match.group('outcomes')[1:-1]):
        outcome = omatch.group()[1:-1]
        pays = outcome[next(_RE_STR.finditer(outcome)).end():].split()
        utils.check(
            len(pays) == num_roles, 'outcome has wrong number of payoffs')
        outcomes.append(np.fromiter(  # pragma: no branch
            (float(s.rstrip(',')) for s in pays), float, num_roles))
    outcomes = np.stack(outcomes)

    inds = match.group('inds').split()
    utils.check(len(inds) == num_strats.prod(), 'wrong number of outcomes')
    inds = np.fromiter(map(int, inds), int, len(inds))

    matrix = np.empty(tuple(num_strats) + (num_roles,))
    tinds = tuple(range(num_roles - 1, -1, -1)) + (num_roles,)
    np.transpose(matrix, tinds).flat = outcomes[inds]
    return _normalize(role_names, strat_names, matrix)
Example #7
def calculate_switch_stats(mappable, linkage_map_file, linkage_map_format, MST_grouping_threshold):
    genotypes_of_locus = mappable
    if linkage_map_format.lower() == 'mst':
        ini_map, loci_on_lg = parse_map_file_MST(linkage_map_file)
    elif linkage_map_format.lower() == 'rqtl':   
        ini_map, loci_on_lg = parse_map_file_rqtl(linkage_map_file)
    else:
        raise ValueError("unknown linkage_map_format")
    
    int_arr = convert_genotypes_to_int_array(genotypes_of_locus, ini_map)
    num_loci = int_arr.shape[0]
    num_pairs =  int((num_loci * (num_loci-1))/2)
    pairs = itertools.combinations(int_arr, 2)
    R = numpy.fromiter(getR(pairs), dtype = numpy.float64, count = num_pairs)
    pairs = itertools.combinations(int_arr, 2)
    NR = numpy.fromiter(getNR(pairs), dtype = numpy.float64, count = num_pairs)
    ml_R_frac = get_ml_R_frac(R = R, NR = NR)
    Z = get_LOD(R = R, NR = NR, R_frac = ml_R_frac)
    NR_matrix = get_NR_matrix(NR)
    #rf = get_rf_matrix(ml_R_frac)
    lod = get_lod_matrix(Z)
    index_of_lg = get_index_of_LG(loci_on_lg)
    lgs_longer_than_1 = find_LGs_with_multiple_loci(index_of_lg, loci_on_lg)
    #mean_rf = get_LG_pairwise_mean_rf(lgs_longer_than_1, rf, index_of_lg)
    #mean_lod = get_LG_pairwise_mean_lod(lgs_longer_than_1,lod, index_of_lg)
    sum_lod = get_LG_pairwise_sum_lod(lgs_longer_than_1,lod, index_of_lg)
    sq_sum_lod = get_square_form(sum_lod, lgs_longer_than_1)
    n = len(mappable.items()[0][1]) #number of individuals
    NR_threshold = get_threshold_recombinants_for_same_LGs(n, MST_grouping_threshold)
    NR_under_threshold = get_LG_pairwise_count_NR_threshold(lgs_longer_than_1, NR_matrix, index_of_lg, threshold = NR_threshold)
    sq_NR_matrix = get_square_form(NR_under_threshold, lgs_longer_than_1)
    return(ini_map, sq_sum_lod, sq_NR_matrix, R, NR, lgs_longer_than_1)
Example #8
    def __call__(self, data):
        if isinstance(data, Instance):
            return self.negate != (data[self.column] == self.value)
        if isinstance(data, Storage):
            try:
                return data._filter_same_value(self.column, self.value, self.negate)
            except NotImplementedError:
                pass

        column = data.domain.index(self.column)
        if data.domain[column].is_primitive() and not isinstance(self.value, Real):
            value = data.domain[column].to_val(self.value)
        else:
            value = self.value

        if column >= 0:
            if self.negate:
                retain = np.fromiter((inst[column] != value for inst in data), bool, len(data))
            else:
                retain = np.fromiter((inst[column] == value for inst in data), bool, len(data))
        else:
            column = -1 - column
            if self.negate:
                retain = np.fromiter((inst._metas[column] != value for inst in data), bool, len(data))
            else:
                retain = np.fromiter((inst._metas[column] == value for inst in data), bool, len(data))
        return data[retain]
Example #9
def _listparser(dlist, freq=None):
    "Constructs a DateArray from a list."
    dlist = np.array(dlist, copy=False, ndmin=1)
    # Case #1: dates as strings .................
    if dlist.dtype.kind in 'SU':
        #...construct a list of dates
        dlist = np.fromiter((Date(freq, string=s).value for s in dlist),
                            dtype=int)
    # Case #2: dates as numbers .................
    elif dlist.dtype.kind in 'if':
        #...hopefully, they are values
        pass
    # Case #3: dates as objects .................
    elif dlist.dtype.kind == 'O':
        template = dlist[0]
        #...as Date objects
        if isinstance(template, Date):
            dlist = np.fromiter((d.value for d in dlist), dtype=int)
            if freq in (_c.FR_UND, None):
                freq = template.freq
        #...as mx.DateTime objects
        elif hasattr(template, 'absdays'):
            dlist = np.fromiter((Date(freq, datetime=m) for m in dlist),
                                dtype=int)
        #...as datetime objects
        elif hasattr(template, 'toordinal'):
            dlist = np.fromiter((Date(freq, datetime=d) for d in dlist),
                                dtype=int)
    #
    result = dlist.view(DateArray)
    result.freq = freq
    return result
Example #10
    def mmphi(self):
        """Returns the Morris-Mitchell sampling criterion for this Latin
        hypercube.
        """

        if self.phi is None:
            distdict = {}

            # Calculate the norm between each pair of points in the DOE
            arr = self.doe
            n, m = arr.shape
            for i in range(1, n):
                nrm = np.linalg.norm(arr[i] - arr[:i], ord=self.p, axis=1)
                for j in range(0, i):
                    nrmj = nrm[j]
                    if nrmj in distdict:
                        distdict[nrmj] += 1
                    else:
                        distdict[nrmj] = 1

            size = len(distdict)

            distinct_d = np.fromiter(distdict, dtype=float, count=size)

            # Mutltiplicity array with a count of how many pairs of points
            # have a given distance
            J = np.fromiter(itervalues(distdict), dtype=int, count=size)

            self.phi = sum(J * (distinct_d ** (-self.q))) ** (1.0 / self.q)

        return self.phi
Example #11
def _bam_file_to_movie_results(file_name):
    """
    Read what is assumed to be a single BAM file (as a ConsensusReadSet).
    """
    from pbcore.io import IndexedBamReader
    results = []
    with IndexedBamReader(file_name) as bam:
        for rg in bam.readGroupTable:
            assert rg["ReadType"] == "CCS"

        movies = list(set([rg["MovieName"] for rg in bam.readGroupTable]))
        for movie_name in movies:
            def _base_calls():
                for r in bam:
                    if r.movieName == movie_name:
                        yield r.peer.query_length

            def _num_passes():
                for r in bam:
                    if r.movieName == movie_name:
                        yield r.numPasses

            def _accuracy():
                for r in bam:
                    if r.movieName == movie_name:
                        yield r.readScore

            read_lengths = np.fromiter(_base_calls(), dtype=np.int64, count=-1)
            num_passes = np.fromiter(_num_passes(), dtype=np.int64, count=-1)
            accuracy = np.fromiter(_accuracy(), dtype=np.float, count=-1)

            results.append(MovieResult(
                file_name, movie_name, read_lengths, accuracy, num_passes))
        return results
Example #12
    def __init__(self, parsed_mesh, borders=None, default_border="land",
                 ignore_given_edges=False, projection=None):
        if borders is None:
            borders = {}
        self.elements = parsed_mesh.elements
        self.nodes = meshtools.project_nodes(projection, parsed_mesh.elements,
                                             parsed_mesh.nodes,
                                             attempt_flatten=True)
        self.edge_collections = \
            meshtools.organize_edges(parsed_mesh.edges, borders=borders,
                                     default_border=default_border)

        if max(map(len, self.edge_collections.values())) == 0 \
                or ignore_given_edges:
            self.edge_collections = {default_border:
                 set(meshtools.extract_boundary_edges(self.elements))}

        if len(np.unique(self.elements)) != self.nodes.shape[0]:
            self._fix_unused_nodes()

        self.boundary_nodes = {}
        interior_nodes = set(range(1, len(self.nodes)+1))
        for name, edge_collection in self.edge_collections.items():
            self.boundary_nodes[name] = \
                np.fromiter(set(node for edge in edge_collection
                                for node in edge[0:-1]), int)
            interior_nodes -= set(self.boundary_nodes[name])

        self.interior_nodes = np.fromiter(interior_nodes, int)
        self.order = _element_order(self.elements.shape[1])
        self.mean_stepsize = self._get_stepsize()
Example #13
    def system_values(self, when: Union[Real, Sequence[Real]], which: Union[str, Sequence[str]]=None):
        which = self._observable_names if which is None else which
        max_when = when if isinstance(when, Real) else max(when)

        self.integrate_to(max_when)

        if len(self.solution_times) == 1:
            # Handle scipy bug when there is only one time point
            # TODO (drhagen): super hacky solution here
            state_interpolator = lambda t: self.solution_states[0]
        else:
            state_interpolator = interp1d(self.solution_times, self.solution_states, axis=0, assume_sorted=True,
                                          copy=False)

        # Extract values from solution
        output_fun = self.ode_system.outputs
        if isinstance(which, str) and isinstance(when, Real):
            states = state_interpolator(when)
            return output_fun(which, when, states)
        elif isinstance(which, str):
            return np.fromiter((output_fun(which, when_i, state_interpolator(when_i)) for when_i in when),
                               'float', count=len(when))
        elif isinstance(when, Real):
            states = state_interpolator(when)
            return np.fromiter((output_fun(which_i, when, states) for which_i in which),
                               'float', count=len(which))
        else:
            def values():
                for when_i in when:
                    states = state_interpolator(when_i)
                    for which_i in which:
                        yield output_fun(which_i, when_i, states)

            values = np.fromiter(values(), 'float', count=len(which)*len(when))
            return np.reshape(values, [len(when), len(which)])
Example #14
 def fit(self, X):
     adj_matrix = self.adj_matrix_strategy(X)
     if self.initial_ordering is None:
         ordering = np.arange(len(X), dtype=int)
     else:
         ordering = self.initial_ordering
         assert len(ordering) == len(X), \
             "initial_ordering has wrong length"
     order = prc.createOrder(ordering)
     labels = prc.ivec([0]*len(X))
     policy = prc.iprPolicyStruct()
     policy.iprNumberOfClustering = self.number_of_clustering
     policy.iprMaxIterations = self.max_iterations
     policy.iprConvergenceThreshold = self.convergence_threshold
     res = prc.ipr(adj_matrix, order, labels,
                   self.n_clusters, policy)
     self._ordering = np.fromiter(order.vdata, dtype=int)
     ## calculate boundaries of original matrix w/ new ordering
     prc.calcBoundaries(adj_matrix, order)
     raw_boundaries = order.b.b
     self._boundary = np.fromiter(raw_boundaries,
                                  dtype=float)[:-1] # XXX: slice needed?
     self._width = np.sort(self._boundary)[::-1]
     self.labels_ = np.fromiter(labels, dtype=int)
     self._pinch_ratios, _ = compute_pr_cluster_indices(
         self._ordering, self._boundary, self.n_clusters,
         compute_thick_part_PR)
Example #15
    def _fit_once(self, X, initial_order):
        adj_matrix = self.adj_matrix_strategy(X)
        N = adj_matrix.shape[0]
        degrees = adj_matrix.sum(axis=1)
        boundary = np.zeros(N)

        ordering = prc.createOrder(initial_order)
        policy = prc.tiloPolicyStruct()
        if self.refine_order:
            prc.RefineTILO(adj_matrix, ordering, policy)
        else:
            prc.TILO(adj_matrix, ordering, policy)
            
        boundary = np.fromiter(ordering.b.b, dtype=float)[:-1]
        ordering = np.fromiter(ordering.vdata, dtype=int)

        #print 'BDR', boundary
        #print 'PRS', pinch_ratios(boundary)
        #print 'ORD', ordering
        pinch_ratios, clusters = self._find_clusters(ordering, boundary)
        labels = np.zeros(N, dtype=int)
        for i, cluster in enumerate(clusters):
            labels[cluster] = i

        return ordering, boundary, labels, pinch_ratios
Example #16
def _match_sub(disc_clsdict, gold_clsdict, phn_corpus, names, label,
               verbose, n_jobs):
    em = eval_from_psets
    if verbose:
        print '  matching ({2}): subsampled {0} files in {1} sets'\
            .format(sum(map(len, names)), len(names), label)
    with verb_print('  matching ({0}): prepping psets'.format(label),
                             verbose, True, True, True):
        pdiscs = [make_pdisc(disc_clsdict.restrict(fs, True),
                             False, False)
                  for fs in names]
        pgolds = [make_pgold(gold_clsdict.restrict(fs, True),
                             False, False)
                  for fs in names]
        psubs = [make_psubs(disc_clsdict.restrict(fs, True),
                            phn_corpus, 3, 20, False, False)
                 for fs in names]
    with verb_print('  matching ({0}): calculating scores'
                             .format(label), verbose, False, True, False):
        tp, tr = izip(*Parallel(n_jobs=n_jobs,
                                verbose=5 if verbose else 0,
                                pre_dispatch='n_jobs')
                      (delayed(em)(pdisc, pgold, psub)
                      for pdisc, pgold, psub in zip(pdiscs, pgolds, psubs)))
    tp, tr = np.fromiter(tp, dtype=np.double), np.fromiter(tr, dtype=np.double)
    tp, tr = praggregate(tp, tr)
    return tp, tr
Example #17
def token_type(disc_clsdict, wrd_corpus, fragments_within, fragments_cross,
               dest, verbose, n_jobs):
    if verbose:
        print banner('TOKEN/TYPE')
    ptoc, rtoc, ptyc, rtyc = _token_type_sub(disc_clsdict, wrd_corpus,
                                             fragments_cross, 'cross',
                                             verbose, n_jobs)
    ftoc = np.fromiter((fscore(ptoc[i], rtoc[i]) for i in xrange(ptoc.shape[0])),
                       dtype=np.double)
    ftyc = np.fromiter((fscore(ptyc[i], rtyc[i]) for i in xrange(ptyc.shape[0])),
                       dtype=np.double)

    ptow, rtow, ptyw, rtyw = _token_type_sub(disc_clsdict, wrd_corpus,
                                             fragments_within, 'within',
                                             verbose, n_jobs)
    ftow = np.fromiter((fscore(ptow[i], rtow[i]) for i in xrange(ptow.shape[0])),
                       dtype=np.double)
    ftyw = np.fromiter((fscore(ptyw[i], rtyw[i]) for i in xrange(rtyw.shape[0])),
                       dtype=np.double)
    with open(path.join(dest, 'token_type'), 'w') as fid:
        fid.write(pretty_score_f(ptoc, rtoc, ftoc, 'token total',
                                 len(fragments_cross),
                                 sum(map(len, fragments_cross))))
        fid.write('\n')
        fid.write(pretty_score_f(ptyc, rtyc, ftyc, 'type total',
                                 len(fragments_cross),
                                 sum(map(len, fragments_cross))))
        fid.write('\n')
        fid.write(pretty_score_f(ptow, rtow, ftow, 'token within-speaker only',
                                 len(fragments_within),
                                 sum(map(len, fragments_within))))
        fid.write('\n')
        fid.write(pretty_score_f(ptyw, rtyw, ftyw, 'type within-speaker only',
                                 len(fragments_within),
                                 sum(map(len, fragments_within))))
Example #18
def where_close(pos, separation, intensity=None):
    """ Returns indices of features that are closer than separation from other
    features. When intensity is given, the one with the lowest intensity is
    returned; otherwise the most top-left one is returned (to avoid randomness).

    To be implemented in trackpy v0.4"""
    if len(pos) == 0:
        return []
    separation = validate_tuple(separation, pos.shape[1])
    if any([s == 0 for s in separation]):
        return []
    # Rescale positions, so that pairs are identified below a distance
    # of 1.
    pos_rescaled = pos / separation
    duplicates = cKDTree(pos_rescaled, 30).query_pairs(1 - 1e-7)
    if len(duplicates) == 0:
        return []
    index_0 = np.fromiter((x[0] for x in duplicates), dtype=int)
    index_1 = np.fromiter((x[1] for x in duplicates), dtype=int)
    if intensity is None:
        to_drop = np.where(np.sum(pos_rescaled[index_0], 1) >
                           np.sum(pos_rescaled[index_1], 1),
                           index_1, index_0)
    else:
        intensity_0 = intensity[index_0]
        intensity_1 = intensity[index_1]
        to_drop = np.where(intensity_0 > intensity_1, index_1, index_0)
        edge_cases = intensity_0 == intensity_1
        if np.any(edge_cases):
            index_0 = index_0[edge_cases]
            index_1 = index_1[edge_cases]
            to_drop[edge_cases] = np.where(np.sum(pos_rescaled[index_0], 1) >
                                           np.sum(pos_rescaled[index_1], 1),
                                           index_1, index_0)
    return np.unique(to_drop)
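The np.fromiter step of interest here is unpacking a set of index pairs into two parallel integer arrays. A small stand-alone sketch of just that step, with a toy set in place of the cKDTree.query_pairs result:

import numpy as np

duplicates = {(0, 3), (2, 5), (1, 4)}  # toy pair set; iteration order is arbitrary

# Pull the first and second member of every pair into separate int arrays,
# much as where_close does before comparing intensities/positions.
index_0 = np.fromiter((a for a, _ in duplicates), dtype=int, count=len(duplicates))
index_1 = np.fromiter((b for _, b in duplicates), dtype=int, count=len(duplicates))
print(index_0, index_1)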
Example #19
def flatten_correspondences(fm_nestlist, fs_nestlist, daid_nestlist, query_sccw):
    """
    helper
    """
    iflat_ = utool.iflatten
    DAID_DTYPE = hstypes.INDEX_TYPE
    FS_DTYPE = hstypes.FS_DTYPE
    FM_DTYPE = hstypes.FM_DTYPE

    #_all_daids = np.array(list(utool.iflatten(daid_nestlist)), dtype=hstypes.INDEX_TYPE)
    #_all_scores = np.array(list(utool.iflatten(fs_nestlist)), dtype=hstypes.FS_DTYPE) * query_sccw
    #_all_matches = np.array(list(utool.iflatten(fm_nestlist)), dtype=hstypes.FM_DTYPE)

    #count1 = sum(map(len, daid_nestlist))
    count = sum(map(len, fs_nestlist))
    #count3 = sum(map(len, fm_nestlist))
    all_daids   = np.fromiter(iflat_(daid_nestlist), DAID_DTYPE, count)
    all_scores  = np.fromiter(iflat_(fs_nestlist), FS_DTYPE, count) * query_sccw
    # Shape hack so we can use fromiter which outputs a 1D array
    all_matches = np.fromiter(iflat_(iflat_(fm_nestlist)), FM_DTYPE, 2 * count)
    all_matches.shape = (all_matches.size / 2, 2)

    if utool.DEBUG2:
        assert len(all_daids) == len(all_scores), 'inconsistent len'
        assert len(all_matches) == len(all_scores), 'inconsistent len'
        print('[smk_core] checked build_chipmatch flatten ...ok')

    return all_matches, all_scores, all_daids
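Because np.fromiter only produces 1-D arrays, the code above flattens the (row, col) match pairs into a stream of 2*count integers and fixes the shape afterwards. A minimal sketch of that trick with toy data (the hstypes dtypes and utool.iflatten are replaced by plain equivalents):

import numpy as np

fm_nestlist = [[(1, 2), (3, 4)], [(5, 6)]]     # toy nested match lists
count = sum(map(len, fm_nestlist))

flat = (v for pair_list in fm_nestlist for pair in pair_list for v in pair)
all_matches = np.fromiter(flat, dtype=np.int64, count=2 * count).reshape(count, 2)
print(all_matches)   # shape (3, 2): one row per match pair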
Example #20
def _join_staves(staff_dist, *sections):
  staff_dict = dict((s, np.array([s])) for s in sections[0])
  for i, cur_staves in enumerate(sections[1:]):
    last_staves = np.sort(np.fromiter(staff_dict.keys(), int))
    dist = np.abs(last_staves[None, :] - cur_staves[:, None])
    did_match = dist.min(axis=1) < staff_dist

    new_matches = dict()
    
    matching_staves = cur_staves[did_match]
    matches = np.argmin(dist[did_match, :], axis=1)
    matches, idx = np.unique(matches, return_index=True)
    matching_staves = matching_staves[idx]
    for staff_ind, new_point in zip(matches, matching_staves):
      prev_staff = staff_dict[last_staves[staff_ind]]
      new_matches[new_point] = np.concatenate([prev_staff, [new_point]])
    
    non_matches = cur_staves[~did_match]
    for non_match in non_matches:
      new_matches[non_match] = np.asarray([-non_match] * (i + 1) + [non_match])
    
    skipped = set(staff_dict.keys()).difference(s[-2] for s in new_matches.values())
    for s in skipped:
      new_matches[s] = np.concatenate([staff_dict[s], [-s]])
    staff_dict = new_matches
  return np.asarray([staff_dict[s] for s in np.sort(np.fromiter(staff_dict.keys(), int))])
Example #21
 def __getpe(self,tree):
     """ Given the symbol counts at various depths, calculate the memoryless
     probabilities (in log2-space) of the corresponding sequences 
     using the KT estimator
     
     KT-estimate is defined as:
     Pe := Prod_{foreach symbol in ALPHABET}( (symbol counts-1/2)! ) / ..
        ( ( total symbol counts - len(ALPHABET)/2 )! ) 
     
     Keyword arguments:
     tree:   (dict) : keys are occurring contexts (str), values are
                          symbol counts for symbols of the alphabet given the context
     
     Returns:
     tree_pe: (dict): keys are occurring contexts (str), values are the 
         memoryless probabilities of the sequence corresponding to this context
         / we define log_2(0) = 0
     """
     
     treepe = dict()
     for context,vals in tree.items():
         lengthsubseq = sum(vals)
         if lengthsubseq > 0:
             # KT - estimator
             denum = np.log2(np.fromiter(range(lengthsubseq), float)+len(ALPHABET)/2).sum()
             numer = 0
             for x in vals:
                 if x>0:
                     numer+=np.log2(np.fromiter(range(1,x+1), float)-1/2).sum()
             treepe[context] = numer-denum
         else:
             treepe[context] = 0            
     return treepe
Example #22
def SNrest():
    path = "../data/restframe/"
    objnames, band, mjd, mag, magerr, stype = [],[],[],[],[], []
    formatcode = ('|S16,'.rstrip('#') +'f8,'*6 + '|S16,' + 4 * 'f8,' + '|S16,' * 3 + 'f8,' * 2 + '|S16,' + 'f8,' * 2)
    filenames = os.listdir(path)
    for filename in filenames:
        data = np.recfromtxt(os.path.join(path, filename),usecols = (0,1,2,3,4), dtype = formatcode, names = True, skip_header = 13, case_sensitive = 'lower', invalid_raise = False)
        name = np.empty(len(data.band), dtype = 'S20')
        name.fill(filename)
        objnames.append(name)
        data.band = [x.lower() for x in data.band]
        band.append(data.band)
        mjd.append(data.phase)
        mag.append(data.mag)
        magerr.append(data.err)
        
        
    objnames = np.fromiter(itertools.chain.from_iterable(objnames), dtype = 'S20')
    band = np.fromiter(itertools.chain.from_iterable(band), dtype = 'S16')
    mjd = np.fromiter(itertools.chain.from_iterable(mjd), dtype = 'float')
    mag = np.fromiter(itertools.chain.from_iterable(mag), dtype = 'float')
    magerr = np.fromiter(itertools.chain.from_iterable(magerr), dtype = 'float')
    stype = np.full(len(objnames), 1)
    LC = Lightcurve(objnames, band, mjd, mag, magerr, stype)
    return LC
Example #23
  def _computeNormalizations(self):
    
    #Use a generator instead of a list to gain speed
    generator1               = (x.value for x in self.parameters.values()[1::2])
    self.alphas              = numpy.fromiter(generator1,float)
    #alphasDiff               = self.alphas[:-1]-self.alphas[1:]
    
    generator2               = (x.value for x in self.parameters.values()[2::2])
    self.betas               = numpy.fromiter(generator2,float)
    #betasDiff                = self.betas[:-1]-self.betas[1:]
    
    #bLogEpivot               = self.betas*self.logPivotEnergies
    #bLogEpivotDiff           = bLogEpivot[1:]-bLogEpivot[:-1]

        
    self.normalizations[0]   = self.parameters['K'].value
    self.normalizations[1:-1]  = (self._logP(self.energyBreaks,self.alphas[:-1],self.betas[:-1],self.pivotEnergies[:-1])/
                                  self._logP(self.energyBreaks,self.alphas[1:],self.betas[1:],self.pivotEnergies[1:])
                                  )
    self.normalizations[-1]  = 1.0
    
    #This compute the cumulative product of the array
    #(i.e., the first elements is a0, the second a0*a1,
    #the third a0*a1*a2, and so on...)
    self.products            = numpy.cumprod(self.normalizations)
Example #24
 def test_vector(self):
     v1 = Vector(self.list1)
     v2 = Vector(2*x for x in self.list1)
     self.assertEqual(2*v1, v2)
     n1 = np.fromiter(v1, int)
     n2 = np.fromiter(v2, int)
     self.assertEqual(v1.dot(v2), np.dot(n1,n2))
Example #25
    def get_charge_resolution(self):
        """
        Calculate and obtain the charge resolution graph arrays.

        Returns
        -------
        true_charge : ndarray
            The X axis true charges.
        chargeres : ndarray
            The Y axis charge resolution values.
        chargeres_error : ndarray
            The error on the charge resolution.
        scaled_chargeres : ndarray
            The Y axis charge resolution divided by the Goal.
        scaled_chargeres_error : ndarray
            The error on the charge resolution divided by the Goal.
        """
        log.debug('[chargeres] Calculating charge resolution')
        true_charge = np.fromiter(iter(self.sum_dict.keys()), dtype=int)
        summed_charge = np.fromiter(iter(self.sum_dict.values()), dtype=float)
        num = np.fromiter(iter(self.n_dict.values()), dtype=int)

        chargeres = np.sqrt((summed_charge / num) + true_charge) / true_charge
        chargeres_error = chargeres * (1 / np.sqrt(2 * num))

        scale = self.goal(true_charge)
        scaled_chargeres = chargeres/scale
        scaled_chargeres_error = chargeres_error/scale

        return true_charge, chargeres, chargeres_error, \
            scaled_chargeres, scaled_chargeres_error
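Dictionary views are iterable, so they can be handed to np.fromiter directly, as the method above does with its sum/count dictionaries. A tiny sketch under the assumption that both dicts were filled in the same key order (guaranteed for ordinary dicts in Python 3.7+); the values are made up:

import numpy as np

sum_dict = {10: 2.5, 50: 30.1, 100: 80.4}   # toy stand-ins for self.sum_dict / self.n_dict
n_dict = {10: 4, 50: 7, 100: 9}

true_charge = np.fromiter(sum_dict.keys(), dtype=int, count=len(sum_dict))
summed_charge = np.fromiter(sum_dict.values(), dtype=float, count=len(sum_dict))
num = np.fromiter(n_dict.values(), dtype=int, count=len(n_dict))
print(np.sqrt((summed_charge / num) + true_charge) / true_charge)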
Example #26
def extract_surf(jpgfile):
  start = time.time()
  out = os.path.join(os.path.dirname(jpgfile), os.path.basename(jpgfile)[:-4] + 'surf.npy')
  if os.path.exists(out):
    INFO('%s already exists' % out)
    return

  im = cv.LoadImageM(jpgfile, cv.CV_LOAD_IMAGE_GRAYSCALE)
  INFO('cv loaded %dx%d image' % (im.rows, im.cols))

  g, features = cv.ExtractSURF(im, None, cv.CreateMemStorage(), (0, 500, 3, 4))
  data = np.ndarray(len(features), SURFReader.surf_dtype)

  for i in range(len(features)):
    data[i]['vec'] = np.fromiter(features[i], np.float32)
    data[i]['geom'] = np.fromiter([g[i][0][0], g[i][0][1], g[i][2]], np.uint16)
    data[i]['index'] = 0

## Simple Quantization into bytes
#  for i in range(len(features)):
#    surfvalues = np.fromiter(features[i], np.float)
#
#    assert max(surfvalues) <= 1.0
#    assert min(surfvalues) >= -1.0
#
#    data[i]['vec'] = np.int8(127*surfvalues)
#    data[i]['geom'] = np.fromiter([g[i][0][0], g[i][0][1], g[i][2]], np.uint16)
#    data[i]['index'] = 0

  save_atomic(lambda d: np.save(d, data), out)
  INFO('cv wrote %d features' % len(features))
  INFO_TIMING('took %f seconds' % (time.time() - start))
Example #27
    def __init__(self, image):
        # number of points
        self.nx = int(image.shape[0])
        self.ny = int(image.shape[1])

        # spacing
        self.dx = 1.0
        self.dy = 1.0

        # limits
        self.xmin = 0
        self.ymin = 0

        self.xmax = float(self.nx)
        self.ymax = float(self.ny)

        # lengths
        self.lx = abs(self.xmax - self.xmin)
        self.ly = abs(self.ymax - self.ymin)

        # mesh
        self.y, self.x = np.meshgrid(
            np.fromiter(((0.5 + i) * self.dx for i in range(self.nx)),
                dtype=np.float64, count=self.nx),
            np.fromiter(((0.5 + i) * self.dy for i in range(self.ny)),
                dtype=np.float64, count=self.ny))
Example #28
    def get_world_endpoints(edges, pos, scale):
        """Returns the edge endpoints in homogeneous world coordinates

        Parameters
        ----------
        edges : iterable of Edge
        pos : numpy array
        scale : float

        Returns
        -------
        tuple of iterable of points
            a value in the form `(start_points, end_points)`, where
            `start_points` and `end_points` are in the form of a numpy matrix
        """
        edge_starts = (coord
                       for edge in edges
                       for coord in chain(scale * edge.start + pos, (1.0, )))
        edge_ends = (coord
                     for edge in edges
                     for coord in chain(scale * edge.end + pos, (1.0, )))

        homo_starts = np.fromiter(edge_starts, np.float, count=4 * len(edges))
        homo_ends = np.fromiter(edge_ends, np.float, count=4 * len(edges))

        homo_starts = homo_starts.reshape((len(edges), 4))
        homo_ends = homo_ends.reshape((len(edges), 4))

        return homo_starts, homo_ends
Example #29
def decodePacket(bin_data,  data_size = None, packet_mode = 'i64u', track_t0 = False):
    # Works only for i64bit unpacked mode
    global compressed_t0
    #assert (packet_mode == 'i64u')
    if packet_mode == 'i64u':
        data_size = len(bin_data)//ctypes.sizeof(Timetag_I64)

        t = ctypes.cast(bin_data, timetag_I64_p)

        time = np.fromiter((i.time for i in t), np.int64, data_size)
        channel = np.fromiter((i.channel for i in t), np.int8, data_size)

    if packet_mode == 'i64c':
        data_size = len(bin_data)//ctypes.sizeof(Timetag_I64c)

        t = ctypes.cast(bin_data, timetag_I64c_p)
        #if t[0].highlow == 0:
        #    ctypes.cast(bin_data, timetag_I64c_p)  
        highlow = np.fromiter((i.highlow for i in t ), np.uint64, data_size)
        time =   np.fromiter((i.timehigh for i in t ), np.uint64, data_size)+(cumsum(highlow))*2**27
        channel = np.fromiter((i.channel for i in t ), np.uint8, data_size)
        time    = time[highlow == 0]
        channel = channel[highlow == 0]

        if track_t0:
            time = time + compressed_t0
            compressed_t0 += sum(highlow)*2**27
        else:
            track_t0 = 0
    return(time, channel)
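The decoder above casts a raw byte buffer to an array of ctypes structs and then streams individual fields into NumPy arrays with np.fromiter. A self-contained sketch of that approach with a hypothetical record layout (the real Timetag_I64 struct is not shown in the snippet):

import ctypes
import numpy as np

class Timetag(ctypes.Structure):               # illustrative layout only
    _fields_ = [("time", ctypes.c_int64), ("channel", ctypes.c_int8)]

buf = bytes(Timetag(1000, 1)) + bytes(Timetag(2000, 2))   # fake binary packet
n = len(buf) // ctypes.sizeof(Timetag)
records = (Timetag * n).from_buffer_copy(buf)

# Pull one field at a time out of the ctypes records into NumPy arrays.
time = np.fromiter((r.time for r in records), dtype=np.int64, count=n)
channel = np.fromiter((r.channel for r in records), dtype=np.int8, count=n)
print(time, channel)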
Example #30
def CalculateMASE(train_guess, train_correct, test_guess, test_correct):
    """Calculates the Mean Absolute Scaled Error"""

    def CalculateNaive(train_correct):

        error = 0
        c = 0
        for t1, t2 in zip(train_correct[1:], train_correct):
            res = abs(t1-t2)
            if not numpy.isnan(res):
                error += res
                c += 1
        return error/c


    try:
        abs_error_train = abs(train_guess - train_correct)
        abs_error_test  = abs(test_guess - test_correct)
    except TypeError:
        #if they're the wrong type then convert them accordingly
        train_guess = numpy.fromiter(train_guess, numpy.float)
        train_correct = numpy.fromiter(train_correct, numpy.float)
        test_guess = numpy.fromiter(test_guess, numpy.float)
        test_correct = numpy.fromiter(test_correct, numpy.float)
        abs_error_train = abs(train_guess - train_correct)
        abs_error_test  = abs(test_guess - test_correct)

    naive_scale = CalculateNaive(train_correct)

    train_scaled_errors = abs_error_train/naive_scale
    test_scaled_errors = abs_error_test/naive_scale

    train_mase = nanmean(train_scaled_errors)
    test_mase = nanmean(test_scaled_errors)
    return train_mase, test_mase
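The except TypeError branch above exists because the inputs may arrive as generators or other types that do not support elementwise subtraction, and np.fromiter is used to materialise them as float arrays first. A minimal illustration of that fallback with made-up values:

import numpy as np

train_guess = (x * 0.9 for x in range(5))        # a generator: has no '-' operator
train_correct = (float(x) for x in range(5))

guess = np.fromiter(train_guess, dtype=float)
correct = np.fromiter(train_correct, dtype=float)
print(np.abs(guess - correct))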
Example #31
    def recognize_batch(self, xs_list, recog_args, char_list, rnnlm=None):
        """E2E beam search.

        :param list xs_list: list of list of input acoustic feature arrays
                [[(T1_1, D), (T1_2, D), ...],[(T2_1, D), (T2_2, D), ...], ...]
        :param Namespace recog_args: argument Namespace containing options
        :param list char_list: list of characters
        :param torch.nn.Module rnnlm: language model module
        :return: N-best decoding results
        :rtype: list
        """
        prev = self.training
        self.eval()
        ilens_list = [
            np.fromiter((xx.shape[0] for xx in xs_list[idx]), dtype=np.int64)
            for idx in range(self.num_encs)
        ]

        # subsample frame
        xs_list = [[
            xx[::self.subsample_list[idx][0], :] for xx in xs_list[idx]
        ] for idx in range(self.num_encs)]

        xs_list = [[
            to_device(self,
                      to_torch_tensor(xx).float()) for xx in xs_list[idx]
        ] for idx in range(self.num_encs)]
        xs_pad_list = [
            pad_list(xs_list[idx], 0.0) for idx in range(self.num_encs)
        ]

        # 1. Encoder
        hs_pad_list, hlens_list = [], []
        for idx in range(self.num_encs):
            hs_pad, hlens, _ = self.enc[idx](xs_pad_list[idx], ilens_list[idx])
            hs_pad_list.append(hs_pad)
            hlens_list.append(hlens)

        # calculate log P(z_t|X) for CTC scores
        if recog_args.ctc_weight > 0.0:
            if self.share_ctc:
                lpz_list = [
                    self.ctc[0].log_softmax(hs_pad_list[idx])
                    for idx in range(self.num_encs)
                ]
            else:
                lpz_list = [
                    self.ctc[idx].log_softmax(hs_pad_list[idx])
                    for idx in range(self.num_encs)
                ]
            normalize_score = False
        else:
            lpz_list = None
            normalize_score = True

        # 2. Decoder
        hlens_list = [
            torch.tensor(list(map(int, hlens_list[idx])))
            for idx in range(self.num_encs)
        ]  # make sure hlens is tensor
        y = self.dec.recognize_beam_batch(
            hs_pad_list,
            hlens_list,
            lpz_list,
            recog_args,
            char_list,
            rnnlm,
            normalize_score=normalize_score,
        )

        if prev:
            self.train()
        return y
Example #32
def _gen_episodes(router_type: str,
                  one_out: bool,
                  factory: RouterFactory,
                  num_episodes: int,
                  bar=None,
                  sinks=None,
                  random_seed=None) -> pd.DataFrame:
    G = factory.topology_graph
    nodes = sorted(G.nodes)
    n = len(nodes)

    amatrix = nx.convert_matrix.to_numpy_array(G,
                                               nodelist=nodes,
                                               weight=factory.edge_weight,
                                               dtype=np.float32)
    gstate = np.ravel(amatrix)

    best_transitions = defaultdict(dict)
    lengths = defaultdict(dict)

    for start_node in nodes:
        for finish_node in nodes:
            if start_node != finish_node and nx.has_path(
                    G, start_node, finish_node):
                path = nx.dijkstra_path(G,
                                        start_node,
                                        finish_node,
                                        weight=factory.edge_weight)
                length = nx.dijkstra_path_length(G,
                                                 start_node,
                                                 finish_node,
                                                 weight=factory.edge_weight)

                best_transitions[start_node][finish_node] = path[1] if len(
                    path) > 1 else start_node
                lengths[start_node][finish_node] = length

    if sinks is None:
        sinks = nodes

    additional_inputs = None
    routers = {}
    node_dim = 1 if one_out else n

    for rid in nodes:
        router = factory._makeHandler(rid)
        update_network(router, G)
        routers[rid] = router
        if additional_inputs is None:
            additional_inputs = router.additional_inputs

    cols = ['addr', 'dst']

    if 'ppo' in router_type:
        for inp in additional_inputs:
            cols += add_input_cols(inp['tag'], inp.get('dim', n))
        cols += ['next_addr', 'addr_v_func']
    else:
        if node_dim == 1:
            cols.append('neighbour')
        else:
            cols += get_neighbors_cols(node_dim)

        for inp in additional_inputs:
            cols += add_input_cols(inp['tag'], inp.get('dim', n))

        if node_dim == 1:
            cols.append('predict')
        else:
            cols += get_target_cols(n)

    df = pd.DataFrame(columns=cols)

    if random_seed is not None:
        set_random_seed(random_seed)

    pkg_id = 1
    episode = 0
    while episode < num_episodes:
        dst = random.choice(sinks)
        cur = random.choice(only_reachable(G, dst, nodes))
        router = routers[cur]
        out_nbrs = G.successors(router.id)
        nbrs = only_reachable(G, dst, out_nbrs)

        if len(nbrs) == 0:
            continue

        episode += 1

        # ppo addition
        if 'ppo' in router_type:
            next_addr = best_transitions[cur][dst]
            full_path_length = -lengths[cur][dst]

            row = [cur[1], dst[1]
                   ] + gstate.tolist() + [next_addr[1], full_path_length]
            df.loc[len(df)] = row
        else:
            pkg = Package(pkg_id, DEF_PKG_SIZE, dst, 0, None)
            state = list(router._getNNState(pkg, nbrs))

            def plen_func(v):
                plen = nx.dijkstra_path_length(G,
                                               v,
                                               dst,
                                               weight=factory.edge_weight)
                elen = G.get_edge_data(cur, v)[factory.edge_weight]
                return -(plen + elen)

            if one_out:
                predict = np.fromiter(map(plen_func, nbrs), dtype=np.float32)
                state.append(predict)
                cat_state = np.concatenate([unsqueeze(y) for y in state],
                                           axis=1)
                for row in cat_state:
                    df.loc[len(df)] = row
            else:
                predict = np.fromiter(map(
                    lambda i: plen_func(('router', i))
                    if ('router', i) in nbrs else -INFTY, range(n)),
                                      dtype=np.float32)
                state.append(predict)
                state_ = [unsqueeze(y, 1) for y in state]
                # pprint.pprint(state_)
                cat_state = np.concatenate(state_)
                df.loc[len(df)] = cat_state

        if bar is not None:
            bar.update(1)

    return df
Example #33
def f(data):
    s = "".join(f"{int(knot_hash(f'{data}-{i}'), 16):0128b}"
                for i in range(128))
    return s.count("1"), label(np.fromiter(s, dtype=int).reshape(128, 128))[1]
Example #34
def fromiter(iter, dtype, count=None):
    return copy(numpy.fromiter(iter, dtype, count))
Example #35
 def _section_mean_radii(tree_radii, section_begs, section_ends):
     """Returns the mean radius per section"""
     return np.fromiter(
         (np.mean(tree_radii[b: e]) for b, e in zip(section_begs, section_ends)),
         dtype=np.float
     )
Example #36
def mantel(x, y, method='pearson', permutations=999, alternative='two-sided',
           strict=True, lookup=None):
    """Compute correlation between distance matrices using the Mantel test.

    The Mantel test compares two distance matrices by computing the correlation
    between the distances in the lower (or upper) triangular portions of the
    symmetric distance matrices. Correlation can be computed using Pearson's
    product-moment correlation coefficient or Spearman's rank correlation
    coefficient.

    As defined in [1]_, the Mantel test computes a test statistic :math:`r_M`
    given two symmetric distance matrices :math:`D_X` and :math:`D_Y`.
    :math:`r_M` is defined as

    .. math::

       r_M=\\frac{1}{d-1}\\sum_{i=1}^{n-1}\\sum_{j=i+1}^{n}
       stand(D_X)_{ij}stand(D_Y)_{ij}

    where

    .. math::

       d=\\frac{n(n-1)}{2}

    and :math:`n` is the number of rows/columns in each of the distance
    matrices. :math:`stand(D_X)` and :math:`stand(D_Y)` are distance matrices
    with their upper triangles containing standardized distances. Note that
    since :math:`D_X` and :math:`D_Y` are symmetric, the lower triangular
    portions of the matrices could equivalently have been used instead of the
    upper triangular portions (the current function behaves in this manner).

    If ``method='spearman'``, the above equation operates on ranked distances
    instead of the original distances.

    Statistical significance is assessed via a permutation test. The rows and
    columns of the first distance matrix (`x`) are randomly permuted a
    number of times (controlled via `permutations`). A correlation coefficient
    is computed for each permutation and the p-value is the proportion of
    permuted correlation coefficients that are equal to or more extreme
    than the original (unpermuted) correlation coefficient. Whether a permuted
    correlation coefficient is "more extreme" than the original correlation
    coefficient depends on the alternative hypothesis (controlled via
    `alternative`).

    Parameters
    ----------
    x, y : DistanceMatrix or array_like
        Input distance matrices to compare. If `x` and `y` are both
        ``DistanceMatrix`` instances, they will be reordered based on matching
        IDs (see `strict` and `lookup` below for handling matching/mismatching
        IDs); thus they are not required to be in the same ID order. If `x` and
        `y` are ``array_like``, no reordering is applied and both matrices must
        have the same shape. In either case, `x` and `y` must be at least 3x3
        in size *after* reordering and matching of IDs.
    method : {'pearson', 'spearman'}
        Method used to compute the correlation between distance matrices.
    permutations : int, optional
        Number of times to randomly permute `x` when assessing statistical
        significance. Must be greater than or equal to zero. If zero,
        statistical significance calculations will be skipped and the p-value
        will be ``np.nan``.
    alternative : {'two-sided', 'greater', 'less'}
        Alternative hypothesis to use when calculating statistical
        significance. The default ``'two-sided'`` alternative hypothesis
        calculates the proportion of permuted correlation coefficients whose
        magnitude (i.e. after taking the absolute value) is greater than or
        equal to the absolute value of the original correlation coefficient.
        ``'greater'`` calculates the proportion of permuted coefficients that
        are greater than or equal to the original coefficient. ``'less'``
        calculates the proportion of permuted coefficients that are less than
        or equal to the original coefficient.
    strict : bool, optional
        If ``True``, raises a ``ValueError`` if IDs are found that do not exist
        in both distance matrices. If ``False``, any nonmatching IDs are
        discarded before running the test. See `n` (in Returns section below)
        for the number of matching IDs that were used in the test. This
        parameter is ignored if `x` and `y` are ``array_like``.
    lookup : dict, optional
        Maps each ID in the distance matrices to a new ID. Used to match up IDs
        across distance matrices prior to running the Mantel test. If the IDs
        already match between the distance matrices, this parameter is not
        necessary. This parameter is disallowed if `x` and `y` are
        ``array_like``.

    Returns
    -------
    corr_coeff : float
        Correlation coefficient of the test (depends on `method`).
    p_value : float
        p-value of the test.
    n : int
        Number of rows/columns in each of the distance matrices, after any
        reordering/matching of IDs. If ``strict=False``, nonmatching IDs may
        have been discarded from one or both of the distance matrices prior to
        running the Mantel test, so this value may be important as it indicates
        the *actual* size of the matrices that were compared.

    Raises
    ------
    ValueError
        If `x` and `y` are not at least 3x3 in size after reordering/matching
        of IDs, or an invalid `method`, number of `permutations`, or
        `alternative` are provided.
    TypeError
        If `x` and `y` are not both ``DistanceMatrix`` instances or
        ``array_like``.

    See Also
    --------
    DistanceMatrix
    scipy.stats.pearsonr
    scipy.stats.spearmanr
    pwmantel

    Notes
    -----
    The Mantel test was first described in [2]_. The general algorithm and
    interface are similar to ``vegan::mantel``, available in R's vegan
    package [3]_.

    ``np.nan`` will be returned for the p-value if `permutations` is zero or if
    the correlation coefficient is ``np.nan``. The correlation coefficient will
    be ``np.nan`` if one or both of the inputs does not have any variation
    (i.e. the distances are all constant) and ``method='spearman'``.

    References
    ----------
    .. [1] Legendre, P. and Legendre, L. (2012) Numerical Ecology. 3rd English
       Edition. Elsevier.

    .. [2] Mantel, N. (1967). "The detection of disease clustering and a
       generalized regression approach". Cancer Research 27 (2): 209-220. PMID
       6018555.

    .. [3] http://cran.r-project.org/web/packages/vegan/index.html

    Examples
    --------
    Import the functionality we'll use in the following examples:

    >>> from skbio import DistanceMatrix
    >>> from skbio.stats.distance import mantel

    Define two 3x3 distance matrices:

    >>> x = DistanceMatrix([[0, 1, 2],
    ...                     [1, 0, 3],
    ...                     [2, 3, 0]])
    >>> y = DistanceMatrix([[0, 2, 7],
    ...                     [2, 0, 6],
    ...                     [7, 6, 0]])

    Compute the Pearson correlation between them and assess significance using
    a two-sided test with 999 permutations:

    >>> coeff, p_value, n = mantel(x, y)
    >>> round(coeff, 4)
    0.7559

    Thus, we see a moderate-to-strong positive correlation (:math:`r_M=0.7559`)
    between the two matrices.

    In the previous example, the distance matrices (``x`` and ``y``) have the
    same IDs, in the same order:

    >>> x.ids
    ('0', '1', '2')
    >>> y.ids
    ('0', '1', '2')

    If necessary, ``mantel`` will reorder the distance matrices prior to
    running the test. The function also supports a ``lookup`` dictionary that
    maps distance matrix IDs to new IDs, providing a way to match IDs between
    distance matrices prior to running the Mantel test.

    For example, let's reassign the distance matrices' IDs so that there are no
    matching IDs between them:

    >>> x.ids = ('a', 'b', 'c')
    >>> y.ids = ('d', 'e', 'f')

    If we rerun ``mantel``, we get the following error notifying us that there
    are nonmatching IDs (this is the default behavior with ``strict=True``):

    >>> mantel(x, y)
    Traceback (most recent call last):
        ...
    ValueError: IDs exist that are not in both distance matrices.

    If we pass ``strict=False`` to ignore/discard nonmatching IDs, we see that
    no matches exist between `x` and `y`, so the Mantel test still cannot be
    run:

    >>> mantel(x, y, strict=False)
    Traceback (most recent call last):
        ...
    ValueError: No matching IDs exist between the distance matrices.

    To work around this, we can define a ``lookup`` dictionary to specify how
    the IDs should be matched between distance matrices:

    >>> lookup = {'a': 'A', 'b': 'B', 'c': 'C',
    ...           'd': 'A', 'e': 'B', 'f': 'C'}

    ``lookup`` maps each ID to ``'A'``, ``'B'``, or ``'C'``. If we rerun
    ``mantel`` with ``lookup``, we get the same results as the original
    example where all distance matrix IDs matched:

    >>> coeff, p_value, n = mantel(x, y, lookup=lookup)
    >>> round(coeff, 4)
    0.7559

    ``mantel`` also accepts input that is ``array_like``. For example, if we
    redefine `x` and `y` as nested Python lists instead of ``DistanceMatrix``
    instances, we obtain the same result:

    >>> x = [[0, 1, 2],
    ...      [1, 0, 3],
    ...      [2, 3, 0]]
    >>> y = [[0, 2, 7],
    ...      [2, 0, 6],
    ...      [7, 6, 0]]
    >>> coeff, p_value, n = mantel(x, y)
    >>> round(coeff, 4)
    0.7559

    It is important to note that reordering/matching of IDs (and hence the
    ``strict`` and ``lookup`` parameters) do not apply when input is
    ``array_like`` because there is no notion of IDs.

    """
    if method == 'pearson':
        corr_func = pearsonr
    elif method == 'spearman':
        corr_func = spearmanr
    else:
        raise ValueError("Invalid correlation method '%s'." % method)

    if permutations < 0:
        raise ValueError("Number of permutations must be greater than or "
                         "equal to zero.")
    if alternative not in ('two-sided', 'greater', 'less'):
        raise ValueError("Invalid alternative hypothesis '%s'." % alternative)

    x, y = _order_dms(x, y, strict=strict, lookup=lookup)

    n = x.shape[0]
    if n < 3:
        raise ValueError("Distance matrices must have at least 3 matching IDs "
                         "between them (i.e., minimum 3x3 in size).")

    x_flat = x.condensed_form()
    y_flat = y.condensed_form()

    orig_stat = corr_func(x_flat, y_flat)[0]

    if permutations == 0 or np.isnan(orig_stat):
        p_value = np.nan
    else:
        perm_gen = (corr_func(x.permute(condensed=True), y_flat)[0]
                    for _ in range(permutations))
        permuted_stats = np.fromiter(perm_gen, np.float, count=permutations)

        if alternative == 'two-sided':
            count_better = (np.absolute(permuted_stats) >=
                            np.absolute(orig_stat)).sum()
        elif alternative == 'greater':
            count_better = (permuted_stats >= orig_stat).sum()
        else:
            count_better = (permuted_stats <= orig_stat).sum()

        p_value = (count_better + 1) / (permutations + 1)

    return orig_stat, p_value, n
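The permutation loop is the np.fromiter-relevant part of mantel: a generator of permuted correlation statistics fills a preallocated array of length permutations. A stand-alone sketch of that pattern with synthetic vectors (not the DistanceMatrix machinery used above):

import numpy as np
from scipy.stats import pearsonr

rng = np.random.default_rng(0)
x = rng.normal(size=50)
y = x + rng.normal(scale=0.5, size=50)
permutations = 999

observed = pearsonr(x, y)[0]
# Fill a preallocated array of permuted statistics straight from a generator.
perm_gen = (pearsonr(rng.permutation(x), y)[0] for _ in range(permutations))
permuted = np.fromiter(perm_gen, dtype=float, count=permutations)

p_value = (np.sum(np.abs(permuted) >= np.abs(observed)) + 1) / (permutations + 1)
print(round(observed, 3), round(p_value, 3))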
Example #37
 def l2_norm(ratings):
     return np.linalg.norm(
         np.fromiter(ratings.values(), dtype=np.float64))
Example #38
def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
                                  depthLimit, arrowConfig):
    """
    High-level routine for calling the consensus for a
    window of the genome given a cmp.h5.

    Identifies the coverage contours of the window in order to
    identify subintervals where a good consensus can be called.
    Creates the desired "no evidence consensus" where there is
    inadequate coverage.
    """
    winId, winStart, winEnd = refWindow
    logging.info("Arrow operating on %s" % reference.windowToString(refWindow))

    if options.fancyChunking:
        # 1) identify the intervals with adequate coverage for arrow
        #    consensus; restrict to intervals of length > 10
        alnHits = U.readsInWindow(alnFile,
                                  refWindow,
                                  depthLimit=20000,
                                  minMapQV=arrowConfig.minMapQV,
                                  strategy="longest",
                                  stratum=options.readStratum,
                                  barcode=options.barcode)
        starts = np.fromiter((hit.tStart for hit in alnHits), int)
        ends = np.fromiter((hit.tEnd for hit in alnHits), int)
        intervals = kSpannedIntervals(refWindow,
                                      arrowConfig.minPoaCoverage,
                                      starts,
                                      ends,
                                      minLength=10)
        coverageGaps = holes(refWindow, intervals)
        allIntervals = sorted(intervals + coverageGaps)
        if len(allIntervals) > 1:
            logging.info("Usable coverage in %s: %r" %
                         (reference.windowToString(refWindow), intervals))

    else:
        allIntervals = [(winStart, winEnd)]

    # 2) pull out the reads we will use for each interval
    # 3) call consensusForAlignments on the interval
    subConsensi = []
    variants = []

    for interval in allIntervals:
        intStart, intEnd = interval
        intRefSeq = referenceContig[intStart:intEnd]
        subWin = subWindow(refWindow, interval)

        windowRefSeq = referenceContig[intStart:intEnd]
        alns = U.readsInWindow(alnFile,
                               subWin,
                               depthLimit=depthLimit,
                               minMapQV=arrowConfig.minMapQV,
                               strategy="longest",
                               stratum=options.readStratum,
                               barcode=options.barcode)
        clippedAlns_ = [aln.clippedTo(*interval) for aln in alns]
        clippedAlns = U.filterAlns(subWin, clippedAlns_, arrowConfig)

        if len([a for a in clippedAlns if a.spansReferenceRange(*interval)
                ]) >= arrowConfig.minPoaCoverage:

            logging.debug("%s: Reads being used: %s" %
                          (reference.windowToString(subWin), " ".join(
                              [str(hit.readName) for hit in alns])))

            css = U.consensusForAlignments(subWin, intRefSeq, clippedAlns,
                                           arrowConfig)

            siteCoverage = U.coverageInWindow(subWin, alns)

            variants_ = U.variantsFromConsensus(subWin,
                                                windowRefSeq,
                                                css.sequence,
                                                css.confidence,
                                                siteCoverage,
                                                options.aligner,
                                                ai=None)

            filteredVars = filterVariants(options.minCoverage,
                                          options.minConfidence, variants_)
            # Annotate?
            if options.annotateGFF:
                annotateVariants(filteredVars, clippedAlns)

            variants += filteredVars

            # Dump?
            shouldDumpEvidence = \
                ((options.dumpEvidence == "all") or
                 (options.dumpEvidence == "variants") and (len(variants) > 0))
            if shouldDumpEvidence:
                logging.info("Arrow does not yet support --dumpEvidence")


#                 dumpEvidence(options.evidenceDirectory,
#                              subWin, windowRefSeq,
#                              clippedAlns, css)
        else:
            css = ArrowConsensus.noCallConsensus(
                arrowConfig.noEvidenceConsensus, subWin, intRefSeq)
        subConsensi.append(css)

    # 4) glue the subwindow consensus objects together to form the
    #    full window consensus
    css = join(subConsensi)

    # 5) Return
    return css, variants
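
The starts/ends arrays above pull a single attribute from each alignment hit through a generator before handing it to np.fromiter. A self-contained sketch of that idiom with a hypothetical hit type:

import numpy as np
from collections import namedtuple

Hit = namedtuple("Hit", ["tStart", "tEnd"])     # hypothetical stand-in for an alignment hit
alnHits = [Hit(10, 55), Hit(40, 120), Hit(90, 200)]

starts = np.fromiter((hit.tStart for hit in alnHits), int)
ends = np.fromiter((hit.tEnd for hit in alnHits), int)
# passing count=len(alnHits) would let numpy preallocate instead of growing the buffer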
Exemple #39
0
def str_array(iterable):
    return np.fromiter(iterable, 'U1')
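
Any iterable of single characters works; a plain string is the typical input:

>>> str_array("acgt")
array(['a', 'c', 'g', 't'], dtype='<U1')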
Exemple #40
0
 def comb(n):
     return np.fromiter(
         itertools.chain.from_iterable(itertools.combinations(n, 2)),
         n.dtype).reshape((2, -1), order="F")
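# Illustrative example: with n = np.array([0, 1, 2]) the pairs (0,1), (0,2), (1,2)
# are flattened to 0,1,0,2,1,2 and reshaped column-wise (order="F"), so comb(n) is
#     array([[0, 0, 1],
#            [1, 2, 2]])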

# Points as numpy arrays
amount = 50
min_value = 0
max_value = 250

point: Tuple[float, float] = (random.uniform(min_value, max_value),
                              random.uniform(min_value, max_value))
units: List[Tuple[float, float]] = [(random.uniform(min_value, max_value),
                                     random.uniform(min_value, max_value))
                                    for _ in range(amount)]

# Pre convert points to numpy array
flat_units = [item for sublist in units for item in sublist]
units_np = np.fromiter(flat_units, dtype=float, count=2 * len(units)).reshape(
    (-1, 2))
point_np = np.fromiter(point, dtype=float, count=2).reshape((-1, 2))

r1 = distance_matrix_scipy_cdist_squared(units, point).flatten()
r2 = distance_numpy_basic_1(units, point)
r3 = distance_numpy_basic_2(units, point)
r4 = distance_numpy_einsum(units, point)
# r5 = distance_numpy_basic_1_numba(units_np, point_np)
# r6 = distance_numpy_basic_2_numba(units_np, point_np)
# r10 = distance_numba(flat_units, point_np, len(flat_units) // 2)

assert np.array_equal(r1, r2)
assert np.array_equal(r1, r3)
assert np.array_equal(r1, r4)
# assert np.array_equal(r1, r5)
# assert np.array_equal(r1, r6)
Exemple #42
0
def num_array(iterable):
    return np.fromiter(iterable, int)
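
As with str_array above, any finite iterable of integers works, including generators:

>>> num_array(range(4))
array([0, 1, 2, 3])
>>> num_array(x * x for x in range(4))
array([0, 1, 4, 9])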
Exemple #43
0
    def _unpack_contents(self, raw_string, version):

        data = {}
        round6 = lambda x: round(x, ndigits=6)
        header_values = struct.unpack(self.header_fmt(version), raw_string[:self.header_size(version)])

        for indx, field in enumerate(self.header_fields(version)):
            data[field] = header_values[indx]

            #  handle Python 3 strings
            if (sys.version_info.major > 2) and isinstance(data[field], bytes):
                data[field] = data[field].decode('latin_1')

        data['timestamp'] = nt_to_unix((data['low_date'], data['high_date']))

        if version == 0:

            data['transceivers'] = {}

            for field in ['transect_name', 'version', 'survey_name', 'sounder_name']:
                data[field] = data[field].strip('\x00')

            sounder_name = data['sounder_name']
            if sounder_name == 'MBES':
                _me70_extra_values = struct.unpack('=hLff', data['spare0'][:14])
                data['multiplexing'] = _me70_extra_values[0]
                data['time_bias'] = _me70_extra_values[1]
                data['sound_velocity_avg'] = _me70_extra_values[2]
                data['sound_velocity_transducer'] = _me70_extra_values[3]
                data['spare0'] = data['spare0'][:14] + data['spare0'][14:].strip('\x00')

            else:
                data['spare0'] = data['spare0'].strip('\x00')

            buf_indx = self.header_size(version)

            try:
                transducer_header = self._transducer_headers[sounder_name]
                _sounder_name_used = sounder_name
            except KeyError:
                log.warning('Unknown sounder_name: %s (not one of %s)', sounder_name,
                    list(self._transducer_headers.keys()))
                log.warning('Will use ER60 transducer config fields as default')

                transducer_header = self._transducer_headers['ER60']
                _sounder_name_used = 'ER60'

            txcvr_header_fields = [x[0] for x in transducer_header]
            txcvr_header_fmt    = '=' + ''.join([x[1] for x in transducer_header])
            txcvr_header_size   = struct.calcsize(txcvr_header_fmt)

            for txcvr_indx in range(1, data['transceiver_count'] + 1):
                txcvr_header_values_encoded = struct.unpack(txcvr_header_fmt, raw_string[buf_indx:buf_indx + txcvr_header_size])
                txcvr_header_values = list(txcvr_header_values_encoded)
                for tx_idx, tx_val in enumerate(txcvr_header_values_encoded):
                    if isinstance(tx_val, bytes):
                        txcvr_header_values[tx_idx] = tx_val.decode()


                txcvr = data['transceivers'].setdefault(txcvr_indx, {})

                if _sounder_name_used in ['ER60', 'ES60']:
                    for txcvr_field_indx, field in enumerate(txcvr_header_fields[:17]):
                        txcvr[field] = txcvr_header_values[txcvr_field_indx]

                    txcvr['pulse_length_table']   = np.fromiter(list(map(round6, txcvr_header_values[17:22])), 'float')
                    txcvr['spare1']               = txcvr_header_values[22]
                    txcvr['gain_table']           = np.fromiter(list(map(round6, txcvr_header_values[23:28])), 'float')
                    txcvr['spare2']               = txcvr_header_values[28]
                    txcvr['sa_correction_table']  = np.fromiter(list(map(round6, txcvr_header_values[29:34])), 'float')
                    txcvr['spare3']               = txcvr_header_values[34]
                    txcvr['gpt_software_version'] = txcvr_header_values[35]
                    txcvr['spare4']               = txcvr_header_values[36]

                elif _sounder_name_used  == 'MBES':
                    for txcvr_field_indx, field in enumerate(txcvr_header_fields):
                        txcvr[field] = txcvr_header_values[txcvr_field_indx]

                else:
                    raise RuntimeError('Unknown _sounder_name_used (Should not happen, this is a bug!)')

                txcvr['channel_id']           = txcvr['channel_id'].strip('\x00')
                txcvr['spare1']               = txcvr['spare1'].strip('\x00')
                txcvr['spare2']               = txcvr['spare2'].strip('\x00')
                txcvr['spare3']               = txcvr['spare3'].strip('\x00')
                txcvr['spare4']               = txcvr['spare4'].strip('\x00')
                txcvr['gpt_software_version'] = txcvr['gpt_software_version'].strip('\x00')

                buf_indx += txcvr_header_size

        elif version == 1:
            #CON1 only has a single data field:  beam_config, holding an xml string
            data['beam_config'] = raw_string[self.header_size(version):].strip('\x00')

        return data
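
The per-transceiver tables above are built by rounding a slice of the unpacked values and packing them into a small float array. In isolation the idiom is just (illustrative values):

import numpy as np

values = (0.000256004, 0.000512001, 0.001024002, 0.002048003, 0.004096005)
round6 = lambda x: round(x, ndigits=6)
table = np.fromiter(map(round6, values), dtype='float', count=len(values))
# table -> array([0.000256, 0.000512, 0.001024, 0.002048, 0.004096])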
Exemple #44
0
    def create_networkx_graph(self, graph_name, graph_attributes):

        if self.is_valid_graph_attributes(graph_attributes):
            edge_names = []
            redge_names = []
            for k, v in graph_attributes['edgeCollections'].items():
                edge_names.append(k)
                ens = k.split('_', 1)
                redge = ens[1] + '_' + ens[0]
                redge_names.append(redge)

            sgdata = {ename: nx.DiGraph() for ename in edge_names}
            rsgdata = {ename: nx.DiGraph() for ename in redge_names}
            nxg = nx.DiGraph()
            labels = []
            node_data = {}

            print("Loading edge data...")

            for k, v in graph_attributes['edgeCollections'].items():
                query = "FOR doc in %s " % (k)
                cspl = [s + ':' + 'doc.' + s for s in v]
                cspl.append('_id: doc._id')
                csps = ','.join(cspl)
                query = query + "RETURN { " + csps + "}"
                sgraph = sgdata[k]
                ens = k.split('_', 1)
                redge = ens[1] + '_' + ens[0]
                rsgraph = rsgdata[redge]
                cursor = self.db.aql.execute(query)
                for doc in cursor:
                    nfrom = doc['_from']
                    nto = doc['_to']
                    sgraph.add_edge(nfrom, nto)
                    sgraph.nodes[nfrom]['bipartite'] = 0
                    sgraph.nodes[nto]['bipartite'] = 1
                    rsgraph.add_edge(nto, nfrom)
                    rsgraph.nodes[nfrom]['bipartite'] = 1
                    rsgraph.nodes[nto]['bipartite'] = 0

            print("Loading vertex data...")
            vnames = []
            for k, v in graph_attributes['vertexCollections'].items():
                vnames.append(k)
                node_data[k] = list()
                query = "FOR doc in %s " % (k)
                cspl = [s + ':' + 'doc.' + s for s in v]
                cspl.append('_id: doc._id')
                csps = ','.join(cspl)
                query = query + "RETURN { " + csps + "}"

                cursor = self.db.aql.execute(query)
                for doc in cursor:
                    exclude_attr = ['_id', '_key', 'node_id']
                    if k == 'incident':
                        exclude_attr.append('reassigned')
                        labels.append(doc['reassigned'])
                    sdata = {
                        k: v
                        for k, v in doc.items() if k not in exclude_attr
                    }
                    ndvalues = np.fromiter(sdata.values(), dtype=int)
                    #rndata = np.asarray(ndvalues, dtype = int)
                    #v_data = th.from_numpy(rndata)
                    node_data[k].append(ndvalues)

            print("Creating DGL Heterograph...")
            dict_desc = dict()
            for ename in edge_names:
                ens = ename.split('_', 1)
                redge = ens[1] + '_' + ens[0]
                fgk = (ens[0], ename, ens[1])
                rgk = (ens[1], redge, ens[0])
                dict_desc[fgk] = sgdata[ename]
                dict_desc[rgk] = rsgdata[redge]

            g = dgl.heterograph(dict_desc)

            for v in vnames:
                rndata = np.asarray(node_data[v], dtype=int)
                v_data = th.from_numpy(rndata)
                g.nodes[v].data['f'] = v_data

        return g, labels
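
The reverse-edge names used for the heterograph are derived by swapping the two halves of the edge-collection name around its first underscore; a quick illustration with a hypothetical name:

k = "incident_support_org"
ens = k.split("_", 1)            # ['incident', 'support_org']
redge = ens[1] + "_" + ens[0]    # 'support_org_incident'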
Exemple #45
0
    def generate_plots(self, store_figs=True):
        """Generate all the plots and save them."""
        # correct layout of samples_per_layer:
        # [numLayers, numNets, numIntervals, numRepetitions=1, numAlgorithms]
        # new layout of samplesPlot:
        # [numNets, numLayers, numIntervals, numAlgorithms]
        samples_plot = copy.deepcopy(self.samples_per_layer)
        samples_plot = np.squeeze(samples_plot, axis=3)
        samples_plot = np.swapaxes(samples_plot, 0, 1)

        # get keep_ratio per layer
        ref_idx = self.names.index("ReferenceNet")
        kr_per_layer = (self._samples[None, None, :, None] * samples_plot /
                        samples_plot[:, :, :, ref_idx:ref_idx + 1])

        # grapher stats
        num_layers = int(samples_plot.shape[1])
        layers = np.fromiter(range(num_layers), dtype=int) + 1

        # data for sample sizes per layer plot with standard convention
        layers = np.tile(layers, (self._num_algorithms, 1)).transpose()
        layers = layers[np.newaxis, :, np.newaxis, :]

        # grapher labels
        y_label_error = f"{self.names_metrics[0]} Test Accuracy"
        y_label_error5 = f"{self.names_metrics[1]} Test Accuracy"
        y_label_loss = "Test Loss"

        # grapher stuff
        legend = copy.deepcopy(np.array(self.names)).tolist()
        colors = copy.deepcopy(np.array(self._colors)).tolist()
        title = ", ".join([
            self.param["network"]["name"],
            self.param["generated"]["datasetTest"].replace("_", "-"),
        ])

        def _do_graphs(x_label, x_data, tag):
            x_min = 1.0 - min(self._x_max, max(self._samples))
            x_max = 1.0 - max(self._x_min, min(self._samples))

            def _flip_data(arr):
                return 1.0 - arr

            # modify the xData to represent Prune Ratio...
            x_data = _flip_data(x_data)

            # y values ...
            acc = _flip_data(self.error)
            acc5 = _flip_data(self.error5)

            # global tag with test dataset
            global_tag_test = f"{self.global_tag}_{self.dataset_test}"

            # grapher initialization + plotting
            grapher_error = Grapher(
                x_values=x_data,
                y_values=acc,
                folder=self._plot_dir,
                file_name=global_tag_test + "_acc_" + tag + ".pdf",
                ref_idx=ref_idx,
                x_min=x_min,
                x_max=x_max,
                legend=legend,
                colors=colors,
                xlabel=x_label,
                ylabel=y_label_error,
                title=title,
            )
            img_err = grapher_error.graph(percentage_x=True,
                                          percentage_y=True,
                                          store=store_figs)

            grapher_error5 = Grapher(
                x_values=x_data,
                y_values=acc5,
                folder=self._plot_dir,
                file_name=global_tag_test + "_acc5_" + tag + ".pdf",
                ref_idx=ref_idx,
                x_min=x_min,
                x_max=x_max,
                legend=legend,
                colors=colors,
                xlabel=x_label,
                ylabel=y_label_error5,
                title=title,
            )
            img_err5 = grapher_error5.graph(percentage_x=True,
                                            percentage_y=True,
                                            store=store_figs)

            grapher_loss = Grapher(
                x_values=x_data,
                y_values=self.loss,
                folder=self._plot_dir,
                file_name=global_tag_test + "_loss_" + tag + ".pdf",
                ref_idx=ref_idx,
                x_min=x_min,
                x_max=x_max,
                legend=legend,
                colors=colors,
                xlabel=x_label,
                ylabel=y_label_loss,
                title=title,
            )
            img_loss = grapher_loss.graph(percentage_x=True, store=store_figs)

            # also write images to Tensorboard
            if store_figs:
                self.log_image(
                    self._writer_general,
                    f"{self.dataset_test} Test {self.names_metrics[0]} {tag}",
                    img_err,
                    0,
                )
                self.log_image(
                    self._writer_general,
                    f"{self.dataset_test} Test {self.names_metrics[1]} {tag}",
                    img_err5,
                    0,
                )
                self.log_image(
                    self._writer_general,
                    self.dataset_test + "Test Loss" + tag,
                    img_loss,
                    0,
                )

            return grapher_error, grapher_error5, grapher_loss

        # keep a list of figures around
        graphers = []

        # do parameter and flop plots
        graphers.extend(_do_graphs("Pruned Parameters", self.sizes, "param"))
        graphers.extend(_do_graphs("Pruned FLOPs", self.flops, "flops"))

        # do some layer-wise graphs
        title_layer = ", ".join([
            self.param["network"]["name"],
            self.param["network"]["dataset"].replace("_", "-"),
        ])

        def _do_layer_graph(x_label, y_label, y_data, tag, ref_idx=None):
            grapher_layer = Grapher(
                x_values=layers,
                y_values=y_data,
                folder=self._plot_dir,
                file_name=self.global_tag + f"_{tag}.pdf",
                ref_idx=ref_idx,
                x_min=np.min(layers),
                x_max=np.max(layers),
                legend=legend,
                colors=colors,
                xlabel=x_label,
                ylabel=y_label,
                title=title_layer,
            )
            img_layer = grapher_layer.graph_histo(show_delta=ref_idx
                                                  is not None,
                                                  store=store_figs)

            if store_figs:
                self.log_image(self._writer_general, tag, img_layer, 0)

            return grapher_layer

        graphers.append(
            _do_layer_graph(
                "Budget Allocation over Layers",
                "Percentage of Budget",
                samples_plot,
                "samples",
            ))

        graphers.append(
            _do_layer_graph(
                "Prune Ratio per Layer",
                "Prune Ratio",
                1 - kr_per_layer,
                "layer_pr",
                ref_idx,
            ))

        return graphers
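
The layout change described in the comments at the top of generate_plots (dropping the singleton repetition axis, then swapping the layer and net axes) can be verified on a dummy array:

import numpy as np

num_layers, num_nets, num_intervals, num_reps, num_algorithms = 4, 3, 5, 1, 2
samples_per_layer = np.zeros((num_layers, num_nets, num_intervals, num_reps, num_algorithms))

samples_plot = np.squeeze(samples_per_layer, axis=3)   # drop numRepetitions == 1
samples_plot = np.swapaxes(samples_plot, 0, 1)         # -> [numNets, numLayers, numIntervals, numAlgorithms]
assert samples_plot.shape == (num_nets, num_layers, num_intervals, num_algorithms)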
Exemple #46
0
def to_junction_tree_model(model, algorithm) -> JunctionizedModel:
    """Builds equivalent model on a junction tree.

    First, builds a junction tree using a treewidth decomposition algorithm
    from NetworkX (the Minimum Fill-in or Minimum Degree heuristic).

    Then, builds a new model in which variables correspond to nodes in junction
    tree - we will call them "supervariables". Values of new supervariables are
    encoded values of original variables. New alphabet size is original
    alphabet size to the power of the maximal junction size. If some
    supervariables contain fewer variables than others, we simply don't use
    all of the available encoding "address space". We mark those impossible
    values as having probability 0 (i.e. log probability -inf).

    Fields in new model are calculated by multiplying all field and
    interaction factors on variables in the same supervariable. While doing
    this, we make sure that every factor is counted only once. If some factor
    was accounted for in one supervariable field, it won't be accounted for
    again in other supervariables.

    Interaction factors in new model contain consistency requirement. If
    a variable of original model appears in multiple supervariables, we allow
    only those states where it takes the same value in all supervariables. We
    achieve that by using interaction factors which are equal to 1 if values
    of the same original variable in different supervariables are equal, and
    0 if they are not equal. We actually use values 0 and -inf, because we
    work with logarithms.

    See https://en.wikipedia.org/wiki/Tree_decomposition.

    :param model: original model.
    :param algorithm: decomposition algorithm.
    :return: JunctionizedModel object, which contains junction tree and the
      new model, which is equivalent to original model, but whose graph is a
      tree.
    """
    # Build junction tree.
    graph = model.get_graph()
    if algorithm == 'min_fill_in':
        tree_width, junc_tree = treewidth_min_fill_in(graph)
    elif algorithm == 'min_degree':
        tree_width, junc_tree = treewidth_min_degree(graph)
    elif algorithm == 'auto':
        tree_width_1, junc_tree_1 = treewidth_min_fill_in(graph)
        tree_width_2, junc_tree_2 = treewidth_min_degree(graph)
        if tree_width_1 < tree_width_2:
            tree_width, junc_tree = tree_width_1, junc_tree_1
        else:
            tree_width, junc_tree = tree_width_2, junc_tree_2
    else:
        raise ValueError('Unknown treewidth decomposition algorithm %s' %
                         algorithm)

    jt_nodes = list(junc_tree.nodes())
    sv_size = tree_width + 1  # Supervariable size.

    new_gr_size = len(jt_nodes)  # New graph size.
    new_al_size = model.al_size**sv_size  # New alphabet size.
    if new_al_size > 1e6:
        raise TooMuchStatesError("New domain size is too large: %d." %
                                 new_al_size)

    # Build edge list in terms of indices in new graph.
    nodes_lookup = {jt_nodes[i]: i for i in range(len(jt_nodes))}
    new_edges = np.array([[nodes_lookup[u], nodes_lookup[v]]
                          for u, v in junc_tree.edges()])

    # Convert node lists to numpy arrays.
    jt_nodes = [np.fromiter(node, dtype=np.int32) for node in jt_nodes]

    # Calculate fields which describe interactions between supervariables.
    # If a supervariable has fewer than ``sv_size`` variables, pad with -inf.
    # Then, when decoding, we will just throw away values from the left.
    # We should account for each factor of the old graph in exactly one factor
    # in the new graph. So, for field and interaction factors of the old graph
    # we keep track of whether we already took them, and don't take them for
    # the second time.
    new_field = np.ones((new_gr_size, new_al_size), dtype=np.float64) * -np.inf
    used_node_fields = set()
    for new_node_id in range(new_gr_size):
        old_nodes = jt_nodes[new_node_id]
        node_field = model.get_subgraph_factor_values(
            old_nodes, vars_skip=used_node_fields)
        new_field[new_node_id, 0:len(node_field)] = node_field
        used_node_fields.update(old_nodes)

    # Now, for every edge in new graph - add interaction factor requiring that
    # the same variable appearing in two supervariables always has the same
    # values.
    # We achieve this by using the Kronecker delta function.
    # As we are working with logarithms, we populate -inf for impossible states,
    # and 0 for possible states.
    new_interactions = np.zeros((len(new_edges), new_al_size, new_al_size))
    for edge_id in range(len(new_edges)):
        u, v = new_edges[edge_id]
        allowed = build_multi_delta(sv_size, model.al_size, jt_nodes[u],
                                    jt_nodes[v])
        new_interactions[edge_id, np.logical_not(allowed)] = -np.inf

    from inferlo.pairwise.pwf_model import PairWiseFiniteModel
    new_model = PairWiseFiniteModel.create(new_field, new_edges,
                                           new_interactions)
    return JunctionizedModel(new_model, jt_nodes, model.gr_size, model.al_size)
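
The docstring's encoding of several original variables into one supervariable is a positional (base-al_size) encoding, so the new alphabet size is al_size ** sv_size. A minimal sketch with hypothetical helper names:

def encode_supervalue(values, al_size):
    """Pack per-variable values (each in range(al_size)) into one supervariable state."""
    state = 0
    for v in values:
        state = state * al_size + v
    return state

def decode_supervalue(state, al_size, sv_size):
    """Unpack a supervariable state back into its per-variable values."""
    values = []
    for _ in range(sv_size):
        values.append(state % al_size)
        state //= al_size
    return values[::-1]

# al_size = 3, a supervariable holding 2 original variables:
assert encode_supervalue([2, 1], 3) == 7
assert decode_supervalue(7, 3, 2) == [2, 1]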
Exemple #47
0
def select_mesh_elems(context, mode, tool, tool_co, select_all_edges,
                      select_all_faces):
    box_xmin = box_xmax = box_ymin = box_ymax = center = radius = lasso = None
    vert_co = verts_mask_visin = vis_edges_mask_in = edge_count = edges_mask_visin = None

    if tool == 0:
        box_xmin, box_xmax, box_ymin, box_ymax = tool_co
    elif tool == 1:
        center, radius = tool_co
    else:  # shape == 2:
        lasso = tool_co

    region = context.region
    rv3d = context.region_data

    sel_obs = context.selected_objects if context.selected_objects else [
        context.object
    ]
    for ob in sel_obs:
        if ob.type == 'MESH':
            mesh_select_mode = context.tool_settings.mesh_select_mode

            ob.update_from_editmode()
            me = ob.data
            bm = bmesh.from_edit_mesh(me)

            # VERT PASS ####
            if mesh_select_mode[0] or mesh_select_mode[1] or \
                    (mesh_select_mode[2] and select_all_faces):

                verts = me.vertices
                vert_count = len(verts)

                # local coordinates of vertices
                vert_co_local = np.empty(vert_count * 3, "f")
                verts.foreach_get("co", vert_co_local)
                vert_co_local.shape = (vert_count, 3)

                # mask of visible vertices
                verts_mask_vis = np.empty(vert_count, "?")
                verts.foreach_get("hide", verts_mask_vis)
                verts_mask_vis = ~verts_mask_vis

                # local coordinates of visible vertices
                vis_vert_co_local = vert_co_local[verts_mask_vis]

                # world coordinates of visible vertices
                vis_vert_co_world = get_co_world_of_ob(ob, vis_vert_co_local)

                # 2d coordinates of visible vertices
                vert_co = np.empty((vert_count, 2), "f")
                vert_co[verts_mask_vis] = vis_vert_co = get_co_2d(
                    region, rv3d, vis_vert_co_world)

                # mask of vertices inside the selection polygon from visible vertices
                # box select
                if tool == 0:
                    vis_verts_mask_in = points_inside_rectangle(
                        vis_vert_co, box_xmin, box_xmax, box_ymin, box_ymax)
                # circle select
                elif tool == 1:
                    vis_verts_mask_in = points_inside_circle(
                        vis_vert_co, center, radius)
                # lasso select
                else:
                    vis_verts_mask_in = points_inside_polygon(vis_vert_co,
                                                              lasso,
                                                              prefilter=True)

                # mask of visible vertices inside the selection polygon from all vertices
                verts_mask_visin = np.full(vert_count, False, "?")
                verts_mask_visin[verts_mask_vis] = vis_verts_mask_in

                # do selection
                if mesh_select_mode[0]:
                    select = get_mesh_selection_mask(verts, vert_count,
                                                     verts_mask_visin, mode)

                    select_list = select.tolist()
                    for i, v in enumerate(bm.verts):
                        v.select = select_list[i]

            # EDGE PASS ####
            if mesh_select_mode[1] or (mesh_select_mode[2] and select_all_faces):
                edges = me.edges
                edge_count = len(edges)

                # for each edge get 2 indices of its vertices
                edge_vert_indices = np.empty(edge_count * 2, "i")
                edges.foreach_get("vertices", edge_vert_indices)
                edge_vert_indices.shape = (edge_count, 2)

                # mask of visible edges
                edges_mask_vis = np.empty(edge_count, "?")
                edges.foreach_get("hide", edges_mask_vis)
                edges_mask_vis = ~edges_mask_vis

                # for each visible edge get 2 vertex indices
                vis_edge_vert_indices = edge_vert_indices[edges_mask_vis]

                # for each visible edge get mask of vertices in the selection polygon
                vis_edge_verts_mask_in = verts_mask_visin[
                    vis_edge_vert_indices]

                # try to select edges that are completely inside the selection polygon
                if not select_all_edges:
                    # mask of edges inside the selection polygon from visible edges
                    vis_edges_mask_in = (vis_edge_verts_mask_in[:, 0] &
                                         vis_edge_verts_mask_in[:, 1])

                # if select_all_edges enabled or no inner edges found
                # then select edges that intersect the selection polygon
                if select_all_edges or (not select_all_edges and not np.any(vis_edges_mask_in)) or \
                        (mesh_select_mode[2] and select_all_faces):

                    # coordinates of vertices of visible edges
                    vis_edge_vert_co = vert_co[vis_edge_vert_indices]

                    # mask of edges from visible edges that have vertex inside the selection polygon and
                    # should be selected
                    vis_edges_mask_vert_in = (vis_edge_verts_mask_in[:, 0] |
                                              vis_edge_verts_mask_in[:, 1])

                    # selection polygon bbox
                    # box select
                    if tool == 0:
                        xmin, xmax, ymin, ymax = box_xmin, box_xmax, box_ymin, box_ymax
                    # circle select
                    elif tool == 1:
                        xmin, xmax, ymin, ymax = circle_bbox(center, radius)
                    # lasso select
                    else:
                        xmin, xmax, ymin, ymax = polygon_bbox(lasso)

                    # mask of edges from visible edges whose vertices both lie outside one side
                    # of the selection polygon bbox, so they can't intersect the selection polygon
                    # and shouldn't be selected
                    vis_edges_mask_cant_isect = segments_completely_outside_rectangle(
                        vis_edge_vert_co, xmin, xmax, ymin, ymax)

                    # mask of edges from visible edges that may intersect selection polygon and
                    # should be tested for intersection
                    vis_edges_mask_may_isect = ~vis_edges_mask_vert_in & ~vis_edges_mask_cant_isect

                    # skip if there are no edges that need to be tested for intersection
                    if np.any(vis_edges_mask_may_isect):
                        # get coordinates of verts of visible edges that may intersect the selection polygon
                        may_isect_vis_edge_co = vis_edge_vert_co[
                            vis_edges_mask_may_isect]

                        # mask of edges that intersect the selection polygon from edges that may intersect it
                        # box select
                        if tool == 0:
                            may_isect_vis_edges_mask_isect = segments_intersect_rectangle(
                                may_isect_vis_edge_co, box_xmin, box_xmax,
                                box_ymin, box_ymax)
                        # circle select
                        elif tool == 1:
                            may_isect_vis_edges_mask_isect = segments_inside_or_intersect_circle(
                                may_isect_vis_edge_co, center, radius)
                        # lasso select
                        else:
                            may_isect_vis_edges_mask_isect = segments_intersect_polygon(
                                may_isect_vis_edge_co, lasso)

                        # mask of edges that intersect the selection polygon from visible edges
                        vis_edges_mask_in = vis_edges_mask_vert_in
                        vis_edges_mask_in[
                            vis_edges_mask_may_isect] = may_isect_vis_edges_mask_isect
                    else:
                        vis_edges_mask_in = vis_edges_mask_vert_in

                # mask of visible edges inside the selection polygon from all edges
                edges_mask_visin = np.full(edge_count, False, "?")
                edges_mask_visin[edges_mask_vis] = vis_edges_mask_in

                # do selection
                if mesh_select_mode[1]:
                    select = get_mesh_selection_mask(edges, edge_count,
                                                     edges_mask_visin, mode)

                    select_list = select.tolist()
                    for i, e in enumerate(bm.edges):
                        e.select = select_list[i]

            # FACE PASS #####
            if mesh_select_mode[2]:
                faces = me.polygons
                face_count = len(faces)

                # get mask of visible faces
                faces_mask_vis = np.empty(face_count, "?")
                faces.foreach_get("hide", faces_mask_vis)
                faces_mask_vis = ~faces_mask_vis

                # select faces which centers are inside the selection rectangle
                if not select_all_faces:
                    # local coordinates of face centers
                    face_center_co_local = np.empty(face_count * 3, "f")
                    faces.foreach_get("center", face_center_co_local)
                    face_center_co_local.shape = (face_count, 3)

                    # local coordinates of visible face centers
                    vis_face_center_co_local = face_center_co_local[
                        faces_mask_vis]

                    # world coordinates of visible face centers
                    vis_vert_co_world = get_co_world_of_ob(
                        ob, vis_face_center_co_local)

                    # 2d coordinates of visible face centers
                    vis_face_center_co = get_co_2d(region, rv3d,
                                                   vis_vert_co_world)

                    # mask of face centers inside the selection polygon from visible faces
                    # box select
                    if tool == 0:
                        vis_faces_mask_in = points_inside_rectangle(
                            vis_face_center_co, box_xmin, box_xmax, box_ymin,
                            box_ymax)
                    # circle select
                    elif tool == 1:
                        vis_faces_mask_in = points_inside_circle(
                            vis_face_center_co, center, radius)
                    # lasso select
                    else:
                        vis_faces_mask_in = points_inside_polygon(
                            vis_face_center_co, lasso, prefilter=True)

                    # mask of visible faces inside the selection polygon from all faces
                    faces_mask_visin = np.full(face_count, False, "?")
                    faces_mask_visin[faces_mask_vis] = vis_faces_mask_in
                else:
                    # mesh loops - edges that form face polygons, sorted by face indices
                    loops = me.loops
                    loop_count = len(loops)

                    # number of vertices for each face
                    face_loop_totals = np.empty(face_count, "i")
                    faces.foreach_get("loop_total", face_loop_totals)

                    # skip getting faces from edges if there are no edges inside the selection border
                    in_edge_count = np.count_nonzero(edges_mask_visin)
                    if in_edge_count:
                        # getting faces from bmesh is faster when a low number of faces need to be
                        # selected from a large number of total faces, otherwise numpy is faster
                        ratio = edge_count / in_edge_count

                        if ratio > 20:
                            # bmesh pass
                            visin_edge_indices = tuple(
                                np.nonzero(edges_mask_visin)[0])
                            in_face_indices = [[
                                face.index
                                for face in bm.edges[index].link_faces
                            ] for index in visin_edge_indices]

                            from itertools import chain
                            in_face_indices = set(
                                chain.from_iterable(in_face_indices))
                            c = len(in_face_indices)
                            in_face_indices = np.fromiter(
                                in_face_indices, "i", c)
                        else:
                            # numpy pass
                            # indices of face edges
                            loop_edge_indices = np.empty(loop_count, "i")
                            loops.foreach_get("edge_index", loop_edge_indices)

                            # index of face for each edge in mesh loop
                            face_indices = np.arange(face_count)
                            loop_face_indices = np.repeat(
                                face_indices, face_loop_totals)

                            # mask of visible edges in the selection polygon that are in mesh loops,
                            # therefore forming face polygons in the selection border
                            loop_edges_mask_visin = lookup_isin(
                                loop_edge_indices, edges_mask_visin)

                            # indices of faces inside the selection polygon
                            in_face_indices = np.unique(
                                loop_face_indices[loop_edges_mask_visin])

                        # mask of all faces in the selection polygon
                        faces_mask_in = np.full(face_count, False, "?")
                        faces_mask_in[in_face_indices] = np.True_
                        # mask of visible faces in the selection polygon
                        faces_mask_visin = faces_mask_vis & faces_mask_in
                    else:
                        faces_mask_in = faces_mask_visin = np.full(
                            face_count, False, "?")

                    # FACE POLY PASS ####
                    # select faces under cursor (faces that have the selection polygon inside their area)

                    # visible faces not in the selection polygon
                    faces_mask_visnoin = ~faces_mask_in & faces_mask_vis

                    # number of vertices of each visible face not in the selection polygon
                    visnoin_face_loop_totals = face_loop_totals[
                        faces_mask_visnoin]

                    # skip if all faces are already selected
                    if visnoin_face_loop_totals.size > 0:
                        # box select
                        if tool == 0:
                            cursor_co = (box_xmax, box_ymin)  # bottom right box corner
                        # circle select
                        elif tool == 1:
                            cursor_co = center
                        # lasso select
                        else:
                            cursor_co = lasso[0]

                        # indices of vertices of all faces
                        face_vert_indices = np.empty(loop_count, "i")
                        faces.foreach_get("vertices", face_vert_indices)

                        # mask of vertices not in the selection polygon from face vertices
                        face_verts_mask_visnoin = np.repeat(
                            faces_mask_visnoin, face_loop_totals)
                        # indices of vertices of visible faces not in the selection polygon
                        visnoin_face_vert_indices = face_vert_indices[
                            face_verts_mask_visnoin]
                        # coordinates of vertices of visible faces not in the selection polygon
                        visnoin_face_vert_co = vert_co[
                            visnoin_face_vert_indices]
                        # index of first face vertex in face vertex sequence
                        visnoin_face_cell_starts = np.insert(
                            visnoin_face_loop_totals[:-1].cumsum(), 0, 0)

                        # mask of faces that have cursor inside their polygon area
                        # from visible faces not in the selection polygon
                        visnoin_faces_mask_under = point_inside_polygons(
                            cursor_co,
                            visnoin_face_vert_co,
                            visnoin_face_cell_starts,
                            None,
                            visnoin_face_loop_totals,
                            prefilter=True)

                        # mask of visible faces under cursor from all faces
                        faces_mask_visunder = np.full(face_count, False, "?")
                        faces_mask_visunder[
                            faces_mask_visnoin] = visnoin_faces_mask_under

                        # mask of visible faces in the selection polygon and under cursor
                        faces_mask_visin[faces_mask_visunder] = np.True_

                # do selection
                select = get_mesh_selection_mask(faces, face_count,
                                                 faces_mask_visin, mode)

                select_list = select.tolist()
                for i, f in enumerate(bm.faces):
                    f.select = select_list[i]

            # flush face selection after selecting/deselecting edges and vertices
            bm.select_flush_mode()

            bmesh.update_edit_mesh(me, loop_triangles=False, destructive=False)
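
A pattern repeated throughout select_mesh_elems is scattering a mask computed on the visible subset back into a full-length mask over all elements; in isolation:

import numpy as np

element_count = 6
mask_vis = np.array([True, False, True, True, False, True])   # which elements are visible
vis_mask_in = np.array([True, False, True, False])            # test result, one per visible element

mask_visin = np.full(element_count, False, "?")   # "?" is numpy's boolean dtype code
mask_visin[mask_vis] = vis_mask_in
# mask_visin -> array([ True, False, False,  True, False, False])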
Exemple #48
0
def read_buffer(f):
    cells = []
    cell_data = {"su2:tag": []}

    itype = "i8"
    ftype = "f8"
    dim = 0

    next_tag_id = 0
    expected_nmarkers = 0
    markers_found = 0
    while True:
        line = f.readline()
        if not line:
            # EOF
            break

        line = line.strip()
        if len(line) == 0:
            continue
        if line[0] == "%":
            continue

        try:
            name, rest_of_line = line.split("=")
        except ValueError:
            logging.warning(
                f"meshio could not parse line\n {line}\n skipping.....")
            continue

        if name == "NDIME":
            dim = int(rest_of_line)
            if dim != 2 and dim != 3:
                raise ReadError(f"Invalid dimension value {line}")

        elif name == "NPOIN":
            # According to the documentation, rest_of_line should just be an int,
            # and the next block should contain only the coordinates of the points.
            # However, some files have one or two extra indices not related to the
            # actual coordinates.
            # So let's read the next line to find the actual number of columns.
            #
            first_line = f.readline()
            first_line = first_line.split()
            first_line = np.array(first_line, dtype=ftype)

            extra_columns = first_line.shape[0] - dim

            num_verts = int(rest_of_line.split()[0]) - 1
            points = np.fromfile(f,
                                 count=num_verts * (dim + extra_columns),
                                 dtype=ftype,
                                 sep=" ").reshape(num_verts,
                                                  dim + extra_columns)

            # save off any extra info
            if extra_columns > 0:
                first_line = first_line[:-extra_columns]
                points = points[:, :-extra_columns]

            # add the first line we read separately
            points = np.vstack([first_line, points])

        elif name == "NELEM" or name == "MARKER_ELEMS":
            # we cannot read at once using numpy because we do not know the
            # total size. Instead, read the next num_elems lines as-is and re-use
            # the _translate_cells function from the vtk reader

            num_elems = int(rest_of_line)
            gen = islice(f, num_elems)

            # some files have an extra int column while others do not.
            # We do not need it, so make sure we skip it.
            first_line_str = next(gen)
            first_line = first_line_str.split()
            nnodes = su2_type_to_numnodes[int(first_line[0])]
            has_extra_column = False
            if nnodes + 1 == len(first_line):
                has_extra_column = False
            elif nnodes + 2 == len(first_line):
                has_extra_column = True
            else:
                raise ReadError(f"Invalid number of columns for {name} field")

            # reset generator
            gen = chain([first_line_str], gen)

            cell_array = " ".join([line.rstrip("\n") for line in gen])
            cell_array = np.fromiter(cell_array.split(), dtype=itype)

            cells_, cell_data_ = _translate_cells(cell_array, has_extra_column)

            for eltype, data in cells_.items():
                cells.append(CellBlock(eltype, data))
                num_block_elems = len(data)
                if name == "NELEM":
                    cell_data["su2:tag"].append(
                        np.full(num_block_elems, 0, dtype=np.int32))
                else:
                    tags = np.full(num_block_elems,
                                   next_tag_id,
                                   dtype=np.int32)
                    cell_data["su2:tag"].append(tags)

        elif name == "NMARK":
            expected_nmarkers = int(rest_of_line)
        elif name == "MARKER_TAG":
            next_tag = rest_of_line
            try:
                next_tag_id = int(next_tag)
            except ValueError:
                next_tag_id += 1
                logging.warning(
                    "meshio does not support tags of string type.\n"
                    "    Surface tag {} will be replaced by {}".format(
                        rest_of_line, next_tag_id))
            markers_found += 1

    if markers_found != expected_nmarkers:
        logging.warning(
            f"expected {expected_nmarkers} markes according to NMARK value "
            f"but found only {markers_found}")

    # merge boundary elements in a single cellblock per cell type
    if dim == 2:
        types = ["line"]
    else:
        types = ["triangle", "quad"]

    indices_to_merge = {}
    for t in types:
        indices_to_merge[t] = []

    for index, cell_block in enumerate(cells):
        if cell_block.type in types:
            indices_to_merge[cell_block.type].append(index)

    cdata = cell_data["su2:tag"]
    for type, indices in indices_to_merge.items():
        if len(indices) > 1:
            cells[indices[0]] = CellBlock(
                type, np.concatenate([cells[i].data for i in indices]))
            cdata[indices[0]] = np.concatenate([cdata[i] for i in indices])

    # delete merged blocks
    idelete = []
    for type, indices in indices_to_merge.items():
        idelete += indices[1:]

    for i in sorted(idelete, reverse=True):
        del cells[i]
        del cdata[i]

    cell_data["su2:tag"] = cdata
    return Mesh(points, cells, cell_data=cell_data)
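
The element connectivity above is read by joining the element lines into one whitespace-separated string and parsing it with np.fromiter, which coerces each token to the requested integer dtype. The same idiom on a toy string:

import numpy as np

cell_array_text = "9 0 1 2 3  9 1 4 5 2"   # illustrative SU2-style element records
cell_array = np.fromiter(cell_array_text.split(), dtype="i8")
# cell_array -> array([9, 0, 1, 2, 3, 9, 1, 4, 5, 2])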
Exemple #49
0
def generate_cooccurrence(segments_filename, labels_to_idx, rootdir='.'):
    fn = os.path.join(rootdir, segments_filename)
    # The audio file is not large, so no need to multiprocess
    # keys: image index, values: {label index, score}
    segmentlabels = {}
    # keys: [(label index, other label index)], values: score
    coo = {}
    # need 2 passes through the file, one is to get all the segment IDs
    # and map them to indexes
    # we don't have URLs to these easily so for the moment ignore any mapping
    segment_to_idx = {}
    with open(fn, 'r') as fh:
        idx = 0
        for line in fh:
            if line.startswith("#"):
                continue
            tokens = line.strip().split(",")
            segment = tokens[0].strip()
            segment_to_idx[segment] = idx
            idx += 1
    allsegments = []
    with open(fn, 'r') as fh:
        csvreader = csv.reader(fh, delimiter=',', quotechar='"')
        for vals in csvreader:
            # File format is:
            """
            # Segments csv created Sun Mar  5 10:54:31 2017
            # num_ytids=22160, num_segs=22160, num_unique_labels=527, num_positive_labels=52882
            # YTID, start_seconds, end_seconds, positive_labels
            --PJHxphWEs, 30.000, 40.000, "/m/09x0r,/t/dd00088"
            --ZhevVpy1s, 50.000, 60.000, "/m/012xff"
            --aE2O5G5WE, 0.000, 10.000, "/m/03fwl,/m/04rlf,/m/09x0r"
            --aO5cdqSAg, 30.000, 40.000, "/t/dd00003,/t/dd00005"
            """
            if vals[0].startswith("#"):
                continue
            # fields we care about for now are:
            # ImageID (position 0)
            # LabelName (position 2)
            # Confidence (position 3)
            segmentname = vals[0]
            allsegments.append(segmentname)
            labels = vals[3:]
            # there is potentially more than 1 label
            segmentidx = segment_to_idx[segmentname]
            # keep track of what other labels are in this segment
            segmentlabels.setdefault(segmentidx, {})
            for label in labels:
                # labels may be padded with spaces or have a quote character
                label = label.strip().replace('"', "").strip()
                labelidx = labels_to_idx[label]
                segmentlabels[segmentidx].setdefault(labelidx, 0)
                score = 1  # if it was found in the file at all
                segmentlabels[segmentidx][labelidx] += score
                for otherlabel in segmentlabels[segmentidx]:
                    if labelidx == otherlabel:
                        continue
                    coo.setdefault((labelidx, otherlabel), 0)
                    coo.setdefault((otherlabel, labelidx), 0)
                    coo[(labelidx, otherlabel)] += score
                    coo[(otherlabel, labelidx)] += score
    sorted_keys = sorted(coo.keys())
    data = np.fromiter((coo[k] for k in sorted_keys),
                       dtype=float,
                       count=len(sorted_keys))
    skarr = np.array(sorted_keys).T
    i = torch.LongTensor(skarr)
    v = torch.FloatTensor(data)
    nlabels = len(labels_to_idx)
    coo_torch = torch.sparse.FloatTensor(i, v, (nlabels, nlabels))
    return coo_torch
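
The final dict-to-sparse-tensor step can be illustrated without torch by building a SciPy COO matrix from the same (label, label) -> count mapping (hypothetical toy counts):

import numpy as np
from scipy.sparse import coo_matrix

coo = {(0, 1): 3, (1, 0): 3, (1, 2): 1, (2, 1): 1}
sorted_keys = sorted(coo.keys())
data = np.fromiter((coo[k] for k in sorted_keys), dtype=float, count=len(sorted_keys))
rows, cols = np.array(sorted_keys).T

nlabels = 3
coo_sparse = coo_matrix((data, (rows, cols)), shape=(nlabels, nlabels))
# coo_sparse.toarray() ->
# array([[0., 3., 0.],
#        [3., 0., 1.],
#        [0., 1., 0.]])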
Exemple #50
0
def batch_sample(n, m, seed=0):
    """Yields batches of `m` samples from a stream over `n` naturals."""
    stream = sample(n, seed)
    while True:
        yield np.fromiter(stream, int, m)
Exemple #51
0
def sort_param(
    param: npt.ArrayLike,
    seperator: str = " ",
    check_duplicates: bool = True,
) -> NDArray[np.str_]:
    """Sort all atoms in an atom-based parameter set.

    Parameters represented by two atoms are simply sorted in alphabetical order.
    For parameters consisting of three atoms only the first and last atoms are
    sorted alphabetically.
    Parameters consisting of four or more atoms are not supported.

    Examples
    --------
    .. code-block ::

        >>> from FOX.functions.sorting import sort_param

        >>> param1 = [
        ...     "Cd Cd",
        ...     "Se Cd",
        ...     "Se Se",
        ... ]

        >>> param2 = [
        ...     "Cd Cd Cd",
        ...     "Se Cd Cd",
        ...     "Se Se Se",
        ... ]

        >>> sort_param(param1)
        array(['Cd Cd', 'Cd Se', 'Se Se'], dtype='<U5')

        >>> sort_param(param2)
        array(['Cd Cd Cd', 'Cd Cd Se', 'Se Se Se'], dtype='<U8')


    Parameters
    ----------
    param : array-like
        The to-be sorted parameters.
    seperator : :class:`str`
        The seperator used for splitting the atoms.
    check_duplicates : :class:`bool`
        Whether to check for duplicate elements after sorting the array.

    Returns
    -------
    :class:`np.ndarray[np.str_] <numpy.ndarray>`
        A new array with the atoms sorted within each parameter.

    Raises
    ------
    :exc:`ValueError`
        Raised when ``check_duplicates is True`` and duplicate parameters are present
        in the to-be returned array.

    """
    atoms: NDArray[np.str_] = np.asarray(param)
    if atoms.dtype.kind != "U":
        raise TypeError(
            f"Expected a string array; observed dtype: {atoms.dtype}")
    elif atoms.size == 0:
        return atoms if atoms is not param else atoms.copy()
    atoms_split = np.array(np.char.split(atoms, seperator).tolist())

    # Sort the atoms whenever dealing with atom-pair/triplet-based parameters
    n = atoms_split.shape[-1]
    if n == 1:
        ret = atoms if atoms is not param else atoms.copy()
    else:
        if n == 2:
            atoms_split.sort(axis=-1)
        elif n == 3:
            atoms_split[..., ::2].sort(axis=-1)
        else:
            raise NotImplementedError(
                f"Sorting parameters consisting of {n} atoms is not supported")

        iterator = (seperator.join(i) for i in atoms_split.reshape(-1, n))
        ret = np.fromiter(iterator, dtype=atoms.dtype, count=atoms.size)
        ret.shape = atoms.shape

    # Check for duplicates
    if not check_duplicates:
        return ret

    unique, idx, counts = np.unique(ret, return_index=True, return_counts=True)
    is_duplicate = counts != 1
    if is_duplicate.any():
        duplicates = unique[is_duplicate]
        raise ValueError(f"Duplicate parameters: {duplicates}")
    return ret
Exemple #52
0
 def from_iterable(itr):
     return (np.fromiter(itr, float))
Exemple #53
0
    def _recombine(self, dpack, spacks):
        "join sentences by parsing their heads"
        unrelated_lbl = dpack.label_number(UNRELATED)
        # intra-sentential predictions
        sent_lbl = self._mk_get_lbl(dpack, spacks)

        if self._verbose:
            # check for lost and hallucinated intra- edges
            self._check_intra_edges(dpack, spacks)

        # call inter-sentential parser
        dpack_inter = self._select_heads(dpack, spacks)
        has_inter = len(dpack_inter) > 0
        if has_inter:
            dpack_inter = self._parsers.inter.transform(dpack_inter)

        doc_lbl = self._mk_get_lbl(dpack, [dpack_inter])

        def merged_lbl(i):
            """Doc label where relevant else sentence label.

            Returns
            -------
            lbl:  string
                Predicted document-level label, else sentence-level
                label ; UNRELATED for missing values.
            """
            lbl = doc_lbl(i) if has_inter else None
            # missing document-level prediction: use sentence-level
            # prediction
            if lbl is None:
                lbl = sent_lbl(i)
            # fallback: it may have fallen through the cracks
            # (i.e. it may be neither in a sentence nor a head)
            if lbl is None:
                lbl = unrelated_lbl
            return lbl

        # merge results
        prediction = np.fromiter((merged_lbl(i) for i in range(len(dpack))),
                                 dtype=np.dtype(np.int16))
        graph = dpack.graph.tweak(prediction=prediction)
        dpack = dpack.set_graph(graph)

        if self._verbose:
            # check for hallucinated and lost inter edges
            inter_edges_pred = [(edu1.id, edu2.id, merged_lbl(i))
                                for i, (edu1,
                                        edu2) in enumerate(dpack.pairings)
                                if (edu1.subgrouping != edu2.subgrouping
                                    and merged_lbl(i) != unrelated_lbl)]
            inter_edges_true = [(edu1.id, edu2.id, dpack.target[i])
                                for i, (edu1,
                                        edu2) in enumerate(dpack.pairings)
                                if (edu1.subgrouping != edu2.subgrouping
                                    and dpack.target[i] != unrelated_lbl)]
            if set(inter_edges_true) != set(inter_edges_pred):
                print('Lost inter edges: {}'.format(
                    sorted(set(inter_edges_true) - set(inter_edges_pred))))
                print()
                print('Hallucinated inter edges: {}'.format(
                    sorted(set(inter_edges_pred) - set(inter_edges_true))))

        return dpack
Exemple #54
0
def GridSplineToMesh2d(x,
                       y,
                       data,
                       xi,
                       yi,
                       default_value=np.nan,
                       plotonly=False,
                       fill_nans=False):  #{{{
    '''
	Python analog to InterpFromGridToMesh.  This routine uses
	scipy.interpolate.CloughTocher2DInterpolator to create a bivariate spline
	interpolation of the input data and then returns values of the spline
	on the x,y coordinates of the model mesh.  The interpolant is piece-wise
	cubic, C1 smooth (continuously differentiable) and has approximately
	minimized curvature.  See "help(scipy.interpolate.CloughTocher2DInterpolator)"
	for more information on the routine.

	NOTE: this routine will not be appropriate if there are large holes (nan's) in 
	the input data.  A non-spline interpolation scheme should be used in that case.

	x,y:				vectors defining the coordinates of the input data
	data:				2D array of input data
	xi,yi:			x and y coordinates to be interpolated onto
	default_value:	default value if points lie outside the convex hull of input
						points (defaults to nan if not specified)
	plotonly:		plot the data to be interpolated using imshow (useful for checking the input data)
	fill_nans:		fill nan's (holes) in data using the spline fit?

	Usage:
		interpdata=GridSplineToMesh2d(x,y,data,xi,yi,default_value=np.nan,plotonly=False,fill_nans=False)

	Examples:
		interpdata=GridSplineToMesh2d(x_m,y_m,data,md.mesh.x,md.mesh.y,0)
	'''

    if np.ndim(x) == 2:
        x = x.reshape(-1, )
    if np.ndim(y) == 2:
        y = y.reshape(-1, )
    if len(x) != data.shape[1] + 1 and len(x) != data.shape[1]:
        raise ValueError(
            'x should have same length as ncols(data) or ncols(data)+1')
    if len(y) != data.shape[0] + 1 and len(y) != data.shape[0]:
        raise ValueError(
            'y should have same length as nrows(data) or nrows(data)+1')

    # create sub-grid that just covers the limits of xi and yi
    dx = x[1] - x[0]
    dy = y[1] - y[0]
    xlim = [min(xi) - dx, max(xi) + dx]
    ylim = [min(yi) - dy, max(yi) + dy]

    # TODO create grid differently depending on whether data is defined at x,y
    # or at the center of a grid cell with corner coordinates defined by xi,yi
    # create points array and flattened data array
    if len(x) == data.shape[1] and len(y) == data.shape[0]:
        print('		x,y taken to define the center of data grid cells')
        xind = np.nonzero(np.logical_and(x > xlim[0], x < xlim[1]))[0]
        yind = np.nonzero(np.logical_and(y > ylim[0], y < ylim[1]))[0]
        xg, yg = np.meshgrid(x[xind], y[yind])
        subdata = data[yind[0]:yind[-1] + 1, xind[0]:xind[-1] + 1]
    elif len(x) == data.shape[1] + 1 and len(y) == data.shape[0] + 1:
        print('		x,y taken to define the corners of data grid cells')
        xcenter = np.fromiter(
            ((x[i] + x[i + 1]) / 2 for i in range(len(x) - 1)), float)
        ycenter = np.fromiter(
            ((y[i] + y[i + 1]) / 2 for i in range(len(y) - 1)), float)
        xind = np.nonzero(np.logical_and(xcenter > xlim[0],
                                         xcenter < xlim[1]))[0]
        yind = np.nonzero(np.logical_and(ycenter > ylim[0],
                                         ycenter < ylim[1]))[0]
        xg, yg = np.meshgrid(xcenter[xind], ycenter[yind])
        subdata = data[yind[0]:yind[-1] + 1, xind[0]:xind[-1] + 1]
    else:
        raise ValueError(
            'x and y have inconsistent sizes: both should have length ncols(data)/nrows(data) or ncols(data)+1/nrows(data)+1'
        )

    points = np.array([xg.ravel(), yg.ravel()]).T
    flatsubdata = subdata.ravel()

    if plotonly:
        plt.imshow(np.flipud(subdata), origin='upper')
        plt.show()
        return

    # mask out any nan's in the data and corresponding coordinate points
    mask = np.isnan(flatsubdata)
    ind = np.nonzero(mask)[0]
    if len(ind) and fill_nans:
        print "		WARNING: filling nans using spline fit through good data points, which may or may not be appropriate. Check results carefully."
    goodsubdata = np.delete(flatsubdata, ind)
    goodpoints = np.delete(points, ind, axis=0)

    # create spline and index spline at mesh points
    spline = CloughTocher2DInterpolator(goodpoints, goodsubdata)
    interpdata = spline(xi, yi)

    if not fill_nans:
        # identify nan's in xi,yi using nearest neighbors
        xyinterp = np.dstack([xi, yi])[0]
        xydata = np.dstack([xg.ravel(), yg.ravel()])[0]
        tree = cKDTree(xydata)
        nearest = tree.query(xyinterp)[1]
        pos = np.nonzero(np.isnan(flatsubdata[nearest]))
        interpdata[pos] = flatsubdata[nearest][pos]

    return interpdata
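
# Minimal usage sketch for GridSplineToMesh2d (synthetic data, hypothetical
# values); assumes the original module's imports (numpy as np, scipy's
# CloughTocher2DInterpolator and cKDTree, matplotlib.pyplot as plt).
if __name__ == '__main__':
    x = np.linspace(0.0, 10.0, 21)
    y = np.linspace(0.0, 5.0, 11)
    data = np.sin(x)[None, :] * np.cos(y)[:, None]  # shape (11, 21): rows follow y
    xi = np.array([2.5, 4.0, 7.3])
    yi = np.array([1.1, 3.7, 2.2])
    print(GridSplineToMesh2d(x, y, data, xi, yi, default_value=np.nan))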
Exemple #55
0
def ebrisk(rupgetter, srcfilter, param, monitor):
    """
    :param rupgetter:
        a RuptureGetter instance
    :param srcfilter:
        a SourceFilter instance
    :param param:
        a dictionary of parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        an ArrayWrapper with shape (E, L, T, ...)
    """
    crmodel = param['crmodel']
    lba = param['lba']
    E = rupgetter.num_events
    L = len(lba.loss_names)
    N = len(srcfilter.sitecol.complete)
    e1 = rupgetter.first_event
    with monitor('getting assets', measuremem=False):
        with datastore.read(srcfilter.filename) as dstore:
            assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
    A = len(assetcol)
    getter = getters.GmfGetter(rupgetter, srcfilter, param['oqparam'])
    with monitor('getting hazard'):
        getter.init()  # instantiate the computers
        hazard = getter.get_hazard_by_sid()  # sid -> (sid, eid, gmv)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    events = rupgetter.get_eid_rlz()
    # numpy.testing.assert_equal(events['eid'], sorted(events['eid']))
    eid2idx = dict(zip(events['eid'], range(e1, e1 + E)))
    tagnames = param['aggregate_by']
    shape = assetcol.tagcol.agg_shape((E, L), tagnames)
    elt_dt = [('event_id', U64), ('rlzi', U16), ('loss', (F32, shape[1:]))]
    if param['asset_loss_table']:
        alt = numpy.zeros((A, E, L), F32)
    acc = numpy.zeros(shape, F32)  # shape (E, L, T...)
    # NB: IMT-dependent weights are not supported in ebrisk
    times = numpy.zeros(N)  # risk time per site_id
    num_events_per_sid = 0
    epspath = param['epspath']
    gmf_nbytes = 0
    for sid, haz in hazard.items():
        gmf_nbytes += haz.nbytes
        t0 = time.time()
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        num_events_per_sid += len(haz)
        if param['avg_losses']:
            weights = getter.weights[[
                getter.eid2rlz[eid] for eid in haz['eid']
            ]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, epspath)
        eidx = numpy.array([eid2idx[eid] for eid in haz['eid']]) - e1
        haz['eid'] = eidx + e1
        with mon_risk:
            out = get_output(crmodel, assets_by_taxo, haz)
        with mon_agg:
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                tagi = asset[tagnames] if tagnames else ()
                tagidxs = tuple(idx - 1 for idx in tagi)
                losses_by_lt = {}
                for lti, lt in enumerate(crmodel.loss_types):
                    lratios = out[lt][a]
                    if lt == 'occupants':
                        losses = lratios * asset['occupants_None']
                    else:
                        losses = lratios * asset['value-' + lt]
                    if param['asset_loss_table']:
                        alt[aid, eidx, lti] = losses
                    losses_by_lt[lt] = losses
                for loss_idx, losses in lba.compute(asset, losses_by_lt):
                    acc[(eidx, loss_idx) + tagidxs] += losses
                    if param['avg_losses']:
                        lba.losses_by_A[aid, loss_idx] += (losses @ weights *
                                                           param['ses_ratio'])
            times[sid] = time.time() - t0
    if hazard:
        num_events_per_sid /= len(hazard)
    with monitor('building event loss table'):
        elt = numpy.fromiter(
            (
                (event['eid'], event['rlz'], losses)  # losses (L, T...)
                for event, losses in zip(events, acc) if losses.sum()),
            elt_dt)
        agg = general.AccumDict(accum=numpy.zeros(shape[1:], F32))  # rlz->agg
        for rec in elt:
            agg[rec['rlzi']] += rec['loss'] * param['ses_ratio']
    res = {
        'elt': elt,
        'agg_losses': agg,
        'times': times,
        'events_per_sid': num_events_per_sid,
        'gmf_nbytes': gmf_nbytes
    }

    res['losses_by_A'] = lba.losses_by_A
    if param['asset_loss_table']:
        eidx = numpy.array([eid2idx[eid] for eid in events['eid']])
        res['alt_eidx'] = alt, eidx
    return res
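
# Standalone sketch of the event-loss-table pattern above (hypothetical data):
# numpy.fromiter with a structured dtype builds a compact record array while
# the generator filters out all-zero loss rows, mirroring the 'building event
# loss table' block.
if __name__ == '__main__':
    import numpy
    F32, U16, U64 = numpy.float32, numpy.uint16, numpy.uint64
    events = numpy.array([(0, 1), (1, 0), (2, 1)],
                         dtype=[('eid', U64), ('rlz', U16)])
    acc = numpy.array([[1.5, 0.0], [0.0, 0.0], [2.0, 3.0]], F32)  # shape (E, L)
    elt_dt = [('event_id', U64), ('rlzi', U16), ('loss', (F32, acc.shape[1:]))]
    elt = numpy.fromiter(
        ((ev['eid'], ev['rlz'], losses)
         for ev, losses in zip(events, acc) if losses.sum()),
        elt_dt)
    print(elt)  # two records survive; the all-zero row is dropped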
Exemple #56
0
    def _recombine(self, dpack, spacks):
        """Parse a document using partial parses for each subgroup.

        The current implementation behaves like the SoftParser, requiring
        a global model to score both the (almost fixed) intra edges and the
        inter edges.

        Parameters
        ----------
        dpack : DataPack
            Datapack for the whole document

        spacks : list of DataPack
            List of datapacks, one per subgroup (sentence).

        Returns
        -------
        dpack : DataPack
            Datapack for the whole document, filled with a parse.
        """
        unrelated_lbl = dpack.label_number(UNRELATED)
        # intra-sentential predictions
        sent_lbl = self._mk_get_lbl(dpack, spacks)

        if self._verbose:
            # check for lost and hallucinated intra- edges
            print('>>> check intra 1 >>>')
            self._check_intra_edges(dpack, spacks)
            print('<<< end check intra 1 <<<')

        # fix intra-sentential decisions before the inter-sentential phase
        dpack = self._fix_intra_edges(dpack, spacks)

        # call inter-sentential parser
        dpack_inter = self._select_frontiers(dpack, spacks)
        has_inter = len(dpack_inter) > 0
        if has_inter:
            # collect indices of inter pairings in dpack_inter
            # so we can instruct the inter parser to keep its nose
            # out of intra stuff
            inter_indices = idxes_inter(dpack_inter, include_fake_root=True)
            dpack_inter = self._parsers.inter.transform(
                dpack_inter, nonfixed_pairs=inter_indices)

        doc_lbl = self._mk_get_lbl(dpack, [dpack_inter])

        def merged_lbl(i):
            """Doc label where relevant else sentence label.

            Returns
            -------
            lbl : string
                Predicted document-level label, else sentence-level
                label; UNRELATED for missing values.
            """
            lbl = doc_lbl(i) if has_inter else None
            # missing document-level prediction: use sentence-level
            # prediction
            if lbl is None:
                lbl = sent_lbl(i)
            # fallback: it may have fallen through the cracks
            # (i.e. it may be neither in a sentence nor a head)
            if lbl is None:
                lbl = unrelated_lbl
            return lbl

        # merge results
        prediction = np.fromiter((merged_lbl(i) for i in range(len(dpack))),
                                 dtype=np.dtype(np.int16))
        graph = dpack.graph.tweak(prediction=prediction)
        dpack = dpack.set_graph(graph)

        if self._verbose:
            # 2nd check for lost and hallucinated intra- edges
            print('>>> check intra 2 >>>')
            self._check_intra_edges(dpack, spacks)
            print('<<< end check intra 2 <<<')
            # check for lost and hallucinated inter- edges
            # TODO turn into _check_inter_edges
            inter_edges_pred = [(edu1.id, edu2.id, merged_lbl(i))
                                for i, (edu1,
                                        edu2) in enumerate(dpack.pairings)
                                if (edu1.subgrouping != edu2.subgrouping
                                    and merged_lbl(i) != unrelated_lbl)]
            inter_edges_true = [(edu1.id, edu2.id, dpack.target[i])
                                for i, (edu1,
                                        edu2) in enumerate(dpack.pairings)
                                if (edu1.subgrouping != edu2.subgrouping
                                    and dpack.target[i] != unrelated_lbl)]
            if set(inter_edges_true) != set(inter_edges_pred):
                print('Lost inter edges: {}'.format(
                    sorted(set(inter_edges_true) - set(inter_edges_pred))))
                print()
                print('Hallucinated inter edges: {}'.format(
                    sorted(set(inter_edges_pred) - set(inter_edges_true))))

        return dpack
Exemple #57
0
import numpy as np


def readwords():
    with open('input/day1.txt') as f:
        for line in f.read().splitlines():
            yield int(line.strip())


inp = np.fromiter(readwords(), int)
increased = inp[1:] > inp[0:-1]
print(sum(increased))

# part two
sums = inp[0:-2] + inp[1:-1] + inp[2:]
increased = sums[1:] > sums[0:-1]
print(sum(increased))
Exemple #58
0
def MakeNdarray(tensor):
    """Create a numpy ndarray from a tensor.

  Create a numpy ndarray with the same shape and data as the tensor.

  Args:
    tensor: A TensorProto.

  Returns:
    A numpy array with the tensor contents.

  Raises:
    TypeError: if tensor has unsupported type.

  """
    shape = [d.size for d in tensor.tensor_shape.dim]
    num_elements = np.prod(shape)
    tensor_dtype = dtypes.as_dtype(tensor.dtype)
    dtype = tensor_dtype.as_numpy_dtype

    if tensor.tensor_content:
        # np.fromstring is deprecated for binary data; frombuffer + copy keeps
        # the result writable
        return np.frombuffer(tensor.tensor_content, dtype=dtype).copy().reshape(shape)
    elif tensor_dtype == dtypes.float16:
        # the half_val field of the TensorProto stores the binary representation
        # of the fp16: we need to reinterpret this as a proper float16
        if len(tensor.half_val) == 1:
            tmp = np.array(tensor.half_val[0], dtype=np.uint16)
            tmp.dtype = np.float16
            return np.repeat(tmp, num_elements).reshape(shape)
        else:
            tmp = np.fromiter(tensor.half_val, dtype=np.uint16)
            tmp.dtype = np.float16
            return tmp.reshape(shape)
    elif tensor_dtype == dtypes.float32:
        if len(tensor.float_val) == 1:
            return np.repeat(np.array(tensor.float_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.float_val, dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.float64:
        if len(tensor.double_val) == 1:
            return np.repeat(np.array(tensor.double_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.double_val, dtype=dtype).reshape(shape)
    elif tensor_dtype in [
            dtypes.int32, dtypes.uint8, dtypes.uint16, dtypes.int16,
            dtypes.int8, dtypes.qint32, dtypes.quint8, dtypes.qint8,
            dtypes.qint16, dtypes.quint16, dtypes.bfloat16
    ]:
        if len(tensor.int_val) == 1:
            return np.repeat(np.array(tensor.int_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.int_val, dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.int64:
        if len(tensor.int64_val) == 1:
            return np.repeat(np.array(tensor.int64_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.int64_val, dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.string:
        if len(tensor.string_val) == 1:
            return np.repeat(np.array(tensor.string_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.array([x for x in tensor.string_val],
                            dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.complex64:
        it = iter(tensor.scomplex_val)
        if len(tensor.scomplex_val) == 2:
            return np.repeat(
                np.array(complex(tensor.scomplex_val[0],
                                 tensor.scomplex_val[1]),
                         dtype=dtype), num_elements).reshape(shape)
        else:
            return np.array([complex(x[0], x[1]) for x in zip(it, it)],
                            dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.complex128:
        it = iter(tensor.dcomplex_val)
        if len(tensor.dcomplex_val) == 2:
            return np.repeat(
                np.array(complex(tensor.dcomplex_val[0],
                                 tensor.dcomplex_val[1]),
                         dtype=dtype), num_elements).reshape(shape)
        else:
            return np.array([complex(x[0], x[1]) for x in zip(it, it)],
                            dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.bool:
        if len(tensor.bool_val) == 1:
            return np.repeat(np.array(tensor.bool_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.bool_val, dtype=dtype).reshape(shape)
    else:
        raise TypeError("Unsupported tensor type: %s" % tensor.dtype)
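
# Standalone sketch of the two decoding paths above (hypothetical values):
# a single stored value is broadcast with np.repeat, while a full list of
# values is materialised with np.fromiter, as in the float/int branches.
if __name__ == '__main__':
    import numpy as np
    shape, dtype = [2, 3], np.float32
    num_elements = np.prod(shape)
    single_val = [1.5]                    # proto stored one value for all cells
    full_vals = [0., 1., 2., 3., 4., 5.]  # proto stored every cell explicitly
    print(np.repeat(np.array(single_val[0], dtype=dtype),
                    num_elements).reshape(shape))
    print(np.fromiter(full_vals, dtype=dtype).reshape(shape))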
Exemple #59
0
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

try:
    import numpy as np
except ImportError:
    exit()

from deap import benchmarks


def schwefel_arg0(sol):
    return benchmarks.schwefel(sol)[0]


fig = plt.figure()
# ax = Axes3D(fig, azim = -29, elev = 50)
ax = Axes3D(fig)
X = np.arange(-500, 500, 10)
Y = np.arange(-500, 500, 10)
X, Y = np.meshgrid(X, Y)
Z = np.fromiter(map(schwefel_arg0, zip(X.flat, Y.flat)),
                dtype=float,
                count=X.shape[0] * X.shape[1]).reshape(X.shape)

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet, linewidth=0.2)

plt.xlabel("x")
plt.ylabel("y")

plt.show()
Exemple #60
0
def k_array_constructor(list_of_parameters):
    """ Constructs Δy (k_array) for every component. """
    return np.fromiter(
        (h * f(list_of_parameters) for f in derivatives), float,
        n)  # numpy.array from iterator, dimension: n
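
# Minimal standalone sketch of the pattern above (hypothetical system): build
# the Δy increments for a two-component ODE y' = f(y) with one np.fromiter call.
if __name__ == '__main__':
    import numpy as np
    h = 0.1                          # step size (assumed)
    derivatives = (lambda y: y[1],   # dy0/dt = y1
                   lambda y: -y[0])  # dy1/dt = -y0 (harmonic oscillator)
    n = len(derivatives)
    y = np.array([1.0, 0.0])
    k_array = np.fromiter((h * f(y) for f in derivatives), float, n)
    print(k_array)                   # [ 0.  -0.1]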