Example #1
def mesh_map(nnode, ncell, cnode, blockid, coords, filename, scale_factor):
    # Copy data to contiguous arrays and cast as needed
    cnodec = sp.ascontiguousarray(cnode)
    cnodec = cnodec.astype(sp.int32, casting="same_kind", copy=False)

    blockidc = sp.ascontiguousarray(blockid)
    blockidc = blockidc.astype(sp.int32, casting="same_kind", copy=False)

    coordsc = sp.ascontiguousarray(coords)
    coordsc = coordsc.astype(sp.float64, casting="same_kind", copy=False)

    # send back data on the mesh mapping and the new mesh
    cdata = libgridmap.mesh_map(
        nnode, ncell, cnodec.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
        blockidc.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
        coordsc.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        bytes(filename, "utf-8"), scale_factor)

    # pull data out of the addresses indicated, and put in scipy arrays
    return MapData(
        cdata.ncell, cdata.nnode,
        sp.ctypeslib.as_array(cdata.coord, shape=(cdata.nnode, 3)),
        sp.ctypeslib.as_array(cdata.connect, shape=(cdata.ncell, 8)),
        sp.ctypeslib.as_array(cdata.blockid, shape=(cdata.ncell, )),
        cdata.mesh_map)
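
The snippet above follows a common pattern for handing NumPy buffers to C code: force a contiguous layout and an exact dtype, then pass the raw pointer. A minimal self-contained sketch of that pattern (illustrative names only, not part of this example's libgridmap API):

import ctypes
import numpy as np

arr = np.arange(12, dtype=np.float64).reshape(3, 4)[:, ::2]   # non-contiguous view
buf = np.ascontiguousarray(arr)                                # copy into C-contiguous memory
buf = buf.astype(np.float64, casting="same_kind", copy=False)  # enforce the dtype the C side expects
ptr = buf.ctypes.data_as(ctypes.POINTER(ctypes.c_double))      # raw double* for a ctypes call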
Example #2
def mcfilter(mc_data, mc_filt):
    """filter a multi-channeled signal with a multi-channeled filter

    This is the Python implementation for batch mode filtering. The signal
    will be zero-padded on both ends to overcome filter artifacts.

    :type mc_data: ndarray
    :param mc_data: signal data [data_samples, channels]
    :type mc_filt: ndarray
    :param mc_filt: FIR filter [filter_samples, channels]
    :rtype: ndarray
    :returns: filtered signal [data_samples]
    """

    if CYTHON_AVAILABLE is True:
        dtype = mc_data.dtype
        if dtype not in [sp.float32, sp.float64]:
            dtype = sp.float32
        if mc_data.shape[1] != mc_filt.shape[1]:
            raise ValueError("channel count does not match")
        mc_data, mc_filt = (sp.ascontiguousarray(mc_data, dtype=dtype),
                            sp.ascontiguousarray(mc_filt, dtype=dtype))
        if dtype == sp.float32:
            return _mcfilter_cy32(mc_data, mc_filt)
        elif dtype == sp.float64:
            return _mcfilter_cy64(mc_data, mc_filt)
        else:
            raise TypeError("dtype is not float32 or float64: %s" % dtype)
    else:
        return _mcfilter_py(mc_data, mc_filt)
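
A hedged usage sketch for the function above; the import path and the Cython/pure-Python dispatch are assumptions about the surrounding package, not shown in the snippet:

import numpy as np
from mcfilter import mcfilter                         # assumed import path

data = np.random.randn(1000, 4).astype(np.float32)   # [data_samples, channels]
filt = (np.ones((21, 4)) / 21.0).astype(np.float32)  # 21-tap FIR per channel
out = mcfilter(data, filt)                            # 1-d output of length data_samples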
Example #3
def mcfilter(mc_data, mc_filt):
    """filter a multi-channeled signal with a multi-channeled filter

    This is the Python implementation for batch mode filtering. The signal
    will be zero-padded on both ends to overcome filter artifacts.

    :type mc_data: ndarray
    :param mc_data: signal data [data_samples, channels]
    :type mc_filt: ndarray
    :param mc_filt: FIR filter [filter_samples, channels]
    :rtype: ndarray
    :returns: filtered signal [data_samples]
    """

    if CYTHON_AVAILABLE is True:
        dtype = mc_data.dtype
        if dtype not in [sp.float32, sp.float64]:
            dtype = sp.float32
        if mc_data.shape[1] != mc_filt.shape[1]:
            raise ValueError("channel count does not match")
        mc_data, mc_filt = (sp.ascontiguousarray(mc_data, dtype=dtype),
                            sp.ascontiguousarray(mc_filt, dtype=dtype))
        if dtype == sp.float32:
            return _mcfilter_cy32(mc_data, mc_filt)
        elif dtype == sp.float64:
            return _mcfilter_cy64(mc_data, mc_filt)
        else:
            raise TypeError("dtype is not float32 or float64: %s" % dtype)
    else:
        return _mcfilter_py(mc_data, mc_filt)
Example #4
	def init_likelihood_paired(self):
		"""
		Same as ::init_likelihood_single:: except does it for paired regions
		Initializes self.P
		"""
		self.P = zeros((self.nnode_p, self.ncol_p, self.nbase_p))
		for k in xrange(self.msa.nseq): # for each leaf node k
			self.P[k, :, :] = -scipy.inf 
			seq = self.msa.aln[k]
			for ind,(j1,j2) in enumerate(self.paired_cols):
				p = seq[j1] + seq[j2]
				if p not in self.paired_model.pair_index:
					print >> sys.stderr, "temporarily assign ambiguous codes to AA"
					p = 'AA'
				if '-' in p and self.treat_gap_as_missing:
					if p == '--': 
						toadd = filter(lambda x: '-' not in x, self.paired_model.pair_index)
						assert len(toadd) == 16
						for x in toadd:
							self.P[k, ind, self.paired_model.pair_index[x]] = log(1./16)
					elif p.endswith('-'):
						toadd = filter(lambda x: '-' not in x and x[0]==p[0], self.paired_model.pair_index)
						assert len(toadd) == 4
						for x in toadd:
							self.P[k, ind, self.paired_model.pair_index[x]] = log(1./4)
					else:
						toadd = filter(lambda x: '-' not in x and x[1]==p[1], self.paired_model.pair_index)
						assert len(toadd) == 4
						for x in toadd:
							self.P[k, ind, self.paired_model.pair_index[x]] = log(1./4)
					print p, toadd
				else:
					self.P[k, ind, self.paired_model.pair_index[p]] = 0
		self.P = scipy.ascontiguousarray(self.P.reshape(self.P.size))
Example #5
    def testMainSingle(self, verbose=VERBOSE.PLOT):
        import time

        # setup
        V = VERBOSE(verbose)
        TF = 21
        NC = 2
        spike_proto_sc = sp.cos(sp.linspace(-sp.pi, 3 * sp.pi, TF))
        spike_proto_sc *= sp.hanning(TF)
        scale = sp.linspace(0, 2, TF)
        xi1 = sp.vstack(
            (spike_proto_sc * 5 * scale, spike_proto_sc * 4 * scale)).T
        xi2 = sp.vstack((spike_proto_sc * .5 * scale[::-1],
                         spike_proto_sc * 9 * scale[::-1])).T
        templates = sp.asarray([xi1, xi2])
        LEN = 2000
        noise = sp.randn(LEN, NC)
        ce = TimeSeriesCovE(tf_max=TF, nc=NC)
        ce.update(noise)
        FB = BOTMNode(templates=templates, ce=ce, verbose=V, ovlp_taus=None)
        signal = sp.zeros_like(noise)
        NPOS = 4
        POS = [(int(i * LEN / (NPOS + 1)), 100) for i in xrange(1, NPOS + 1)]
        POS.append((100, 2))
        POS.append((150, 2))
        for pos, tau in POS:
            signal[pos:pos + TF] += xi1
            signal[pos + tau:pos + tau + TF] += xi2
        x = sp.ascontiguousarray(signal + noise, dtype=sp.float32)

        # test against
        if V.has_print:
            print '### constructed spike times ###'
        test_u0 = sorted([t_tpl[0] for t_tpl in POS])
        test_u1 = sorted([t_tpl[0] + t_tpl[1] for t_tpl in POS])
        test_rval = {
            0: sp.array(test_u0) + TF / 2,
            1: sp.array(test_u1) + TF / 2
        }
        if V.has_print:
            print test_rval

        # sort
        tic_o = time.clock()
        FB(x)
        toc_o = time.clock()
        if V.has_print:
            print '### sorting spike times ###'
            print FB.rval

        if V.has_plot:
            FB.plot_template_set(show=False)
            FB.plot_sorting(show=True)

        if V.has_print:
            print '###'
            print 'duration:', toc_o - tic_o

        for k in FB.rval:
            assert_array_almost_equal(FB.rval[k], test_rval[k], decimal=0)
Example #6
def load_feature_matrix(src, dtype=sp.float32):
    if src.endswith(".npz"):
        return smat.load_npz(src).tocsr().astype(dtype)
    elif src.endswith(".npy"):
        return smat.csr_matrix(sp.ascontiguousarray(sp.load(src), dtype=dtype))
    else:
        raise ValueError("src must end with .npz or .npy")
Example #7
    def _execute(self, x):
        """apply the filter to data"""

        # DOC: ascontiguousarray is here for ctypes/cython purposes
        x_in = sp.ascontiguousarray(x, dtype=self.dtype)[:, self._chan_set]
        rval, self._hist = mcfilter_hist(x_in, self._f, self._hist)
        return rval
Example #8
	def init_likelihood_single(self):
		"""
		Initialize likelihood[k][j] for k=0,1,2...n-1 (the n leaves) and all columns j
		likelihood[k] is a L x 5 matrix where likelihood[k][j,x] is log(1) iff j-th column
		of (leaf) sequence k is the x-th base determined by SingleModel.nucleotide_index
		(order should be A,C,G,T,-)

		Initializes self.S
		"""
		self.S = zeros((self.nnode, self.ncol, self.nbase))
		self.S[:] = -scipy.inf
		for k in xrange(self.msa.nseq):
			seq = self.msa.aln[k] # sequence of the k-th leaf
			for j,col in enumerate(self.single_cols):
				p = seq[col]
				if p not in self.single_model.nucleotide_index: # TODO: delete later
					print >> sys.stderr, "temporarily assign ambiguous codes to A"
					p = 'A'
				if p == '-' and self.treat_gap_as_missing:
					print >> sys.stderr, "treating gap as missing data!"
					self.S[k, j, self.single_model.nucleotide_index['A']] = log(.25)
					self.S[k, j, self.single_model.nucleotide_index['T']] = log(.25)
					self.S[k, j, self.single_model.nucleotide_index['C']] = log(.25)
					self.S[k, j, self.single_model.nucleotide_index['G']] = log(.25)
				else:
					self.S[k, j, self.single_model.nucleotide_index[p]] = 0 # 0 means log(1)
		self.S = scipy.ascontiguousarray(self.S.reshape(self.S.size))
Example #9
def unique_rows(arr):
    """Returns a copy of arr with duplicate rows removed.
    
    From Stackoverflow "Find unique rows in numpy.array."
    
    Parameters
    ----------
    arr : :py:class:`Array`, (`m`, `n`). The array to find the unique rows of.
    
    Returns
    -------
    unique : :py:class:`Array`, (`p`, `n`) where `p` <= `m`
        The array `arr` with duplicate rows removed.
    """
    b = scipy.ascontiguousarray(arr).view(
        scipy.dtype((scipy.void, arr.dtype.itemsize * arr.shape[1])))
    try:
        dum, idx = scipy.unique(b, return_index=True)
    except TypeError:
        # Handle bug in numpy 1.6.2:
        rows = [_Row(row) for row in b]
        srt_idx = sorted(range(len(rows)), key=rows.__getitem__)
        rows = scipy.asarray(rows)[srt_idx]
        row_cmp = [-1]
        for k in xrange(1, len(srt_idx)):
            row_cmp.append(rows[k - 1].__cmp__(rows[k]))
        row_cmp = scipy.asarray(row_cmp)
        transition_idxs = scipy.where(row_cmp != 0)[0]
        idx = scipy.asarray(srt_idx)[transition_idxs]
    return arr[idx]
Example #10
    def _execute(self, x):
        """apply the filter to data"""

        # DOC: ascontiguousarray is here for ctypes/cython purposes
        x_in = sp.ascontiguousarray(x, dtype=self.dtype)[:, self._chan_set]
        rval, self._hist = mcfilter_hist(x_in, self._f, self._hist)
        return rval
Example #11
def unique_rows(x):
    """This function takes a 2D scipy array x and makes it unique by rows."""

    y = sp.ascontiguousarray(x).view(sp.dtype((sp.void, x.dtype.itemsize * x.shape[1])))
    _, idx = sp.unique(y, return_index=True)

    return x[idx]
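
The void-dtype view makes each row compare as one opaque byte string, so unique can work row-wise. A self-contained sketch of the same trick with plain NumPy (modern SciPy no longer re-exports these NumPy functions):

import numpy as np

x = np.array([[1, 2], [3, 4], [1, 2], [5, 6]])
y = np.ascontiguousarray(x).view(np.dtype((np.void, x.dtype.itemsize * x.shape[1])))
_, idx = np.unique(y, return_index=True)
print(x[idx])   # three unique rows; the duplicate [1, 2] is collapsed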
Example #12
def unique_rows(arr):
    """Returns a copy of arr with duplicate rows removed.
    
    From Stackoverflow "Find unique rows in numpy.array."
    
    Parameters
    ----------
    arr : :py:class:`Array`, (`m`, `n`). The array to find the unique rows of.
    
    Returns
    -------
    unique : :py:class:`Array`, (`p`, `n`) where `p` <= `m`
        The array `arr` with duplicate rows removed.
    """
    b = scipy.ascontiguousarray(arr).view(
        scipy.dtype((scipy.void, arr.dtype.itemsize * arr.shape[1]))
    )
    try:
        dum, idx = scipy.unique(b, return_index=True)
    except TypeError:
        # Handle bug in numpy 1.6.2:
        rows = [_Row(row) for row in b]
        srt_idx = sorted(range(len(rows)), key=rows.__getitem__)
        rows = scipy.asarray(rows)[srt_idx]
        row_cmp = [-1]
        for k in xrange(1, len(srt_idx)):
            row_cmp.append(rows[k-1].__cmp__(rows[k]))
        row_cmp = scipy.asarray(row_cmp)
        transition_idxs = scipy.where(row_cmp != 0)[0]
        idx = scipy.asarray(srt_idx)[transition_idxs]
    return arr[idx]
Example #13
def unique_rows(x):
    """This function takes a 2D scipy array x and makes it unique by rows."""

    y = sp.ascontiguousarray(x).view(
        sp.dtype((sp.void, x.dtype.itemsize * x.shape[1])))
    _, idx = sp.unique(y, return_index=True)

    return x[idx]
Example #14
def tc_read(filename):
    m, n, k = c_int(0), c_int(0), c_int(0)
    _clib.tc_read_size(_to_cstr(filename), byref(m), byref(n), byref(k))
    W = sp.zeros((m.value, k.value), dtype=sp.float64)
    H = sp.zeros((k.value, n.value), dtype=sp.float64)
    Wptr = W.ctypes.data_as(POINTER(c_double))
    Hptr = H.ctypes.data_as(POINTER(c_double))
    _clib.tc_read_content(_to_cstr(filename), m, n, k, Wptr, Hptr)
    return W, sp.ascontiguousarray(H.T)
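
Note the final ascontiguousarray: H.T is a Fortran-ordered view, so the call copies it back into row-major memory before returning. A tiny self-contained illustration:

import numpy as np

H = np.zeros((3, 5))
print(H.T.flags['C_CONTIGUOUS'])                        # False: the transpose is a view
print(np.ascontiguousarray(H.T).flags['C_CONTIGUOUS'])  # True: copied to row-major order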
Example #15
def main(msa_filename, tree_filename, single_model_filename=os.path.join(os.environ['LCODE'],'data/single_model'), \
		paired_model_filename=os.path.join(os.environ['LCODE'],'data/pair_model')):
	from MSA import MSA
	from EvoModel import SingleModel, PairedModel
	from Tree import *

	msa = MSA(msa_filename)

	single_model = SingleModel(single_model_filename)
	paired_model = PairedModel(paired_model_filename, single_model)

	# --------------- using newick ---------------------
#	acc = list(msa.ids)
#	post_order_traversal(t, acc)
#	order = acc[msa.nseq:]
	# -------------- using dendropy -------------------
	t2 = dendropy.Tree.get_from_path(tree_filename, 'newick')
	msa.remove_seqs_not_in_tree([x.taxon.label for x in t2.leaf_nodes()])
	t = t2
	order = postorder_assign_then_traverse(t, list(msa.ids))
	
	single_cols = xrange(msa.aln_len)
	paired_cols = msa.BP.items()
	paired_cols.sort()
	n = msa.nseq

	S = init_likelihood(msa, single_cols, single_model)

	g = MyMat.calc_likelihood
	# NOTE: NO LONGER logs the single model Frequency!
	# first calculate the null model (joint indep prob at each position)
	# TODO: this is not the fastest code ever....but will do for now
	L_null = [sum(sum(exp(S[:msa.nseq, col, :4]) * log(single_model.Frequency))) for col in single_cols]

	# convert S into 1d
	nnode, ncol, nbase = S.shape
	S = scipy.ascontiguousarray(S.reshape(S.size))

	P = init_likelihood_paired(msa, paired_cols, paired_model, nnode)
	nnode_p, ncol_p, nbase_p = P.shape
	P = scipy.ascontiguousarray(P.reshape(P.size))

	like_s, like_s_n_p, S, P = calc_likelihood(msa, order, single_model, paired_model) # need to use this to set up S, P for rearr
	return like_s_n_p
Example #16
def mcfilter_hist(mc_data, mc_filt, mc_hist=None):
    """filter a multichanneled signal with a multichanneled fir filter

    This is the Python implementation for online mode filtering with a
    chunk-wise history item, holding the last samples of the preceding chunk.

    :type mc_data: ndarray
    :param mc_data: signal data [data_samples, channels]
    :type mc_filt: ndarray
    :param mc_filt: FIR filter [filter_samples, channels]
    :type mc_hist: ndarray
    :param mc_hist: history [hist_samples, channels]. The history is of size
        `filter_samples - 1`. If None, it will be substituted with zeros.
    :rtype: tuple(ndarray,ndarray)
    :returns: filter output [data_samples], history item [hist_samples,
        channels]
    """

    if mc_hist is None:
        mc_hist = sp.zeros((mc_filt.shape[0] - 1, mc_data.shape[1]))  # [hist_samples, channels]
    if mc_hist.shape[0] + 1 != mc_filt.shape[0]:
        raise ValueError("len(history)+1[%d] != len(filter)[%d]" %
                         (mc_hist.shape[0] + 1, mc_filt.shape[0]))
    if CYTHON_AVAILABLE is True:
        dtype = mc_data.dtype
        if dtype not in [sp.float32, sp.float64]:
            dtype = sp.float32
        if mc_data.shape[1] != mc_filt.shape[1]:
            raise ValueError("channel count does not match")
        mc_data, mc_filt, mc_hist = (sp.ascontiguousarray(mc_data,
                                                          dtype=dtype),
                                     sp.ascontiguousarray(mc_filt,
                                                          dtype=dtype),
                                     sp.ascontiguousarray(mc_hist,
                                                          dtype=dtype))
        if dtype == sp.float32:
            return _mcfilter_hist_cy32(mc_data, mc_filt, mc_hist)
        elif dtype == sp.float64:
            return _mcfilter_hist_cy64(mc_data, mc_filt, mc_hist)
        else:
            raise TypeError("dtype is not float32 or float64: %s" % dtype)
    else:
        return _mcfilter_hist_py(mc_data, mc_filt, mc_hist)
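
A hedged sketch of chunk-wise use of the function above: filter a long signal in blocks while threading the history item through, which should match a single batch call over the whole signal (import path assumed):

import numpy as np
from mcfilter import mcfilter_hist                    # assumed import path

data = np.random.randn(2000, 4).astype(np.float32)
filt = (np.ones((21, 4)) / 21.0).astype(np.float32)
hist = None
chunks = []
for start in range(0, data.shape[0], 500):
    out, hist = mcfilter_hist(data[start:start + 500], filt, hist)
    chunks.append(out)
filtered = np.concatenate(chunks)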
Example #17
def map_cell_field(src, mesh_map):
    # Copy to contiguous arrays and cast as needed,
    # then send pointer
    srcc = sp.ascontiguousarray(src)
    srcc = srcc.astype(sp.float64, casting="same_kind", copy=False)
    dest = sp.empty_like(srcc)
    libgridmap.map_cell_field_c(
        mesh_map, 1, len(srcc), 0.,
        srcc.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
        dest.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
    return dest
Example #18
def mcfilter_hist(mc_data, mc_filt, mc_hist=None):
    """filter a multichanneled signal with a multichanneled fir filter

    This is the Python implementation for online mode filtering with a
    chunk-wise history item, holding the last samples of the preceding chunk.

    :type mc_data: ndarray
    :param mc_data: signal data [data_samples, channels]
    :type mc_filt: ndarray
    :param mc_filt: FIR filter [filter_samples, channels]
    :type mc_hist: ndarray
    :param mc_hist: history [hist_samples, channels]. The history is of size
        `filter_samples - 1`. If None, it will be substituted with zeros.
    :rtype: tuple(ndarray,ndarray)
    :returns: filter output [data_samples], history item [hist_samples,
        channels]
    """

    if mc_hist is None:
        mc_hist = sp.zeros((mc_filt.shape[0] - 1, mc_data.shape[1]))  # [hist_samples, channels]
    if mc_hist.shape[0] + 1 != mc_filt.shape[0]:
        raise ValueError("len(history)+1[%d] != len(filter)[%d]" %
                         (mc_hist.shape[0] + 1, mc_filt.shape[0]))
    if CYTHON_AVAILABLE is True:
        dtype = mc_data.dtype
        if dtype not in [sp.float32, sp.float64]:
            dtype = sp.float32
        if mc_data.shape[1] != mc_filt.shape[1]:
            raise ValueError("channel count does not match")
        mc_data, mc_filt, mc_hist = (
            sp.ascontiguousarray(mc_data, dtype=dtype),
            sp.ascontiguousarray(mc_filt, dtype=dtype),
            sp.ascontiguousarray(mc_hist, dtype=dtype))
        if dtype == sp.float32:
            return _mcfilter_hist_cy32(mc_data, mc_filt, mc_hist)
        elif dtype == sp.float64:
            return _mcfilter_hist_cy64(mc_data, mc_filt, mc_hist)
        else:
            raise TypeError("dtype is not float32 or float64: %s" % dtype)
    else:
        return _mcfilter_hist_py(mc_data, mc_filt, mc_hist)
Example #19
def calc_likelihood(msa, order, single_model, paired_model):
	"""
	single_model.Frequency AND paired_model.Frequency should both NOT be in log scale!!

	Returns <single likelihood>, <paired likelihood>, S, P
	"""
	g = MyMat.calc_likelihood
	paired_cols = msa.BP.items()
	paired_cols.sort()
	single_cols = filter(lambda i: i not in msa.BP and i not in msa.BP.itervalues(), xrange(msa.aln_len))
	S = init_likelihood(msa, single_cols, single_model)
	nnode, ncol, nbase = S.shape
	S = scipy.ascontiguousarray(S.reshape(S.size))
	P = init_likelihood_paired(msa, paired_cols, paired_model, nnode)
	nnode_p, ncol_p, nbase_p = P.shape
	P = scipy.ascontiguousarray(P.reshape(P.size))
	L_single_conserved = g(single_model.gtr.R, S, log(single_model.Frequency), order, range(ncol), nnode, ncol, nbase)
	L_paired = g(paired_model.gtr.R, P, log(paired_model.Frequency), order, range(ncol_p), nnode_p, ncol_p, nbase_p)
	like_s = L_single_conserved.sum()
	like_snp = like_s + L_paired.sum()
	return like_s, like_snp, S, P
Example #20
 def filter_calculation(cls, xi, ce, cs, *args, **kwargs):
     tf, nc = xi.shape
     ## don't do loading for now
     # params = {'tf':tf, 'chan_set':cs}
     # if ce.is_cond_ok(**params) is True:
     #     icmx = ce.get_icmx(**params)
     # else:
     #     icmx = ce.get_icmx_loaded(**params)
     ##
     icmx = ce.get_icmx(tf=tf, chan_set=cs)
     f = sp.dot(mcvec_to_conc(xi), icmx)
     return sp.ascontiguousarray(mcvec_from_conc(f, nc=nc), dtype=xi.dtype)
Example #21
    def __init__(self, y, x, bias=-1):
        if (not isinstance(
                y,
            (list, tuple))) and (not (scipy and isinstance(y, scipy.ndarray))):
            raise TypeError("type of y: {0} is not supported!".format(type(y)))

        if isinstance(x, (list, tuple)):
            if len(y) != len(x):
                raise ValueError("len(y) != len(x)")
        elif scipy != None and isinstance(x, (scipy.ndarray, sparse.spmatrix)):
            if len(y) != x.shape[0]:
                raise ValueError("len(y) != len(x)")
            if isinstance(x, scipy.ndarray):
                x = scipy.ascontiguousarray(x)  # enforce row-major
            if isinstance(x, sparse.spmatrix):
                x = x.tocsr()
                pass
        else:
            raise TypeError("type of x: {0} is not supported!".format(type(x)))
        self.l = l = len(y)
        self.bias = -1

        max_idx = 0
        x_space = self.x_space = []
        if scipy != None and isinstance(x, sparse.csr_matrix):
            csr_to_problem(x, self)
            max_idx = x.shape[1]
        else:
            for i, xi in enumerate(x):
                tmp_xi, tmp_idx = gen_feature_nodearray(xi)
                x_space += [tmp_xi]
                max_idx = max(max_idx, tmp_idx)
        self.n = max_idx

        self.y = (c_double * l)()
        if scipy != None and isinstance(y, scipy.ndarray):
            scipy.ctypeslib.as_array(self.y, (self.l, ))[:] = y
        else:
            for i, yi in enumerate(y):
                self.y[i] = yi

        self.x = (POINTER(feature_node) * l)()
        if scipy != None and isinstance(x, sparse.csr_matrix):
            base = addressof(
                self.x_space.ctypes.data_as(POINTER(feature_node))[0])
            x_ptr = cast(self.x, POINTER(c_uint64))
            x_ptr = scipy.ctypeslib.as_array(x_ptr, (self.l, ))
            x_ptr[:] = self.rowptr[:-1] * sizeof(feature_node) + base
        else:
            for i, xi in enumerate(self.x_space):
                self.x[i] = xi

        self.set_bias(bias)
Example #22
 def filter_calculation(cls, xi, ce, cs, *args, **kwargs):
     tf, nc = xi.shape
     ## don't do loading for now
     # params = {'tf':tf, 'chan_set':cs}
     # if ce.is_cond_ok(**params) is True:
     #     icmx = ce.get_icmx(**params)
     # else:
     #     icmx = ce.get_icmx_loaded(**params)
     ##
     icmx = ce.get_icmx(tf=tf, chan_set=cs)
     f = sp.dot(mcvec_to_conc(xi), icmx)
     return sp.ascontiguousarray(mcvec_from_conc(f, nc=nc),
                                 dtype=xi.dtype)
Example #23
def simplex_array_boundary(s, parity):
    """
    Compute the boundary faces and boundary operator of an
    array of simplices with given simplex parities

    E.g.
    
      For a mesh with two triangles [0,1,2] and [1,3,2], the second
      triangle has opposite parity relative to sorted order.
      
      simplex_array_boundary(array([[0,1,2],[1,2,3]]),array([0,1]))
      
    """
    #TODO handle edge case as special case

    num_simplices = s.shape[0]
    faces_per_simplex = s.shape[1]
    num_faces = num_simplices * faces_per_simplex

    orientations = 1 - 2 * parity

    #faces[:,:-2] are the indices of the faces
    #faces[:,-2]  is the index of the simplex whose boundary produced the face
    #faces[:,-1]  is the orientation of the face in the boundary of the simplex
    faces = empty((num_faces, s.shape[1] + 1), dtype=s.dtype)
    for i in range(faces_per_simplex):
        rows = faces[num_simplices * i:num_simplices * (i + 1)]

        rows[:, :i] = s[:, :i]
        rows[:, i:-2] = s[:, i + 1:]
        rows[:, -2] = arange(num_simplices)
        rows[:, -1] = ((-1)**i) * orientations

    #sort rows
    faces = faces[lexsort(faces[:, :-2].T[::-1])]

    #find unique faces
    face_mask = ~hstack(
        (array([False]), alltrue(faces[1:, :-2] == faces[:-1, :-2], axis=1)))

    unique_faces = faces[face_mask, :-2]

    #compute CSR representation for boundary operator
    csr_indptr = hstack((arange(num_faces)[face_mask], array([num_faces])))
    csr_indices = ascontiguousarray(faces[:, -2])
    csr_data = faces[:, -1].astype('int8')

    shape = (len(unique_faces), num_simplices)
    boundary_operator = csr_matrix((csr_data, csr_indices, csr_indptr), shape)

    return unique_faces, boundary_operator
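
Following the docstring's own two-triangle example, a hedged usage sketch (assumes the function and its numpy/scipy.sparse dependencies are in scope):

faces, boundary = simplex_array_boundary(array([[0, 1, 2], [1, 2, 3]]),
                                         array([0, 1]))
# faces    -> the 5 unique edges of the two triangles: [0,1], [0,2], [1,2], [1,3], [2,3]
# boundary -> 5 x 2 sparse CSR matrix with +1/-1 entries giving each edge's orientation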
Example #24
	def __init__(self, y, x, bias = -1):
		if (not isinstance(y, (list, tuple))) and (not (scipy and isinstance(y, scipy.ndarray))):
			raise TypeError("type of y: {0} is not supported!".format(type(y)))

		if isinstance(x, (list, tuple)):
			if len(y) != len(x):
				raise ValueError("len(y) != len(x)")
		elif scipy != None and isinstance(x, (scipy.ndarray, sparse.spmatrix)):
			if len(y) != x.shape[0]:
				raise ValueError("len(y) != len(x)")
			if isinstance(x, scipy.ndarray):
				x = scipy.ascontiguousarray(x) # enforce row-major
			if isinstance(x, sparse.spmatrix):
				x = x.tocsr()
				pass
		else:
			raise TypeError("type of x: {0} is not supported!".format(type(x)))
		self.l = l = len(y)
		self.bias = -1

		max_idx = 0
		x_space = self.x_space = []
		if scipy != None and isinstance(x, sparse.csr_matrix):
			csr_to_problem(x, self)
			max_idx = x.shape[1]
		else:
			for i, xi in enumerate(x):
				tmp_xi, tmp_idx = gen_feature_nodearray(xi)
				x_space += [tmp_xi]
				max_idx = max(max_idx, tmp_idx)
		self.n = max_idx

		self.y = (c_double * l)()
		if scipy != None and isinstance(y, scipy.ndarray):
			scipy.ctypeslib.as_array(self.y, (self.l,))[:] = y
		else:
			for i, yi in enumerate(y):
				self.y[i] = yi

		self.x = (POINTER(feature_node) * l)()
		if scipy != None and isinstance(x, sparse.csr_matrix):
			base = addressof(self.x_space.ctypes.data_as(POINTER(feature_node))[0])
			x_ptr = cast(self.x, POINTER(c_uint64))
			x_ptr = scipy.ctypeslib.as_array(x_ptr,(self.l,))
			x_ptr[:] = self.rowptr[:-1] * sizeof(feature_node) + base
		else:
			for i, xi in enumerate(self.x_space):
				self.x[i] = xi

		self.set_bias(bias)
Example #25
def sortrows(data):
    """sort matrix by rows

    :type data: ndarray
    :param data: ndarray that should be sorted by its rows
    :returns: ndarray - data sorted by its rows.
    """

    ## FIX: this method assumes the data to be contiguous! we now make sure of that explicitly
    data = sp.ascontiguousarray(data)
    ## XIF
    return sp.sort(
        data.view([('', data.dtype)] * data.shape[1]), axis=0
    ).view(data.dtype)
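
A self-contained sketch of the same structured-view row sort with plain NumPy:

import numpy as np

d = np.ascontiguousarray([[3.0, 1.0], [1.0, 2.0], [1.0, 1.0]])
srt = np.sort(d.view([('', d.dtype)] * d.shape[1]), axis=0).view(d.dtype)
# srt -> [[1., 1.], [1., 2.], [3., 1.]]  (rows in lexicographic order)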
Example #26
def simplex_array_boundary(s,parity):
    """
    Compute the boundary faces and boundary operator of an
    array of simplices with given simplex parities

    E.g.
    
      For a mesh with two triangles [0,1,2] and [1,3,2], the second
      triangle has opposite parity relative to sorted order.
      
      simplex_array_boundary(array([[0,1,2],[1,2,3]]),array([0,1]))
      
    """
    #TODO handle edge case as special case
    
    num_simplices     = s.shape[0]
    faces_per_simplex = s.shape[1]
    num_faces         = num_simplices * faces_per_simplex

    orientations = 1 - 2*parity

    #faces[:,:-2] are the indices of the faces
    #faces[:,-2]  is the index of the simplex whose boundary produced the face
    #faces[:,-1]  is the orientation of the face in the boundary of the simplex
    faces = empty((num_faces,s.shape[1]+1),dtype=s.dtype)
    for i in range(faces_per_simplex):
        rows = faces[num_simplices*i:num_simplices*(i+1)]

        rows[:,  : i] = s[:,   :i]
        rows[:,i :-2] = s[:,i+1: ]
        rows[:, -2  ] = arange(num_simplices)
        rows[:, -1  ] = ((-1)**i)*orientations

    #sort rows
    faces = faces[lexsort( faces[:,:-2].T[::-1] )]

    #find unique faces
    face_mask    = ~hstack((array([False]),alltrue(faces[1:,:-2] == faces[:-1,:-2],axis=1)))
    unique_faces = faces[face_mask,:-2]

    #compute CSR representation for boundary operator
    csr_indptr  = hstack((arange(num_faces)[face_mask],array([num_faces])))
    csr_indices = ascontiguousarray(faces[:,-2])
    csr_data    = faces[:,-1].astype('int8')
  
    shape = (len(unique_faces),num_simplices)   
    boundary_operator = csr_matrix((csr_data,csr_indices,csr_indptr), shape)

    return unique_faces,boundary_operator
Example #27
def test(debug=True):
    from spikeplot import plt, mcdata

    # setup
    TF = 47
    signal, noise, ce, temps = load_input_data(TF)
    FB = BOTMNode(
        templates=temps,
        ce=ce,
        adapt_templates=10,
        learn_noise=False,
        verbose=VERBOSE(debug * 10),
        ovlp_taus=None,
        chunk_size=500)
    x = sp.ascontiguousarray(signal + noise, dtype=sp.float32)

    # sort
    FB.plot_xvft()
    FB(x)
    FB.plot_sorting()
    print FB.rval
    plt.show()
Example #28
def plink2HDF5(arguments):
    if not os.path.isfile(arguments.plink_data + ".map"):
        print "Argument --plink_data " + arguments.plink_data + ".map does not exist or is not a file\n"
        quit()
    if not os.path.isfile(arguments.plink_data + ".ped"):
        print "Argument --plink_data " + arguments.plink_data + ".ped does not exist or is not a file\n"
        quit()
    if arguments.maf>1 or arguments.maf<0:
        print "Argument --maf " + str(arguments.maf) + " must be between 0 and 1\n"
        quit()
    phenotype_list = []
    if arguments.plink_phenotype!=None:
        if os.path.isdir(arguments.plink_phenotype):
            for fn in os.listdir(arguments.plink_phenotype):
                filename = os.path.join(arguments.plink_phenotype,fn)
                if os.path.isfile(filename):
                    phenotype_list.append(filename)
                else:
                    print "Argument --plink_phenotype " + filename + " is not a file\n"
                    quit()

        else:
            if os.path.isfile(arguments.plink_phenotype):
                phenotype_list.append(arguments.plink_phenotype)
            else:
                print "Argument --plink_phenotype " + arguments.plink_phenotype + " does not exist or is not a file\n"
                quit()
    exclude_snps = []
    if not arguments.exclude_snps==None:
        if not os.path.isfile(arguments.exclude_snps):
            print "Argument --exclude_snps " + arguments.exclude_snps + " does not exist or is not a file\n"
            quit()
        f = open(arguments.exclude_snps,'r')
        for line in f:
            exclude_snps.append(line.strip())
        f.close()
    exclude_snps = sp.array(exclude_snps)

    f = open(arguments.plink_data + '.map','r')
    chromosomes = []
    positions = []
    identifiers = []
    ref_allele = []
    delimiter = "\t"
    for line in f:
        sv = line.strip().split(delimiter)
        if len(sv)==1:
            delimiter = " "
            sv = line.strip().split(delimiter)
            
        chromosomes.append(sv[0].strip())
        identifiers.append(sv[1].strip())
        positions.append(int(sv[3].strip()))
        if len(sv)==5:
            ref_allele.append(sv[4].strip())
    f.close()
    chromosomes = sp.array(chromosomes)
    positions = sp.array(positions)
    identifiers = sp.array(identifiers)
    ref_allele = sp.array(ref_allele)
    if ref_allele.shape[0]>0:
        if ref_allele.shape[0] != identifiers.shape[0]:
            print "[ERROR] in *.map file: Column 5 (Reference allele) does not exist for all identifiers\n"
            quit()

    f = open(arguments.plink_data + '.ped','r')
    sample_ids = []
    family_ids = []
    paternal_ids = []
    maternal_ids = []
    sex = []
    matrix = []
    delimiter = "\t"
    for line in f:
        sv = line.strip().split(delimiter)
        if len(sv)==1:
            delimiter = " "
            sv = line.strip().split(delimiter)

        family_ids.append(sv[0].strip())
        sample_ids.append(sv[1].strip())
        paternal_ids.append(sv[2].strip())
        maternal_ids.append(sv[3].strip())
        sex.append(sv[4].strip())
        snps = []
        #TODO ADD EXCEPTION IF NUMBER OF SNPs are wrong
        j = 6
        while j < len(sv)-1:
            snps.append(iupac_map[sv[j] + sv[j+1]])
            j = j+2
        matrix.append(snps)
    f.close()
    family_ids = sp.array(family_ids)
    sample_ids = sp.array(sample_ids)
    paternal_ids = sp.array(paternal_ids)
    maternal_ids = sp.array(maternal_ids)
    matrix = sp.array(matrix)

    #Exclude SNPs
    print "SNPs before excluding SNPs:\t\t\t" + str(matrix.shape[1])
    ex_indices = []
    for i,ident in enumerate(identifiers):
        if not ident in exclude_snps:
            ex_indices.append(i)
    ex_indices = sp.array(ex_indices)
    if ex_indices.shape[0]!=matrix.shape[1]:
        print "SNPs after excluding SNPs:\t\t\t" + str(ex_indices.shape[0])
    matrix = matrix[:,ex_indices]
    identifiers = identifiers[ex_indices]
    chromosomes = chromosomes[ex_indices]
    positions = positions[ex_indices]
    if ref_allele.shape[0]>0:
        ref_allele = ref_allele[ex_indices]

    #FILTER MAF
    encoded = sp.array([])
    if arguments.maf>0:
        [encoded,maf] = encodeHeterozygousData(matrix)
        ind = sp.where(maf>=arguments.maf)[0]
        print "SNPs before MAF filtering:\t\t\t" + str(encoded.shape[1])
        encoded = encoded[:,ind]
        matrix = matrix[:,ind]
        maf = maf[ind]
        identifiers = identifiers[ind]
        chromosomes = chromosomes[ind]
        positions = positions[ind]
        if ref_allele.shape[0]>0:
            ref_allele = ref_allele[ind]
        print "SNPs after MAF filtering:\t\t\t" + str(encoded.shape[1])

    #distinct filtering
    if arguments.distinct_filter>0:
        if encoded.shape != matrix.shape:
            [encoded,maf] = encodeHeterozygousData(matrix)
        rawT = encoded.T
        snp_strings = sp.ascontiguousarray(rawT).view(sp.dtype((sp.void,rawT.dtype.itemsize * rawT.shape[1])))
        [frequencies, inverse] = itemfreq(snp_strings)
        if arguments.distinct_filter==1:
            ind = sp.where(frequencies[:,1]==int(arguments.distinct_filter))[0]
            indices = sp.array(frequencies[ind,0],dtype="int")
        else:
            ind = sp.where(frequencies[:,1]==1)[0]
            indices = sp.array(frequencies[ind,0],dtype="int")
            for i in sp.arange(2,int(arguments.distinct_filter)+1):
                tmp_indices = sp.where(frequencies[:,1]==i)[0]
                for tmp in tmp_indices:
                    ind = sp.where(inverse==tmp)[0]
                    chrom = chromosomes[ind]
                    un = sp.unique(chrom)
                    if un.shape[0]==1:
                        indices = sp.concatenate([indices,ind])
        print "Number of SNPs before distinct filtering:\t", matrix.shape[1]
        print "Number of truly unique SNPs:\t\t\t", sp.where(frequencies[:,1]==1)[0].shape[0]
        matrix = matrix[:,indices]
        chromosomes = chromosomes[indices]
        positions = positions[indices]
        identifiers = identifiers[indices]
        if ref_allele.shape[0]>0:
            ref_allele = ref_allele[indices]
        print "Number of SNPs after distinct filtering:\t", indices.shape[0]
        
    #sort data
    ind = sp.argsort(chromosomes)
    matrix = matrix[:,ind]
    identifiers = identifiers[ind]
    chromosomes = chromosomes[ind]
    positions = positions[ind]
    if ref_allele.shape[0]>0:
        ref_allele = ref_allele[ind]
    
    chrom_list = sp.unique(chromosomes)
    for chrom in chrom_list:
        ind = sp.where(chromosomes==chrom)[0]
        pos_tmp = positions[ind]
        chrom_tmp = chromosomes[ind]
        matrix_tmp = matrix[:,ind]
        ident_tmp = identifiers[ind]
        if ref_allele.shape[0]>0:
            ref_tmp = ref_allele[ind]
        #sort by position
        ind2 = sp.argsort(pos_tmp)
        positions[ind] = pos_tmp[ind2]
        chromosomes[ind] = chrom_tmp[ind2]
        matrix[:,ind] = matrix_tmp[:,ind2]
        identifiers[ind] = ident_tmp[ind2]
        if ref_allele.shape[0]>0:
            ref_allele[ind] = ref_tmp[ind2]

    #STORE DATA
    hd5 = h5py.File(arguments.hout)

    #save genotype
    genotype = hd5.create_group("Genotype")
    genotype.create_dataset("raw",data=matrix,chunks=True,compression="gzip",compression_opts=9)
    genotype.create_dataset("chr_index",data=chromosomes,chunks=True,compression="gzip",compression_opts=9)
    genotype.create_dataset("position_index",data=positions,chunks=True,compression="gzip",compression_opts=9)
    genotype.create_dataset("identifiers",data=identifiers,chunks=True,compression="gzip",compression_opts=9)
    genotype.create_dataset("sample_ids",data=sample_ids,chunks=True,compression="gzip",compression_opts=9)
    if ref_allele.shape[0]>0:
        genotype.create_dataset("ref_allele",data=ref_allele,chunks=True,compression="gzip",compression_opts=9)

    #read phenotypes per file in folder and store data
    counter = 0
    phenotypes = hd5.create_group("Phenotypes")
    for filename in phenotype_list:
        f = open(filename,'r')
        sample_ids = []
        phenotype_names = []
        Y = []
        delimiter = "\t"
        for i,line in enumerate(f):
            sv = line.strip().split(delimiter)
            if len(sv)==1:
                delimiter = " "
                sv = line.strip().split(delimiter)

            if i==0:
                for j in xrange(2,len(sv)):
                    phenotype_names.append(sv[j].strip())
                continue
            sample_ids.append(sv[1].strip())
            yline = []
            for j in xrange(2,len(sv)):
                yline.append(float(sv[j].strip()))
            Y.append(yline)
        f.close()
        sample_ids = sp.array(sample_ids)
        Y = sp.array(Y)
        
        for i,phenotype in enumerate(phenotype_names):
            ph = phenotypes.create_group(str(counter))
            ph.create_dataset("name",data=phenotype)
            ph.create_dataset("sample_ids",data=sample_ids,chunks=True,compression="gzip",compression_opts=9)
            ph.create_dataset("y",data=Y[:,i],compression="gzip",compression_opts=9,chunks=True)
            counter += 1
    hd5.close()
Example #29
    def __init__(self, y, x, bias=0):

        if (not isinstance(
                y,
            (list, tuple))) and (not (scipy and isinstance(y, scipy.ndarray))):
            raise TypeError("type of y: {0} is not supported!".format(type(y)))

        if isinstance(x, (list, tuple)):
            if len(y) != len(x):
                raise ValueError("len(y) != len(x)")

        elif scipy != None and isinstance(x, (scipy.ndarray, sparse.spmatrix)):
            if len(y) != x.shape[0]:
                raise ValueError("len(y) != len(x)")
            if isinstance(x, scipy.ndarray):
                x = scipy.ascontiguousarray(x)
            if isinstance(x, sparse.spmatrix):
                x = x.tocsr()
                pass
        else:
            raise TypeError("type of x: {0} is not supported!".format(type(x)))

        self.m = m = len(y)  # instance number

        self.bias = bias

        max_idx = 0
        x_space = self.x_space = []

        if scipy != None and isinstance(x, sparse.csr_matrix):
            csr_to_problem(x, self)
            max_idx = x.shape[1]
        else:
            for i, xi in enumerate(x):
                tmp_xi, tmp_idx = gen_feature_nodearray(xi)
                x_space += [tmp_xi]
                max_idx = max(max_idx, tmp_idx)

        self.d = max_idx  # dimension

        self.y = (c_double * m)()
        if scipy != None and isinstance(y, scipy.ndarray):  # ndarray (1-D)
            scipy.ctypeslib.as_array(self.y, (self.m, ))[:] = y
        else:
            for i, yi in enumerate(y):  # list / tuple
                self.y[i] = yi

        self.x = (POINTER(feature_node) * m)()
        if scipy != None and isinstance(x, sparse.csr_matrix):
            base = addressof(
                self.x_space.ctypes.data_as(POINTER(feature_node))[0])
            x_ptr = cast(self.x, POINTER(c_uint64))
            x_ptr = scipy.ctypeslib.as_array(x_ptr, (self.m, ))
            x_ptr[:] = self.rowptr[:-1] * sizeof(feature_node) + base
        else:
            for i, xi in enumerate(self.x_space):
                self.x[i] = xi

        if self.bias == 1:
            self.d += 1
            node = feature_node(self.d, 1)
            if isinstance(self.x_space, list):
                for xi in self.x_space:
                    xi[-2] = node
            else:
                self.x_space["index"][self.rowptr[1:] - 2] = node.index
                self.x_space["value"][self.rowptr[1:] - 2] = node.value
Example #30
	single_cols = xrange(msa.aln_len)
	paired_cols = msa.BP.items()
	paired_cols.sort()
	n = msa.nseq

	S = init_likelihood(msa, single_cols, single_model)

	g = MyMat.calc_likelihood
	# NOTE: NO LONGER logs the single model Frequency!
	# first calculate the null model (joint indep prob at each position)
	# TODO: this is not the fastest code ever....but will do for now
	L_null = [sum(sum(exp(S[:msa.nseq, col, :4]) * log(single_model.Frequency))) for col in single_cols]

	# convert S into 1d
	nnode, ncol, nbase = S.shape
	S = scipy.ascontiguousarray(S.reshape(S.size))

	P = init_likelihood_paired(msa, paired_cols, paired_model, nnode)
	nnode_p, ncol_p, nbase_p = P.shape
	P = scipy.ascontiguousarray(P.reshape(P.size))

	like_s, like_p, S, P = calc_likelihood(msa, order, single_model, paired_model) # need to use this to set up S, P for rearr

	import cello, Subtree, Tree
	def rearr(t, log_l, rL, rU):
		for target in t.postorder_node_iter():
			if target.parent_node is None:
				break
			# first make a deep copy of the tree
			t_prime = dendropy.Tree(t)
			print "finding subtree rearrangement for {0}".format(target.label)
Example #31
    xres=(upper_right[0]-lower_left[0])*1.0/(1.0*(nx-1))
    desiredX=scipy.linspace(lower_left[0], upper_right[0],nx )
    ny=int(round((upper_right[1]-lower_left[1])*1.0/(1.0*CellSize)) + 1)
    yres=(upper_right[1]-lower_left[1])*1.0/(1.0*(ny-1))
    desiredY=scipy.linspace(lower_left[1], upper_right[1], ny)

    gridX, gridY=scipy.meshgrid(desiredX,desiredY)

    if(verbose):
        print 'Making interpolation functions...'
    swwXY=scipy.array([swwX[:],swwY[:]]).transpose()

    # Get function to interpolate quantity onto gridXY_array
    gridXY_array=scipy.array([scipy.concatenate(gridX),
                              scipy.concatenate(gridY)]).transpose()
    gridXY_array=scipy.ascontiguousarray(gridXY_array)

    # Create Interpolation function
    #basic_nearest_neighbour=False
    if(k_nearest_neighbours==1):
        index_qFun = scipy.interpolate.NearestNDInterpolator(
            swwXY,
            scipy.arange(len(swwX),dtype='int64').transpose())
        gridqInd = index_qFun(gridXY_array)
        # Function to do the interpolation
        def myInterpFun(quantity):
            return quantity[gridqInd]
    else:
        # Combined nearest neighbours and inverse-distance interpolation
        index_qFun = scipy.spatial.cKDTree(swwXY)
        NNInfo = index_qFun.query(gridXY_array, k=k_nearest_neighbours)
Example #32
def optimize_branch_func(t_a, parent, child, msa, order, single_model, paired_model, S=None, P=None):
	"""
	<t_a> is the new branch length between <parent> --- <child>
	modify it in <order> and return the log likelihood as
	sum of (likelihood of single cols) + (likelihood of paired cols)

	single_model.Frequency AND paired_model.Frequency should both NOT be in log scale!!

	TODO: speed up the likelihood calculation since we're only changing one branch
	      at a time!!

	currently wrapped up by ::optimize_branch::
	"""
	assert len(t_a) == 1
	g = MyMat.calc_likelihood
	paired_cols = msa.BP.items()
	paired_cols.sort()
	single_cols = filter(lambda i: i not in msa.BP and i not in msa.BP.itervalues(), xrange(msa.aln_len))
	can_cheat = True

	nnode = 2*msa.nseq + 1
	ncol = len(single_cols)
	nbase = 5
	nnode_p = nnode
	ncol_p = len(paired_cols)
	nbase_p = 25

	if S is None:
		S = init_likelihood(msa, single_cols, single_model)
		S = scipy.ascontiguousarray(S.reshape(S.size))
		can_cheat = False
	
	if P is None:
		P = init_likelihood_paired(msa, paired_cols, paired_model, nnode)
		P = scipy.ascontiguousarray(P.reshape(P.size))
		can_cheat = False

	if can_cheat:
		cheat_order = [] # this is the cheating list of nodes that cannot be reused
		                 # and must be calculated
		start_cheat = False
		left_subtree_ind = order[-1][1][0][0]
		for x, (k, bunch) in enumerate(order):
			if k == parent:
				for i, (a, old_t_a) in enumerate(bunch):
					if a == child:
						bunch[i] = (a, t_a[0])
						#print >> sys.stderr, "changed branch length between \
						#		{0}, {1} to {2}".format(parent,child, t_a)
						start_cheat = True
						break
			if not can_cheat or start_cheat:
				cheat_order.append(order[x])
			if can_cheat and k == left_subtree_ind and start_cheat:
				cheat_order.append(order[-1]) # put in the root calc
				break
	else:
		cheat_order = order

	L_single_conserved = g(single_model.gtr.R, S, log(single_model.Frequency), cheat_order, range(ncol), nnode, ncol, nbase)
	L_paired = g(paired_model.gtr.R, P, log(paired_model.Frequency), cheat_order, range(ncol_p), nnode_p, ncol_p, nbase_p)
	return -(L_single_conserved.sum() + L_paired.sum())
Example #33
 def write_i8x2(self, x):
     y = sp.ascontiguousarray(x)
     libfwrite.fwrite_i8x1(self._unit, y.ctypes.data_as(ctypes.POINTER(ctypes.c_int8)), y.size)
Example #34
    xres = (upper_right[0]-lower_left[0])*1.0/(1.0*(nx-1))
    desiredX = scipy.linspace(lower_left[0], upper_right[0],nx )
    ny = int(round((upper_right[1]-lower_left[1])*1.0/(1.0*CellSize)) + 1)
    yres = (upper_right[1]-lower_left[1])*1.0/(1.0*(ny-1))
    desiredY = scipy.linspace(lower_left[1], upper_right[1], ny)

    gridX, gridY = scipy.meshgrid(desiredX, desiredY)

    if(verbose):
        print 'Making interpolation functions...'
    swwXY = scipy.array([swwX[:],swwY[:]]).transpose()

    # Get function to interpolate quantity onto gridXY_array
    gridXY_array = scipy.array([scipy.concatenate(gridX),
        scipy.concatenate(gridY)]).transpose()
    gridXY_array = scipy.ascontiguousarray(gridXY_array)

    # Create Interpolation function
    #basic_nearest_neighbour=False
    if(k_nearest_neighbours == 1):
        index_qFun = scipy.interpolate.NearestNDInterpolator(
            swwXY,
            scipy.arange(len(swwX),dtype='int64').transpose())
        gridqInd = index_qFun(gridXY_array)
        # Function to do the interpolation
        def myInterpFun(quantity):
            return quantity[gridqInd]
    else:
        # Combined nearest neighbours and inverse-distance interpolation
        index_qFun = scipy.spatial.cKDTree(swwXY)
        NNInfo = index_qFun.query(gridXY_array, k=k_nearest_neighbours)
Example #35
 def write_i4x1(self, x):
     y = sp.ascontiguousarray(x)
     y = y.astype(sp.int32, casting="same_kind", copy=False)
     libfwrite.fwrite_i4x1(self._unit, y.ctypes.data_as(ctypes.POINTER(ctypes.c_int)), y.size)
Example #36
 def write_r8x1(self, x):
     y = sp.ascontiguousarray(x)
     y = y.astype(sp.float64, casting="same_kind", copy=False)
     libfwrite.fwrite_r8x1(self._unit, y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), y.size)
Example #37
def generate_node_connectivity_array(index_map, data_array):
    r"""
    Generates a node connectivity array based on faces, edges and corner
    adjacency
    """
    #
    logger.info('generating network connections...')
    #
    # setting up some constants
    x_dim, y_dim, z_dim = data_array.shape
    conn_map = list(product([0, -1, 1], [0, -1, 1], [0, -1, 1]))
    conn_map = sp.array(conn_map, dtype=int)
    conn_map = conn_map[1:]
    #
    # creating slice list to process data chunks
    slice_list = [slice(0, 10000)]
    for i in range(slice_list[0].stop, index_map.shape[0], slice_list[0].stop):
        slice_list.append(slice(i, i+slice_list[0].stop))
    slice_list[-1] = slice(slice_list[-1].start, index_map.shape[0])
    #
    conns = sp.ones((0, 2), dtype=sp.uint32)
    logger.debug('    number of slices to process: {}'.format(len(slice_list)))
    for sect in slice_list:
        # getting coordinates of nodes and their neighbors
        nodes = index_map[sect]
        inds = sp.repeat(nodes, conn_map.shape[0], axis=0)
        inds += sp.tile(conn_map, (nodes.shape[0], 1))
        #
        # calculating the flattened index of the central nodes and storing
        nodes = sp.ravel_multi_index(sp.hsplit(nodes, 3), data_array.shape)
        inds = sp.hstack([inds, sp.repeat(nodes, conn_map.shape[0], axis=0)])
        #
        # removing neighbors with negative indices
        mask = inds[:, 0:3] >= 0
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing neighbors with indices outside of bounds
        mask = (inds[:, 0] < x_dim, inds[:, 1] < y_dim, inds[:, 2] < z_dim)
        mask = sp.stack(mask, axis=1)
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing indices with zero-weight connection
        mask = data_array[inds[:, 0], inds[:, 1], inds[:, 2]]
        inds = inds[mask]
        if inds.size:
            # calculating flattened index of remaining neighbor nodes
            nodes = sp.ravel_multi_index(sp.hsplit(inds[:, 0:3], 3),
                                         data_array.shape)
            inds = sp.hstack([sp.reshape(inds[:, -1], (-1, 1)), nodes])
            # ensuring conns[0] is always < conns[1] for duplicate removal
            mask = inds[:, 0] > inds[:, 1]
            inds[mask] = inds[mask][:, ::-1]
            # appending section connectivity data to conns array
            conns = sp.append(conns, inds.astype(sp.uint32), axis=0)
    #
    # using scipy magic from stackoverflow to remove duplicate connections
    logger.info('removing duplicate connections...')
    dim0 = conns.shape[0]
    conns = sp.ascontiguousarray(conns)
    dtype = sp.dtype((sp.void, conns.dtype.itemsize*conns.shape[1]))
    dim1 = conns.shape[1]
    conns = sp.unique(conns.view(dtype)).view(conns.dtype).reshape(-1, dim1)
    logger.debug('    removed {} duplicates'.format(dim0 - conns.shape[0]))
    #
    return conns
Example #38
def predict(y, x, m, options=""):
    """
	predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

	y: a list/tuple/ndarray of l true labels (type must be int/double).
	   It is used for calculating the accuracy. Use [] if true labels are
	   unavailable.

	x: 1. a list/tuple of l training instances. Feature vector of
	      each training instance is a list/tuple or dictionary.

	   2. an l * n numpy ndarray or scipy spmatrix (n: number of features).

	Predict data (y, x) with the SVM model m.
	options:
	    -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
	    -q quiet mode (no outputs)

	The return tuple contains
	p_labels: a list of predicted labels
	p_acc: a tuple including  accuracy (for classification), mean-squared
	       error, and squared correlation coefficient (for regression).
	p_vals: a list of decision values or probability estimates (if '-b 1'
	        is specified). If k is the number of classes, for decision values,
	        each element includes results of predicting k binary-class
	        SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value
	        is returned. For probabilities, each element contains k values
	        indicating the probability that the testing instance is in each class.
	        Note that the order of classes here is the same as 'model.label'
	        field in the model structure.
	"""
    def info(s):
        print(s)

    if scipy and isinstance(x, scipy.ndarray):
        x = scipy.ascontiguousarray(x)  # enforce row-major
    elif sparse and isinstance(x, sparse.spmatrix):
        x = x.tocsr()
    elif not isinstance(x, (list, tuple)):
        raise TypeError("type of x: {0} is not supported!".format(type(x)))

    if (not isinstance(
            y,
        (list, tuple))) and (not (scipy and isinstance(y, scipy.ndarray))):
        raise TypeError("type of y: {0} is not supported!".format(type(y)))

    predict_probability = 0
    argv = options.split()
    i = 0
    while i < len(argv):
        if argv[i] == '-b':
            i += 1
            predict_probability = int(argv[i])
        elif argv[i] == '-q':
            info = print_null
        else:
            raise ValueError("Wrong options")
        i += 1

    solver_type = m.param.solver_type
    nr_class = m.get_nr_class()
    nr_feature = m.get_nr_feature()
    is_prob_model = m.is_probability_model()
    bias = m.bias
    if bias >= 0:
        biasterm = feature_node(nr_feature + 1, bias)
    else:
        biasterm = feature_node(-1, bias)
    pred_labels = []
    pred_values = []

    if scipy and isinstance(x, sparse.spmatrix):
        nr_instance = x.shape[0]
    else:
        nr_instance = len(x)

    if predict_probability:
        if not is_prob_model:
            raise TypeError(
                'probability output is only supported for logistic regression')
        prob_estimates = (c_double * nr_class)()
        for i in range(nr_instance):
            if scipy and isinstance(x, sparse.spmatrix):
                indslice = slice(x.indptr[i], x.indptr[i + 1])
                xi, idx = gen_feature_nodearray(
                    (x.indices[indslice], x.data[indslice]),
                    feature_max=nr_feature)
            else:
                xi, idx = gen_feature_nodearray(x[i], feature_max=nr_feature)
            xi[-2] = biasterm
            label = liblinear.predict_probability(m, xi, prob_estimates)
            values = prob_estimates[:nr_class]
            pred_labels += [label]
            pred_values += [values]
    else:
        if nr_class <= 2:
            nr_classifier = 1
        else:
            nr_classifier = nr_class
        dec_values = (c_double * nr_classifier)()
        for i in range(nr_instance):
            if scipy and isinstance(x, sparse.spmatrix):
                indslice = slice(x.indptr[i], x.indptr[i + 1])
                xi, idx = gen_feature_nodearray(
                    (x.indices[indslice], x.data[indslice]),
                    feature_max=nr_feature)
            else:
                xi, idx = gen_feature_nodearray(x[i], feature_max=nr_feature)
            xi[-2] = biasterm
            label = liblinear.predict_values(m, xi, dec_values)
            values = dec_values[:nr_classifier]
            pred_labels += [label]
            pred_values += [values]

    if len(y) == 0:
        y = [0] * nr_instance
    ACC, MSE, SCC = evaluations(y, pred_labels)

    if m.is_regression_model():
        info("Mean squared error = %g (regression)" % MSE)
        info("Squared correlation coefficient = %g (regression)" % SCC)
    else:
        info("Accuracy = %g%% (%d/%d) (classification)" %
             (ACC, int(round(nr_instance * ACC / 100)), nr_instance))

    return pred_labels, (ACC, MSE, SCC), pred_values
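
A hedged end-to-end sketch around the predict API above; the liblinearutil module name and the train call are assumptions about the surrounding LIBLINEAR bindings:

import numpy as np
from liblinearutil import train, predict   # assumed standard bindings

x = np.ascontiguousarray(np.random.randn(200, 5))
y = np.where(x[:, 0] > 0, 1, -1)
m = train(y, x, "-q")
p_labels, p_acc, p_vals = predict(y, x, m, "-q")
acc, mse, scc = p_acc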
Example #39
ind = (route['sind'] != route['dind'])
Nt = route['dind'][ind].size
vlist = [('', 'i4'), ('Nt', 'i4'), ('', 'i4'), ('', 'i4'), ('sind', 'i4', Nt),
         ('', 'i4'), ('', 'i4'), ('dind', 'i4', Nt), ('', 'i4'), ('', 'i4'),
         ('weight', 'f4', Nt), ('', 'i4')]

route_new = sp.array(sp.zeros(1, ), dtype=vlist)
route_new['f0'] = 4
route_new['Nt'] = Nt
route_new['f2'] = 4
route_new['f3'] = Nt * 4
route_new['sind'] = route['sind'][ind]
route_new['f5'] = Nt * 4
route_new['f6'] = Nt * 4
route_new['dind'], route_new['weight'] = reroute(
    sp.ascontiguousarray(route['dind'][ind]),
    sp.ascontiguousarray(route['weight'][ind]), sp.ascontiguousarray(slmask),
    sp.ascontiguousarray(tdata['ii']), sp.ascontiguousarray(tdata['jj']),
    sp.ascontiguousarray(tdata['lon']), sp.ascontiguousarray(tdata['lat']),
    sp.ascontiguousarray(tdata['area']), sp.ascontiguousarray(tdata['type']),
    sp.ascontiguousarray(otdata['tnum']))
route_new['f8'] = Nt * 4
route_new['f9'] = Nt * 4
route_new['f11'] = Nt * 4

route_new.tofile(dir + '/runoff_new.bin')
print('...done')

dstind_old = route['dind'][ind]
gind = sp.zeros((Nt, 4), dtype='i4')
gind[:, 0] = tdata[dstind_old - 1]['ii'] - 1
Example #40
    sys.exit(0)

move0_index = move0_index - move0_index[0]
move1_index = move1_index - move1_index[0]
move2_index = move2_index - move2_index[0]
check_flag0 = check_move(move0_index)
check_flag1 = check_move(move1_index)
check_flag2 = check_move(move2_index)

print 'done'
print 'check flag0:', check_flag0, 'check_flag1:', check_flag1, 'check_flag2:', check_flag2
if (not check_flag0) or (not check_flag1) or (not check_flag2):
    raise ValueError

# Pack into 2d array - kludge
N = move0.shape[0]
move_index = scipy.zeros((N, 3), scipy.integer)
move_index[:, 0] = move0_index
move_index[:, 1] = move1_index
move_index[:, 2] = move2_index
move_index = scipy.ascontiguousarray(move_index)

# Load data into buffer
print 'loading data into buffer ...',
sys.stdout.flush()
cmd2motors('unlock-buffer')
time.sleep(0.1)  # Not a good way to do things (really want a try loop)
flag = load_os_buffer(move_index)
cmd2motors('lock-buffer')
print 'done'
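The ascontiguousarray call above guarantees that the packed index array occupies one contiguous C-ordered block before it is handed to the buffer loader; column-wise assembly happens to be contiguous already, but views produced by slicing or transposing are not. A small self-contained illustration (load_os_buffer and cmd2motors belong to the original script and are not reproduced here):

import numpy as np

move_index = np.zeros((4, 3), dtype=np.int32)
view = move_index[:, ::2]                 # a strided, non-contiguous view
print(view.flags['C_CONTIGUOUS'])         # False
packed = np.ascontiguousarray(view)       # copies into contiguous memory
print(packed.flags['C_CONTIGUOUS'])       # True: safe to pass to C-level code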
Example #41
0
    def loadData(self, settings=None):
        '''
        Load Phenotype Data
        '''
        try:
            if settings.phenotype_file == None:
                self.__y = self.__dbfile['Phenotypes/' +
                                         str(settings.phenotype_id) + '/y'][:]
                self.__sample_ids = self.__dbfile['Phenotypes/' +
                                                  str(settings.phenotype_id) +
                                                  '/sample_ids'][:]
                self.__phenotype_name = self.__dbfile[
                    'Phenotypes/' + str(settings.phenotype_id) + "/name"].value
            else:
                f = h5py.File(settings.phenotype_file, 'r')
                self.__y = f['Phenotypes/' + str(settings.phenotype_id) +
                             '/y'][:]
                self.__sample_ids = f['Phenotypes/' +
                                      str(settings.phenotype_id) +
                                      '/sample_ids'][:]
                self.__phenotype_name = f['Phenotypes/' +
                                          str(settings.phenotype_id) +
                                          "/name"].value
                f.close()
        except:
            print "[ERROR] Loading Phenotype went wrong"
            quit()
        #remove missing values
        ind = sp.where(~sp.isnan(self.__y))[0]
        self.__y = self.__y[ind]
        self.__sample_ids = self.__sample_ids[ind]
        #transform phenotypes
        self.__y = self.transformData(self.__y,
                                      settings.phenotype_transformation)
        '''
        Load Covariate Data and restrict samples
        '''
        self.__cov = None
        covariates = settings.covariates
        for covariate in covariates:
            try:
                cov = self.__dbfile['Covariates/' + str(covariate) + '/y'][:]
                sample_ids = self.__dbfile['Covariates/' + str(covariate) +
                                           '/sample_ids'][:]
            except:
                print "[ERROR] Loading Covariate went wrong"
                quit()
            #transform covariates
            cov = self.transformData(cov, settings.covariate_transformation)
            #match samples
            sample_indices = (sp.reshape(
                sample_ids,
                (sample_ids.shape[0], 1)) == self.__sample_ids).nonzero()
            sample_ids = sample_ids[sample_indices[0]]
            if self.__cov is None:
                self.__cov = cov[sample_indices[0]]
            else:
                self.__cov = sp.column_stack([self.__cov, cov])
            self.__y = self.__y[sample_indices[1]]
            self.__sample_ids = self.__sample_ids[sample_indices[1]]
        '''
        Load Genotype Data and restrict samples
        '''
        sample_ids_file = self.__dbfile['Genotype/sample_ids'][:]
        raw_data = self.__dbfile['Genotype/raw'][:]
        self.__chr_index = self.__dbfile['Genotype/chr_index'][:]
        self.__pos_index = self.__dbfile['Genotype/position_index'][:]
        sample_indices = (sp.reshape(
            self.__sample_ids,
            (self.__sample_ids.shape[0], 1)) == sample_ids_file).nonzero()
        self.__sample_ids = self.__sample_ids[sample_indices[0]]
        self.__y = self.__y[sample_indices[0]]
        if not self.__cov is None:
            self.__cov = self.__cov[sample_indices[0]]
        raw_data = raw_data[sample_indices[1], :]
        self.__raw = raw_data
        if settings.homozygous == True:
            [self.__x, self.__maf_data] = self.encodeHomozygousData(raw_data)
        else:
            [self.__x, self.__maf_data
             ] = self.encodeHeterozygousData(raw_data, settings.snp_encoding)
            #if settings.snp_encoding!="additive":
            #[self.__x_additive, self.__maf_data] = self.encodeHeterozygousData(raw_data)
            #This was experimental to use an additive Kinship in case another encoding was selected, now we only use the MAF filtering based
            #on an additive model! Comment the next line out if you wish to use the additive kinship matrix again
            #    self.__x_additive = None
        if settings.maf > 0.0:
            self.filter_mAF(settings.maf)
        self.filterNonInformativeSNPs()

        if settings.principle_components > 0:
            if not self.__x_additive is None:
                cov = sp.real(
                    self.computePCA(X=self.__x_additive,
                                    number_pcs=settings.principle_components))
            else:
                cov = sp.real(
                    self.computePCA(X=self.__x,
                                    number_pcs=settings.principle_components))
            if self.__cov is None:
                self.__cov = cov
            else:
                self.__cov = sp.column_stack([self.__cov, cov])

        if not self.__x_additive is None:
            tmpX = self.__x_additive.T
        else:
            tmpX = self.__x.T
        usnps, self.__snp_hash = sp.unique(sp.ascontiguousarray(tmpX).view(
            sp.dtype((sp.void, tmpX.dtype.itemsize * tmpX.shape[1]))),
                                           return_inverse=True)

        #compute kinship kernel if necessary
        if self.__algorithm == "FaSTLMM" or self.__algorithm == "EMMAX" or self.__algorithm == "EMMAXperm":
            if settings.unique_snps_only:
                tmp, uindex = sp.unique(self.__snp_hash, return_index=True)
                if not self.__x_additive is None:
                    self.__ass.setK(
                        gwas_core.CKernels.realizedRelationshipKernel(
                            self.__x_additive[:, uindex]))
                    #self.__ass.setK(self.computeRealizedRelationshipKernel(genotype=self.__x_additive[:,uindex]))
                else:
                    self.__ass.setK(
                        gwas_core.CKernels.realizedRelationshipKernel(
                            self.__x[:, uindex]))
                    #self.__ass.setK(self.computeRealizedRelationshipKernel(genotype=self.__x[:,uindex]))
            else:
                if not self.__x_additive is None:
                    self.__ass.setK(
                        gwas_core.CKernels.realizedRelationshipKernel(
                            self.__x_additive))
                    #self.__ass.setK(self.computeRealizedRelationshipKernel(genotype=self.__x_additive))
                else:
                    self.__ass.setK(
                        gwas_core.CKernels.realizedRelationshipKernel(
                            self.__x))
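The sp.unique call on the void view above is a known trick for hashing whole rows of a 2-D array: viewing each C-contiguous row as one opaque void element makes unique() compare rows as single items, and return_inverse then yields a per-row group id (used here as a SNP hash). A minimal NumPy sketch of just that trick, with a made-up matrix:

import numpy as np

X = np.array([[0, 1, 1],
              [0, 1, 1],
              [1, 0, 2]], dtype=np.int8)
Xc = np.ascontiguousarray(X)
row_view = Xc.view(np.dtype((np.void, Xc.dtype.itemsize * Xc.shape[1])))
unique_rows, row_hash = np.unique(row_view, return_inverse=True)
print(row_hash.ravel())  # [0 0 1]: rows 0 and 1 are identical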
Example #42
0
    def loadData(self,settings=None):
        '''
        Load Phenotype Data
        '''
        try:
            if settings.phenotype_file==None:
                self.__y = self.__dbfile['Phenotypes/' + str(settings.phenotype_id) + '/y'][:]
                self.__sample_ids = self.__dbfile['Phenotypes/' + str(settings.phenotype_id) + '/sample_ids'][:]
                self.__phenotype_name = self.__dbfile['Phenotypes/' + str(settings.phenotype_id) + "/name"].value 
            else:
                f = h5py.File(settings.phenotype_file,'r')
                self.__y = f['Phenotypes/' + str(settings.phenotype_id) + '/y'][:]
                self.__sample_ids = f['Phenotypes/' + str(settings.phenotype_id) + '/sample_ids'][:]
                self.__phenotype_name = f['Phenotypes/' + str(settings.phenotype_id) + "/name"].value 
                f.close()
        except:
            print "[ERROR] Loading Phenotype went wrong"
            quit()
        #remove missing values
        ind = sp.where(~sp.isnan(self.__y))[0]
        self.__y = self.__y[ind]
        self.__sample_ids = self.__sample_ids[ind]
        #transform phenotypes
        self.__y = self.transformData(self.__y,settings.phenotype_transformation)
        
        '''
        Load Covariate Data and restrict samples
        '''
        self.__cov = None
        covariates = settings.covariates
        for covariate in covariates:
            try:
                cov = self.__dbfile['Covariates/' + str(covariate) + '/y'][:]
                sample_ids = self.__dbfile['Covariates/' + str(covariate) + '/sample_ids'][:]
            except:
                print "[ERROR] Loading Covariate went wrong"
                quit()
            #transform covariates
            cov = self.transformData(cov,settings.covariate_transformation)
            #match samples
            sample_indices = (sp.reshape(sample_ids,(sample_ids.shape[0],1))==self.__sample_ids).nonzero()
            sample_ids = sample_ids[sample_indices[0]]
            if self.__cov is None:
                self.__cov = cov[sample_indices[0]]
            else:
                self.__cov = sp.column_stack([self.__cov,cov])
            self.__y = self.__y[sample_indices[1]]
            self.__sample_ids = self.__sample_ids[sample_indices[1]]
        
        '''
        Load Genotype Data and restrict samples
        '''
        sample_ids_file = self.__dbfile['Genotype/sample_ids'][:]
        raw_data = self.__dbfile['Genotype/raw'][:]
        self.__chr_index = self.__dbfile['Genotype/chr_index'][:]
        self.__pos_index = self.__dbfile['Genotype/position_index'][:]
        sample_indices = (sp.reshape(self.__sample_ids,(self.__sample_ids.shape[0],1))==sample_ids_file).nonzero()
        self.__sample_ids = self.__sample_ids[sample_indices[0]]
        self.__y = self.__y[sample_indices[0]]
        if not self.__cov is None:
            self.__cov = self.__cov[sample_indices[0]]
        raw_data = raw_data[sample_indices[1],:]
        self.__raw = raw_data
        if settings.homozygous==True:
            [self.__x, self.__maf_data] = self.encodeHomozygousData(raw_data)
        else:
            [self.__x, self.__maf_data] = self.encodeHeterozygousData(raw_data,settings.snp_encoding)
            #if settings.snp_encoding!="additive":
                #[self.__x_additive, self.__maf_data] = self.encodeHeterozygousData(raw_data)
                #This was experimental to use an additive Kinship in case another encoding was selected, now we only use the MAF filtering based
                #on an additive model! Comment the next line out if you wish to use the additive kinship matrix again
            #    self.__x_additive = None
        if settings.maf > 0.0:
            self.filter_mAF(settings.maf)
        self.filterNonInformativeSNPs()
        
        if settings.principle_components > 0:
            if not self.__x_additive is None:
                cov = sp.real(self.computePCA(X=self.__x_additive,number_pcs=settings.principle_components))
            else:
                cov = sp.real(self.computePCA(X=self.__x,number_pcs=settings.principle_components))
            if self.__cov is None:
                self.__cov = cov
            else:
                self.__cov = sp.column_stack([self.__cov,cov])

        if not self.__x_additive is None:
            tmpX = self.__x_additive.T
        else:            
            tmpX = self.__x.T
        usnps,self.__snp_hash=sp.unique(sp.ascontiguousarray(tmpX).view(sp.dtype((sp.void,tmpX.dtype.itemsize*tmpX.shape[1]))),return_inverse=True)
        
        #compute kinship kernel if necessary
        if self.__algorithm == "FaSTLMM" or self.__algorithm=="EMMAX" or self.__algorithm=="EMMAXperm":
            if settings.unique_snps_only:
                tmp,uindex = sp.unique(self.__snp_hash,return_index=True)
                if not self.__x_additive is None:
                    self.__ass.setK(gwas_core.CKernels.realizedRelationshipKernel(self.__x_additive[:,uindex]))
                    #self.__ass.setK(self.computeRealizedRelationshipKernel(genotype=self.__x_additive[:,uindex]))
                else:
                    self.__ass.setK(gwas_core.CKernels.realizedRelationshipKernel(self.__x[:,uindex]))
                    #self.__ass.setK(self.computeRealizedRelationshipKernel(genotype=self.__x[:,uindex]))
            else:
                if not self.__x_additive is None:
                    self.__ass.setK(gwas_core.CKernels.realizedRelationshipKernel(self.__x_additive))
                    #self.__ass.setK(self.computeRealizedRelationshipKernel(genotype=self.__x_additive))
                else:
                    self.__ass.setK(gwas_core.CKernels.realizedRelationshipKernel(self.__x))
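Both loadData variants match sample ids by broadcasting: reshaping one id vector into a column and comparing it against the other yields a boolean grid whose nonzero() coordinates are the matching positions in each array (this assumes ids are unique within each array). A minimal sketch with made-up ids:

import numpy as np

ids_a = np.array([b's1', b's2', b's3', b's4'])
ids_b = np.array([b's4', b's2', b's9'])
rows, cols = (np.reshape(ids_a, (ids_a.shape[0], 1)) == ids_b).nonzero()
# rows index into ids_a, cols into ids_b, paired by match
print(ids_a[rows], ids_b[cols])  # [b's2' b's4'] [b's2' b's4']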
Example #43
0
def generate_node_connectivity_array(index_map, data_array):
    r"""
    Generates a node connectivity array based on faces, edges and corner
    adjacency
    """
    #
    logger.info('generating network connections...')
    #
    # setting up some constants
    x_dim, y_dim, z_dim = data_array.shape
    conn_map = list(product([0, -1, 1], [0, -1, 1], [0, -1, 1]))
    #
    conn_map = sp.array(conn_map, dtype=int)
    conn_map = conn_map[1:]
    #
    # creating slice list to process data chunks
    slice_list = [slice(0, 10000)]
    for i in range(slice_list[0].stop, index_map.shape[0], slice_list[0].stop):
        slice_list.append(slice(i, i + slice_list[0].stop))
    slice_list[-1] = slice(slice_list[-1].start, index_map.shape[0])
    #
    conns = sp.ones((0, 2), dtype=data_array.index_int_type)
    logger.debug('\tnumber of slices to process: {}'.format(len(slice_list)))
    percent = 10
    for n, sect in enumerate(slice_list):
        # getting coordinates of nodes and their neighbors
        nodes = index_map[sect]
        inds = sp.repeat(nodes, conn_map.shape[0], axis=0)
        inds += sp.tile(conn_map, (nodes.shape[0], 1))
        #
        # calculating the flattened index of the central nodes and storing
        nodes = sp.ravel_multi_index(sp.hsplit(nodes, 3), data_array.shape)
        inds = sp.hstack([inds, sp.repeat(nodes, conn_map.shape[0], axis=0)])
        #
        # removing neighbors with negative indices
        # (~ binds tighter than <, and for integer i, ~i < 0 is equivalent to i >= 0)
        mask = ~inds[:, 0:3] < 0
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing neighbors with indices outside of bounds
        mask = (inds[:, 0] < x_dim, inds[:, 1] < y_dim, inds[:, 2] < z_dim)
        mask = sp.stack(mask, axis=1)
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing indices with zero-weight connection
        mask = data_array[inds[:, 0], inds[:, 1], inds[:, 2]]
        inds = inds[mask]
        if inds.size:
            # calculating flattened index of remaining neighbor nodes
            nodes = sp.ravel_multi_index(sp.hsplit(inds[:, 0:3], 3),
                                         data_array.shape)
            inds = sp.hstack([sp.reshape(inds[:, -1], (-1, 1)), nodes])
            # ensuring conns[0] is always < conns[1] for duplicate removal
            mask = inds[:, 0] > inds[:, 1]
            inds[mask] = inds[mask][:, ::-1]
            # appending section connectivity data to conns array
            conns = sp.append(conns, inds.astype(sp.uint32), axis=0)
        if int(n / len(slice_list) * 100) == percent:
            logger.debug('\tprocessed slice {:5d}, {}% complete'.format(
                n, percent))
            percent += 10
    #
    # using scipy magic from stackoverflow to remove duplicate connections
    logger.info('removing duplicate connections...')
    dim0 = conns.shape[0]
    conns = sp.ascontiguousarray(conns)
    dtype = sp.dtype((sp.void, conns.dtype.itemsize * conns.shape[1]))
    dim1 = conns.shape[1]
    conns = sp.unique(conns.view(dtype)).view(conns.dtype).reshape(-1, dim1)
    logger.debug('\tremoved {} duplicates'.format(dim0 - conns.shape[0]))
    #
    return conns
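The repeat/tile pattern above enumerates the 26 face, edge and corner neighbors of every node in one vectorized step, and ravel_multi_index then flattens the 3-D coordinates into the linear indices stored in the connection array. A minimal sketch of that pattern on a tiny grid (the grid shape and node list are illustrative):

import numpy as np
from itertools import product

shape = (4, 4, 4)
conn_map = np.array(list(product([0, -1, 1], repeat=3)), dtype=int)[1:]  # 26 offsets, (0,0,0) dropped
nodes = np.array([[1, 1, 1], [2, 2, 2]])                  # two interior nodes
neigh = np.repeat(nodes, conn_map.shape[0], axis=0) + np.tile(conn_map, (nodes.shape[0], 1))
flat = np.ravel_multi_index(neigh.T, shape)               # flattened neighbor indices
print(flat.shape)                                         # (52,) = 2 nodes * 26 neighbors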
Example #44
0
    def testMainSingle(self, verbose=VERBOSE.PLOT):
        import time

        # setup
        V = VERBOSE(verbose)
        TF = 21
        NC = 2
        spike_proto_sc = sp.cos(sp.linspace(-sp.pi, 3 * sp.pi, TF))
        spike_proto_sc *= sp.hanning(TF)
        scale = sp.linspace(0, 2, TF)
        xi1 = sp.vstack((spike_proto_sc * 5 * scale,
                         spike_proto_sc * 4 * scale)).T
        xi2 = sp.vstack((spike_proto_sc * .5 * scale[::-1],
                         spike_proto_sc * 9 * scale[::-1])).T
        templates = sp.asarray([xi1, xi2])
        LEN = 2000
        noise = sp.randn(LEN, NC)
        ce = TimeSeriesCovE(tf_max=TF, nc=NC)
        ce.update(noise)
        FB = BOTMNode(
            templates=templates,
            ce=ce,
            verbose=V,
            ovlp_taus=None)
        signal = sp.zeros_like(noise)
        NPOS = 4
        POS = [(int(i * LEN / (NPOS + 1)), 100) for i in xrange(1, NPOS + 1)]
        POS.append((100, 2))
        POS.append((150, 2))
        for pos, tau in POS:
            signal[pos:pos + TF] += xi1
            signal[pos + tau:pos + tau + TF] += xi2
        x = sp.ascontiguousarray(signal + noise, dtype=sp.float32)

        # test against
        if V.has_print:
            print '### constructed spike times ###'
        test_u0 = sorted([t_tpl[0] for t_tpl in POS])
        test_u1 = sorted([t_tpl[0] + t_tpl[1] for t_tpl in POS])
        test_rval = {0: sp.array(test_u0) + TF / 2, 1: sp.array(test_u1) + TF / 2}
        if V.has_print:
            print test_rval

        # sort
        tic_o = time.clock()
        FB(x)
        toc_o = time.clock()
        if V.has_print:
            print '### sorting spike times ###'
            print FB.rval

        if V.has_plot:
            FB.plot_template_set(show=False)
            FB.plot_sorting(show=True)

        if V.has_print:
            print '###'
            print 'duration:', toc_o - tic_o

        for k in FB.rval:
            assert_array_almost_equal(FB.rval[k], test_rval[k], decimal=0)
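The synthetic data in the test above is a windowed cosine prototype scaled per channel, stacked into a (TF, NC) template and injected into noise at known offsets; the sorter is then expected to recover those offsets. A NumPy-only sketch of that data construction (BOTMNode and TimeSeriesCovE come from the original package and are not reproduced here; the spike positions are illustrative):

import numpy as np

TF, NC, LEN = 21, 2, 2000
proto = np.cos(np.linspace(-np.pi, 3 * np.pi, TF)) * np.hanning(TF)
scale = np.linspace(0, 2, TF)
xi1 = np.vstack((proto * 5 * scale, proto * 4 * scale)).T   # (TF, NC) template
signal = np.zeros((LEN, NC))
for pos in (200, 600, 1000):                                # known spike positions
    signal[pos:pos + TF] += xi1
x = np.ascontiguousarray(signal + np.random.randn(LEN, NC), dtype=np.float32)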
Example #45
0
def itemfreq(a):
    items, ind, inv = sp.unique(a, return_index=True, return_inverse=True)
    freq = sp.bincount(inv)
    return sp.array([ind, items, freq]).T

if __name__ == "__main__":
    f = h5py.File(sys.argv[1])

    raw = f['Genotype']['raw'][:]
    sample_ids = f['Genotype']['sample_ids'][:]
    chr_index = f['Genotype']['chr_index'][:]
    position_index = f['Genotype']['position_index'][:]

    rawT = raw.T
    snp_strings = sp.ascontiguousarray(rawT).view(sp.dtype((sp.void,rawT.dtype.itemsize * rawT.shape[1])))
    frequencies = itemfreq(snp_strings)

    ind = sp.where(frequencies[:,2]<=int(sys.argv[3]))[0]
    indices = sp.array(frequencies[ind,0],dtype="int")
    
    print "Number of Samples:\t\t\t", raw.shape[0]
    print "Number of SNPs before Filtering:\t", raw.shape[1]
    print "Number of truly unique SNPs:\t\t", sp.where(frequencies[ind,2]==1)[0].shape[0]
    print sp.histogram(frequencies[:,2],bins=[1,2,5,10,50,100,500,1000,2000])

    out = h5py.File(sys.argv[2],'w')
    g = out.create_group("Genotype")
    g.create_dataset("raw",data=raw[:,indices],chunks=True)
    g.create_dataset("sample_ids",data=sample_ids,chunks=True)
    g.create_dataset("chr_index",data=chr_index[indices],chunks=True)