Example #1
    def __init__(self,wcs,axes,counts=None,roi_radius_deg=180.,roi_msk=None):
        super(FITSImage, self).__init__(axes,counts=counts,
                                        var=copy.deepcopy(counts))
        
        self._wcs = wcs
        self._roi_radius_deg = roi_radius_deg
        self._header = self._wcs.to_header(True)
        
        self._lon = self._header['CRVAL1']
        self._lat = self._header['CRVAL2']
        
        self._roi_msk = np.empty(shape=self._counts.shape[:2],dtype=bool)
        self._roi_msk.fill(False)
        
        if roi_msk is not None: self._roi_msk |= roi_msk
        
        xpix, ypix = np.meshgrid(self.axis(0).center,self.axis(1).center)
        xpix = np.ravel(xpix)
        ypix = np.ravel(ypix)
        
#        self._pix_lon, self._pix_lat = self._wcs.wcs_pix2sky(xpix,ypix, 0)
        self._pix_lon, self._pix_lat = self._wcs.wcs_pix2world(xpix,ypix, 0)

        self.add_roi_msk(self._lon,self._lat,roi_radius_deg,True,
                         self.axis(1)._coordsys)
Example #2
def column_or_1d(y, warn=False):
    """ Ravel column or 1d numpy array, else raises an error

    Parameters
    ----------
    y : array-like

    warn : boolean, default False
       To control display of warnings.

    Returns
    -------
    y : array

    """
    shape = np.shape(y)
    if len(shape) == 1:
        return np.ravel(y)
    if len(shape) == 2 and shape[1] == 1:
        if warn:
            warnings.warn("A column-vector y was passed when a 1d array was"
                          " expected. Please change the shape of y to "
                          "(n_samples, ), for example using ravel().",
                          DataConversionWarning, stacklevel=2)
        return np.ravel(y)

    raise ValueError("bad input shape {0}".format(shape))
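A minimal usage sketch for the helper above (assuming numpy is imported as np and the function is in scope): a column vector is raveled to 1-d, while genuinely 2-d input raises.

import numpy as np

y_col = np.arange(6).reshape(-1, 1)      # (6, 1) column vector
print(column_or_1d(y_col).shape)         # (6,) -- raveled to 1-d

try:
    column_or_1d(np.zeros((3, 2)))       # more than one column is rejected
except ValueError as exc:
    print(exc)                           # bad input shape (3, 2)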
Example #3
    def __find_index(self, lat, lon, latvar, lonvar, n=1):
        if self._kdt.get(latvar.name) is None:
            latvals = latvar[:] * RAD_FACTOR
            lonvals = lonvar[:] * RAD_FACTOR
            clat, clon = np.cos(latvals), np.cos(lonvals)
            slat, slon = np.sin(latvals), np.sin(lonvals)
            triples = np.array(list(zip(np.ravel(clat * clon),
                                        np.ravel(clat * slon),
                                        np.ravel(slat))))
            self._kdt[latvar.name] = KDTree(triples)
            del clat, clon
            del slat, slon
            del triples

        if not hasattr(lat, "__len__"):
            lat = [lat]
            lon = [lon]

        lat = np.array(lat)
        lon = np.array(lon)

        lat_rad = lat * RAD_FACTOR
        lon_rad = lon * RAD_FACTOR
        clat, clon = np.cos(lat_rad), np.cos(lon_rad)
        slat, slon = np.sin(lat_rad), np.sin(lon_rad)
        q = np.array([clat * clon, clat * slon, slat]).transpose()

        dist_sq_min, minindex_1d = self._kdt[latvar.name].query(
            np.float32(q),
            k=n
        )
        iy_min, ix_min = np.unravel_index(minindex_1d, latvar.shape)
        return iy_min, ix_min, dist_sq_min * EARTH_RADIUS
Example #4
def similarness(image1,image2):
    """
Return the correlation between the histograms. This is 'normalized' so that
1 is a perfect match while -1 is a complete mismatch and 0 is no match.
"""
    # Open and resize images to 200x200
    i1 = Image.open(image1).resize((200,200))
    i2 = Image.open(image2).resize((200,200))

    # Get histogram and separate into RGB channels
    i1hist = numpy.array(i1.histogram()).astype('float32')
    i1r, i1g, i1b = i1hist[0:256], i1hist[256:256*2], i1hist[256*2:]
    # Rebin the histogram from 256 bins to 16 per channel (48 total)
    i1rh = numpy.array([sum(i1r[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i1gh = numpy.array([sum(i1g[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i1bh = numpy.array([sum(i1b[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    # Combine all the channels back into one array
    i1histbin = numpy.ravel([i1rh, i1gh, i1bh]).astype('float32')

    # Same steps for the second image
    i2hist = numpy.array(i2.histogram()).astype('float32')
    i2r, i2g, i2b = i2hist[0:256], i2hist[256:256*2], i2hist[256*2:]
    i2rh = numpy.array([sum(i2r[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i2gh = numpy.array([sum(i2g[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i2bh = numpy.array([sum(i2b[i*16:16*(i+1)]) for i in range(16)]).astype('float32')
    i2histbin = numpy.ravel([i2rh, i2gh, i2bh]).astype('float32')

    return cv2.compareHist(i1histbin, i2histbin, 0)
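A short usage sketch (the file names below are placeholders, not from the original source; requires Pillow, numpy, and OpenCV as used above):

# Hypothetical paths -- substitute two real image files.
score = similarness("photo_a.jpg", "photo_b.jpg")
print("histogram correlation: %.3f" % score)   # 1.0 means identical histograms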
Example #5
 def counts(self, a):
     """Returns array containing counts of each item in a.
     
     For example, on the enumeration 'UCAG', the sequence 'CCUG' would
     return the array [1,2,0,1] reflecting one count for the first item
     in the enumeration ('U'), two counts for the second item ('C'), no
     counts for the third item ('A'), and one count for the last item ('G').
     
     The result will always be a vector of Int with length equal to
     the length of the enumeration. We return Int and not an unsigned
     type because it's common to subtract counts, which produces surprising
     results on unsigned types (i.e. wraparound to maxint) unless the type
     is explicitly coerced by the user.
     
     Silently ignores any unrecognized indices, e.g. if your enumeration
     contains 'TCAG' and you get an 'X', the 'X' will be ignored because
     it has no index in the enumeration.
     """
     try:
         data = ravel(a)
     except ValueError:  #ravel failed; try coercing to array
         try:
             data = ravel(array(a))
         except ValueError: #try mapping to string
             data = ravel(array(list(map(str, a))))
     return sum(asarray(self._allowed_range == data, Int), axis=-1)
Example #6
def exact_roc(actuals, controls):
    """
    computes the area under the ROC curve for separating two sets. Uses all
    possible thresholds and trapezoidal interpolation. Also returns arrays of
    the true positive rate and the false positive rate.
    """

    actuals = np.ravel(actuals)
    controls = np.ravel(controls)
    if np.isnan(actuals).any():
        raise RuntimeError('NaN found in actuals')
    if np.isnan(controls).any():
        raise RuntimeError('NaN found in controls')

    thresholds = np.hstack([-np.inf,
        np.unique(np.concatenate((actuals,controls))), np.inf])[::-1]
    true_pos_rate = np.empty(thresholds.size)
    false_pos_rate = np.empty(thresholds.size)
    num_act = float(len(actuals))
    num_ctr = float(len(controls))

    for i, value in enumerate(thresholds):
        true_pos_rate[i] = (actuals >= value).sum() / num_act
        false_pos_rate[i] = (controls >= value).sum() / num_ctr
    auc = np.dot(np.diff(false_pos_rate),
            (true_pos_rate[0:-1]+true_pos_rate[1:])/2)
    return auc, true_pos_rate, false_pos_rate
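A quick self-contained check with synthetic scores (assumes numpy as np): two unit-variance gaussians two sigmas apart should give an AUC of roughly 0.92.

import numpy as np

rng = np.random.RandomState(0)
actuals = rng.normal(loc=2.0, size=500)     # scores for the positive set
controls = rng.normal(loc=0.0, size=500)    # scores for the negative set
auc, tpr, fpr = exact_roc(actuals, controls)
print(round(auc, 3))                        # ~0.92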
Example #7
def kdtree_fast(latvar,lonvar,lat0,lon0):
    '''
    :param latvar: 2-D latitude variable or array (degrees)
    :param lonvar: 2-D longitude variable or array (degrees)
    :param lat0: query latitude (degrees)
    :param lon0: query longitude (degrees)
    :return: (iy, ix) indices of the grid point closest to (lat0, lon0)
    '''
    rad_factor = pi/180.0 # for trigonometry, need angles in radians
    # Read latitude and longitude from file into numpy arrays
    latvals = latvar[:] * rad_factor
    lonvals = lonvar[:] * rad_factor
    ny,nx = latvals.shape
    clat,clon = cos(latvals),cos(lonvals)
    slat,slon = sin(latvals),sin(lonvals)
    # Build kd-tree from big arrays of 3D coordinates
    triples = list(zip(ravel(clat*clon), ravel(clat*slon), ravel(slat)))
    kdt = cKDTree(triples)
    lat0_rad = lat0 * rad_factor
    lon0_rad = lon0 * rad_factor
    clat0,clon0 = cos(lat0_rad),cos(lon0_rad)
    slat0,slon0 = sin(lat0_rad),sin(lon0_rad)
    dist_sq_min, minindex_1d = kdt.query([clat0*clon0, clat0*slon0, slat0])
    iy_min, ix_min = unravel_index(minindex_1d, latvals.shape)
    return iy_min,ix_min
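A hedged usage sketch on a synthetic 1-degree grid (coordinates are illustrative; the function expects the numpy names and scipy.spatial.cKDTree to be importable as below):

import numpy as np
from numpy import pi, cos, sin, ravel, unravel_index
from scipy.spatial import cKDTree

lats = np.linspace(-90.0, 90.0, 181)
lons = np.linspace(-180.0, 179.0, 360)
lonvar, latvar = np.meshgrid(lons, lats)            # 2-D coordinate grids
iy, ix = kdtree_fast(latvar, lonvar, 52.5, 13.4)    # nearest cell to ~Berlin
print(latvar[iy, ix], lonvar[iy, ix])               # nearest grid point, e.g. 52.0 13.0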
Example #8
def equal(a, b, exact):
    if array_equal(a, b):
        return True

    if hasattr(a, 'dtype') and a.dtype in ['f4','f8']:
        nnans = isnan(a).sum()
        if nnans > 0:
            # For results containing NaNs, just check that the number
            # of NaNs is the same in both arrays.  This check could be
            # made more exhaustive, but checking element by element in
            # python space is very expensive in general.
            return nnans == isnan(b).sum()
        ninfs = isinf(a).sum()
        if ninfs > 0:
            # Ditto for Inf's
            return ninfs == isinf(b).sum()
    if exact:
        return (shape(a) == shape(b)) and alltrue(ravel(a) == ravel(b), axis=0)
    else:
        if hasattr(a, 'dtype') and a.dtype == 'f4':
            atol = 1e-5   # Relax precision for special opcodes, like fmod
        else:
            atol = 1e-8
        return (shape(a) == shape(b) and
                allclose(ravel(a), ravel(b), atol=atol))
Example #9
    def __eq__(self, other):
        if not isinstance(other, DenseMatrix) or self.numRows != other.numRows or self.numCols != other.numCols:
            return False

        self_values = np.ravel(self.toArray(), order="F")
        other_values = np.ravel(other.toArray(), order="F")
        return all(self_values == other_values)
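The order="F" argument is the point here: this looks like PySpark's DenseMatrix, which stores its values column-major, so both matrices are flattened Fortran-style before comparing. A minimal numpy illustration of the difference:

import numpy as np

m = np.array([[1, 2],
              [3, 4]])
print(np.ravel(m))              # [1 2 3 4] -- row-major (C order, the default)
print(np.ravel(m, order="F"))   # [1 3 2 4] -- column-major, as used above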
Example #10
    def predict(self, X, return_std=False):
        """
        Perform regression on test vectors X.

        Parameters
        ----------
        X : {array-like, object with finite length or shape}
            Test data, requires length = n_samples

        return_std : boolean, optional
            Whether to return the standard deviation of posterior prediction.
            All zeros in this case.

        Returns
        -------
        y : array, shape = [n_samples]  or [n_samples, n_outputs]
            Predicted target values for X.

        y_std : array, shape = [n_samples]  or [n_samples, n_outputs]
            Standard deviation of predictive distribution of query points.
        """
        check_is_fitted(self, "constant_")
        n_samples = _num_samples(X)

        y = np.full((n_samples, self.n_outputs_), self.constant_,
                    dtype=np.array(self.constant_).dtype)
        y_std = np.zeros((n_samples, self.n_outputs_))

        if self.n_outputs_ == 1 and not self.output_2d_:
            y = np.ravel(y)
            y_std = np.ravel(y_std)

        return (y, y_std) if return_std else y
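This appears to be scikit-learn's DummyRegressor.predict; a usage sketch under that assumption, showing how the single-output case is raveled to 1-d:

import numpy as np
from sklearn.dummy import DummyRegressor

X = np.arange(10).reshape(-1, 1)
y = np.zeros(10)
est = DummyRegressor(strategy="constant", constant=3.0).fit(X, y)
pred, std = est.predict(X[:3], return_std=True)
print(pred)   # [3. 3. 3.] -- (n, 1) output raveled to 1-d
print(std)    # [0. 0. 0.]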
Example #11
def create_edisp(event_class, event_type, erec, egy, cth):
    """Create an array of energy response values versus energy and
    inclination angle.

    Parameters
    ----------
    egy : `~numpy.ndarray`
        Energy in MeV.

    cth : `~numpy.ndarray`
        Cosine of the incidence angle.

    """
    irf = create_irf(event_class, event_type)
    theta = np.degrees(np.arccos(cth))
    v = np.zeros((len(erec), len(egy), len(cth)))
    m = (erec[:,None] / egy[None,:] < 3.0) & (erec[:,None] / egy[None,:] > 0.33333)
    #    m |= ((erec[:,None] / egy[None,:] < 3.0) &
    #          (erec[:,None] / egy[None,:] > 0.5) & (egy[None,:] < 10**2.5))    
    m = np.broadcast_to(m[:,:,None], v.shape)

    try:    
        x = np.ones(v.shape)*erec[:,None,None]
        y = np.ones(v.shape)*egy[None,:,None]
        z = np.ones(v.shape)*theta[None,None,:]
        v[m] = irf.edisp().value(np.ravel(x[m]), np.ravel(y[m]), np.ravel(z[m]), 0.0)
    except:
        for i, x in enumerate(egy):
            for j, y in enumerate(theta):
                m = (erec / x < 3.0) & (erec / x > 0.333)
                v[m, i, j] = irf.edisp().value(erec[m], x, y, 0.0)
        
    return v
Example #12
    def _returnXY(self):
        """
        Returns gridded points as a vector
        """
        X,Y = np.meshgrid(self.xgrd,self.ygrd)

        return np.column_stack((np.ravel(X),np.ravel(Y)))
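The meshgrid-then-ravel idiom above is the standard way to turn two axis vectors into an (N, 2) array of grid points; a standalone sketch:

import numpy as np

xgrd = np.array([0.0, 1.0, 2.0])
ygrd = np.array([10.0, 20.0])
X, Y = np.meshgrid(xgrd, ygrd)
pts = np.column_stack((np.ravel(X), np.ravel(Y)))
print(pts.shape)   # (6, 2) -- one row per grid node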
Example #13
 def _check_hessian(self):
     if self.ff.system.cell.nvec != 0:
         # external rotations should be implemented properly for periodic systems.
         # 1D -> one external rotation, 2D and 3D -> no external rotation
         raise NotImplementedError('The hessian test is only working for isolated systems')
     # compute hessian
     hessian = estimate_cart_hessian(self.ff)
     # construct basis of external/internal degrees (rows)
     x, y, z = self.ff.system.pos.T
     natom = self.ff.system.natom
     ext_basis = np.array([
         [1.0, 0.0, 0.0]*natom,
         [0.0, 1.0, 0.0]*natom,
         [0.0, 0.0, 1.0]*natom,
         # TODO: this assumes geometry is centered for good conditioning
         np.ravel(np.array([np.zeros(natom), z, -y]).T),
         np.ravel(np.array([-z, np.zeros(natom), x]).T),
         np.ravel(np.array([y, -x, np.zeros(natom)]).T),
     ]).T
     u, s, vt = np.linalg.svd(ext_basis, full_matrices=True)
     rank = (s > s.max()*1e-10).sum() # for linear and
     int_basis = u[:,rank:]
     # project hessian
     int_hessian = np.dot(int_basis.T, np.dot(hessian, int_basis))
     evals = np.linalg.eigvalsh(int_hessian)
     self.num_neg_evals = (evals < 0).sum()
     # call tamkin as double check
     import tamkin
     system = self.ff.system
     mol = tamkin.Molecule(system.numbers, system.pos, system.masses, self.energy, self.gpos, hessian)
     nma = tamkin.NMA(mol, tamkin.ConstrainExt())
     invcm = lightspeed/centimeter
     #print nma.freqs/invcm
     self.num_neg_evals = (nma.freqs < 0).sum()
Example #14
def plot_3d_covariance(mean, cov):
    o,w,h = covariance_ellipse(cov,3)
    # rotate width and height to x,y axis
    wx = abs(w*np.cos(o) + h*np.sin(o))*1.2
    wy = abs(h*np.cos(o) - w*np.sin(o))*1.2
    # scale w (reuse the larger of the two extents)
    if wx > wy:
        w = wx
    else:
        w = wy

    minx = mean[0] - w
    maxx = mean[0] + w
    miny = mean[1] - w
    maxy = mean[1] + w

    xs = np.arange(minx, maxx, (maxx-minx)/40.)
    ys = np.arange(miny, maxy, (maxy-miny)/40.)
    xv, yv = np.meshgrid (xs, ys)

    zs = np.array([100.* stats.multivariate_normal.pdf(np.array([x,y]),mean,cov) \
                   for x,y in zip(np.ravel(xv), np.ravel(yv))])
    zv = zs.reshape(xv.shape)

    ax = plt.figure().add_subplot(111, projection='3d')
    ax.plot_surface(xv, yv, zv, rstride=1, cstride=1, cmap=cm.autumn)

    ax.set_xlabel('X')
    ax.set_ylabel('Y')

    ax.contour(xv, yv, zv, zdir='x', offset=minx-1, cmap=cm.autumn)
    ax.contour(xv, yv, zv, zdir='y', offset=maxy, cmap=cm.BuGn)
Example #15
    def __init__(self, con_id=None, onset=None, amplitude=None):
        """
        Parameters
        ----------
        con_id: array of shape (n_events), type = string, optional
               identifier of the events
        onset: array of shape (n_events), type = float, optional,
               onset time (in s.) of the events
        amplitude: array of shape (n_events), type = float, optional,
                   amplitude of the events (if applicable)
        """
        self.con_id = con_id
        self.onset = onset
        self.amplitude = amplitude
        self.n_events = 0
        if con_id is not None:
            self.n_events = len(con_id)
            try:
                # this is only for backward compatibility:
                # if con_id were integers, they become a string
                self.con_id = np.array(["c" + str(int(float(c))) for c in con_id])
            except:
                self.con_id = np.ravel(np.array(con_id)).astype("str")

        if onset is not None:
            if len(onset) != self.n_events:
                raise ValueError("inconsistent definition of ids and onsets")
            self.onset = np.ravel(np.array(onset)).astype(float)
        if amplitude is not None:
            if len(amplitude) != self.n_events:
                raise ValueError("inconsistent definition of amplitude")
            self.amplitude = np.ravel(np.array(amplitude))
        self.type = "event"
        self.n_conditions = len(np.unique(self.con_id))
Example #16
    def gen_batch_in_memory(self, X, nn_finder, nb_q, prior_factor):
        """Generate batch, assuming X is loaded in memory in the main program"""

        while True:
            # Select idx at random for the batch
            idx = np.random.choice(X.shape[0], self.batch_size, replace=False)

            X_batch_color = X[idx]

            X_batch_black = X_batch_color[:, :1, :, :]
            X_batch_ab = X_batch_color[:, 1:, :, :]
            npts, c, h, w = X_batch_ab.shape
            X_a = np.ravel(X_batch_ab[:, 0, :, :])
            X_b = np.ravel(X_batch_ab[:, 1, :, :])
            X_batch_ab = np.vstack((X_a, X_b)).T

            Y_batch = self.get_soft_encoding(X_batch_ab, nn_finder, nb_q)
            # Add prior weight to Y_batch
            idx_max = np.argmax(Y_batch, axis=1)
            weights = prior_factor[idx_max].reshape(Y_batch.shape[0], 1)
            Y_batch = np.concatenate((Y_batch, weights), axis=1)
            # # Reshape Y_batch
            Y_batch = Y_batch.reshape((npts, h, w, nb_q + 1))

            yield X_batch_black, X_batch_color, Y_batch
Example #17
            def producer():

                try:
                    # Load the data from HDF5 file
                    with h5py.File(self.hdf5_file, "r") as hf:
                        num_chan, height, width = self.X_shape[-3:]
                        # Select start_idx at random for the batch
                        idx_start = np.random.randint(0, self.X_shape[0] - self.batch_size)
                        idx_end = idx_start + self.batch_size
                        # Get X and y
                        X_batch_color = hf["%s_lab_data" % self.dset][idx_start: idx_end, :, :, :]

                        X_batch_black = X_batch_color[:, :1, :, :]
                        X_batch_ab = X_batch_color[:, 1:, :, :]
                        npts, c, h, w = X_batch_ab.shape
                        X_a = np.ravel(X_batch_ab[:, 0, :, :])
                        X_b = np.ravel(X_batch_ab[:, 1, :, :])
                        X_batch_ab = np.vstack((X_a, X_b)).T

                        Y_batch = self.get_soft_encoding(X_batch_ab, nn_finder, nb_q)
                        # Add prior weight to Y_batch
                        idx_max = np.argmax(Y_batch, axis=1)
                        weights = prior_factor[idx_max].reshape(Y_batch.shape[0], 1)
                        Y_batch = np.concatenate((Y_batch, weights), axis=1)
                        # # Reshape Y_batch
                        Y_batch = Y_batch.reshape((npts, h, w, nb_q + 1))

                        # Put the data in a queue
                        queue.put((X_batch_black, X_batch_color, Y_batch))
                except:
                    print("Nothing here")
Example #18
    def __init__(self,pix,name=None,title=None,z1=None,z2=None):
        # pix must be a numpy array...
        self.a = 1.0
        self.b = self.c = 0.
        self.d = -1.0
        _shape = pix.shape
        # Start assuming full image can fit in frame buffer
        self.tx = _shape[1] // 2 + _shape[1] % 2
        self.ty = _shape[0] // 2 + _shape[0] % 2
        self.dtx = _shape[1] // 2 + _shape[1] % 2
        self.dty = _shape[0] // 2 + _shape[0] % 2
               
        # Determine full range of pixel values for image
        if not z1:
            self.z1 = n.minimum.reduce(n.ravel(pix))
        else:
            self.z1 = z1
            
        if not z2:
            self.z2 = n.maximum.reduce(n.ravel(pix))
        else:
            self.z2 = z2
            
        self.zt = self._W_LINEAR
                
        if not name:
            self.name = 'Image'
        else:
            self.name = name
        self.title = title

        self.ny,self.nx = pix.shape
        self.full_ny, self.full_nx = pix.shape
Example #19
    def __call__(self, filt, mask=None):
        '''
        Provide the iterator over the levels.
        '''
        self._check_filter(filt, mask)
        # This cover method is only for one-dimensional filter functions.
        assert(self.dim==1)
        # The interval length measures indices, not filter values
        # in this case.
        self.interval_length = 1. / \
            ( self.intervals[0] - (self.intervals[0]-1)*self.fract_overlap )
        self.step_size = self.interval_length*(1-self.fract_overlap)

        if mask is None:
            self.n = len(self.filt)
            self.sortorder = np.argsort(np.ravel(self.filt))
        else:
            idx = np.flatnonzero(mask)
            self.n = len(idx)
            sortorder = np.argsort(np.ravel(self.filt[mask]))
            self.sortorder = idx[sortorder]

        assert len(self.sortorder)==self.n

        self.iter = range(self.intervals[0]).__iter__()
        return self
Example #20
    def reproject(self, nj_obj, field):
        """Reproject a field of another njord inst. to the current grid"""
        if not hasattr(self,'nj_ivec'):
            self.add_njijvec(nj_obj)
        field = getattr(nj_obj, field) if type(field) is str else field
        
        if hasattr(nj_obj, 'tvec') and (len(nj_obj.tvec) == field.shape[0]):
            newfield = np.zeros(nj_obj.tvec.shape + self.llat.shape)
            for tpos in range(len(nj_obj.tvec)):
                newfield[tpos,:,:] = self.reproject(nj_obj, field[tpos,...])
            return newfield

        di = self.i2 - self.i1
        dj = self.j2 - self.j1
        xy = np.vstack((self.nj_jvec, self.nj_ivec))
        if type(field) == str:
            weights = np.ravel(nj_obj.__dict__[field])[self.nj_mask]
        else:
            weights = np.ravel(field)[self.nj_mask]
        mask = ~np.isnan(weights) 
        flat_coord = np.ravel_multi_index(xy[:,mask],(dj, di))
        sums = np.bincount(flat_coord, weights[mask])
        cnts = np.bincount(flat_coord)
        fld = np.zeros((dj, di)) * np.nan
        fld.flat[:len(sums)] = sums.astype(float)/cnts
        try:
            self.add_landmask()
            fld[self.landmask] = np.nan
        except:
            print "Couldn't load landmask for %s" % self.projname
        return fld
Example #21
def doSetNoDataInSeriesOld(infile, nodata, outfile, outformat, options):
    fileH = gdal.Open(infile, GA_ReadOnly)
    if fileH is None:
        exitMessage('Could not open file {0}. Exit(1).'.format(infile), 1)
    
    # is the nodata value already present?
    data = numpy.ravel( fileH.GetRasterBand(1).ReadAsArray())
    wnodata = (data==nodata)
    if wnodata.any():
        print('Nodata already set. Return(0)')
        return(0)

    common = numpy.ones(data.shape, dtype=bool)
    for iband in range(1, fileH.RasterCount):
        newdata = numpy.ravel(fileH.GetRasterBand(iband + 1).ReadAsArray())
        wnequal = data!=newdata
        common[wnequal] = 0
        gdal.TermProgress_nocb( (iband+1)/float( 2*fileH.RasterCount ) )

    # is there any constant time series?
    if common.any():
        outDrv = gdal.GetDriverByName(outformat)
        outDS = outDrv.Create(outfile, fileH.RasterXSize, fileH.RasterYSize, fileH.RasterCount, fileH.GetRasterBand(1).DataType, options)
        outDS.SetProjection( fileH.GetProjection() )
        outDS.SetGeoTransform( fileH.GetGeoTransform() )
        #then set these time series to nodata
        for iband in range(fileH.RasterCount):
            data = numpy.ravel(fileH.GetRasterBand(iband + 1).ReadAsArray(0, 0, fileH.RasterXSize, fileH.RasterYSize))
            data[common] = nodata
            outDS.GetRasterBand( iband + 1 ).WriteArray( data.reshape(fileH.RasterYSize, fileH.RasterXSize), 0, 0)
            gdal.TermProgress_nocb( (iband+1+fileH.RasterCount) / float( 2*fileH.RasterCount ) )

    gdal.TermProgress_nocb(1)
Example #22
 def _lf_acc(self, subset, lf_idx):
   gt = self.gt._gt_vec
   pred = np.ravel(self.lf_matrix.tocsc()[:,lf_idx].todense())
   has_label = np.where(pred != 0)
   has_gt = np.where(gt != 0)
   # Get labels/gt for candidates in dev set, with label, with gt
   gd_idxs = np.intersect1d(has_label, subset)
   gd_idxs = np.intersect1d(has_gt, gd_idxs)
   gt = np.ravel(gt[gd_idxs])
   pred_sub = np.ravel(pred[gd_idxs])
   n_neg = np.sum(pred_sub == -1)
   n_pos = np.sum(pred_sub == 1)
   if np.sum(pred == -1) == 0:
     neg_acc = -1
   elif n_neg == 0:
     neg_acc = 0
   else:
     neg_acc = float(np.sum((pred_sub == -1) * (gt == -1))) / n_neg
   if np.sum(pred == 1) == 0:
     pos_acc = -1
   elif n_pos == 0:
     pos_acc = 0
   else: 
     pos_acc = float(np.sum((pred_sub == 1) * (gt == 1))) / n_pos
   return (pos_acc, n_pos, neg_acc, n_neg)
Example #23
def make_kernel_grid(freq, kernel_size, n_pix, placement_grid):
    """
    make_kernel_grid(freq,kernel_size,n_pix,placement_grid)
    
    freq ~ cyc/n_pix
    
    kernel_size ~ pix. the fwhm of the gaussian envelope, effectively the kernel radius.
    
    n_pix ~ pixels per side of square image
    
    placement_grid = (X,Y) grid of kernel centers, as from meshgrid
   
    return:
      kernel_set  = 3D numpy array of complex ripple filters ~ [number_of_filters] x [n_pix] x [n_pix]
    
    """
    iter_x = np.ravel(placement_grid[0])
    iter_y = np.ravel(placement_grid[1])

    kernel_set = np.zeros((len(iter_x), n_pix, n_pix)).astype(complex)
    count = 0
    print "constructing %d filters" % (len(iter_x))
    for x, y in zip(iter_x, iter_y):
        kernel_set[count, :, :] = complex_ripple_filter(freq, (x, y), kernel_size, n_pix)
        count += 1
    return kernel_set
Example #24
    def _binopt(self, other, op, in_shape=None, out_shape=None):
        """apply the binary operation fn to two sparse matrices"""

        # ideally we'd take the GCDs of the blocksize dimensions
        # and explode self and other to match
        other = self.__class__(other, blocksize=self.blocksize)

        # e.g. bsr_plus_bsr, etc.
        fn = getattr(sparsetools, self.format + op + self.format)

        R,C = self.blocksize

        max_bnnz = len(self.data) + len(other.data)
        indptr = np.empty_like(self.indptr)
        indices = np.empty(max_bnnz, dtype=np.intc)
        data = np.empty(R*C*max_bnnz, dtype=upcast(self.dtype,other.dtype))

        fn(self.shape[0]//R, self.shape[1]//C, R, C,
                self.indptr, self.indices, np.ravel(self.data),
                other.indptr, other.indices, np.ravel(other.data),
                indptr, indices, data)

        actual_bnnz = indptr[-1]
        indices = indices[:actual_bnnz]
        data = data[:R*C*actual_bnnz]

        if actual_bnnz < max_bnnz/2:
            indices = indices.copy()
            data = data.copy()

        data = data.reshape(-1,R,C)

        return self.__class__((data, indices, indptr), shape=self.shape)
Example #25
    def __call__(self, transform_xy, x1, y1, x2, y2):
        """
        get extreme values.

        x1, y1, x2, y2 in image coordinates (0-based)
        nx, ny : number of divisions in each axis
        """
        x_, y_ = np.linspace(x1, x2, self.nx), np.linspace(y1, y2, self.ny)
        x, y = np.meshgrid(x_, y_)
        lon, lat = transform_xy(np.ravel(x), np.ravel(y))

        # iron out jumps, but the algorithm should be improved.
        # This is just a naive way of doing it and may fail for some cases.
        if self.lon_cycle is not None:
            lon0 = np.nanmin(lon)
            lon -= 360.0 * ((lon - lon0) > 180.0)
        if self.lat_cycle is not None:
            lat0 = np.nanmin(lat)
            lat -= 360.0 * ((lat - lat0) > 180.0)

        lon_min, lon_max = np.nanmin(lon), np.nanmax(lon)
        lat_min, lat_max = np.nanmin(lat), np.nanmax(lat)

        lon_min, lon_max, lat_min, lat_max = self._adjust_extremes(lon_min, lon_max, lat_min, lat_max)

        return lon_min, lon_max, lat_min, lat_max
Example #26
    def objective_function(self, fps, fjac=None, **kwargs):
        """
        Function to minimize.

        Parameters
        ----------
        fps : list
            parameters returned by the fitter
        fjac : None or list
            parameters for which to compute the jacobian
        args : list
            [model, [weights], [input coordinates]]
        """
        status = 0
        model = kwargs['model']
        weights = kwargs['weights']
        model.parameters = fps
        meas = kwargs['err']
        if 'y' in kwargs:
            args = (kwargs['x'], kwargs['y'])
        else:
            args = (kwargs['x'],)
        r = [status]
        if weights is None:
            residuals = np.ravel(model(*args) - meas)
            r.append(residuals)
        else:
            residuals = np.ravel(weights * (model(*args) - meas))
            r.append(residuals)
        if fjac is not None:
            args = args + (meas,)
            fderiv = np.array(self._wrap_deriv(fps, model, weights, *args))
            r.append(fderiv)
        return r
Example #27
def cost(params, Y, R, num_features, lambdas):
    Y = np.matrix(Y)  # (1682, 943)
    R = np.matrix(R)  # (1682, 943)
    num_movies = Y.shape[0]
    num_users = Y.shape[1]

    # reshape the parameter array into parameter matrices
    X = np.matrix(np.reshape(params[:num_movies * num_features], (num_movies, num_features)))  # (1682, 10)
    Theta = np.matrix(np.reshape(params[num_movies * num_features:], (num_users, num_features)))  # (943, 10)

    # initializations
    J = 0
    X_grad = np.zeros(X.shape)  # (1682, 10)
    Theta_grad = np.zeros(Theta.shape)  # (943, 10)


    # compute the cost
    error = np.multiply((X * Theta.T) - Y, R)  # (1682, 943)
    squared_error = np.power(error, 2)  # (1682, 943)
    J = (1. / 2) * np.sum(squared_error)

    # add the cost regularization
    J = J + ((lambdas / 2) * np.sum(np.power(Theta, 2)))
    J = J + ((lambdas / 2) * np.sum(np.power(X, 2)))

    # calculate the gradients with regularization
    X_grad = (error * Theta) + (lambdas * X)
    Theta_grad = (error.T * X) + (lambdas * Theta)

    # unravel the gradient matrices into a single array
    grad = np.concatenate((np.ravel(X_grad), np.ravel(Theta_grad)))

    return J, grad
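The ravel/reshape pairing above is what lets a flat-vector optimizer drive matrix-valued parameters; a hedged sketch of calling this cost with scipy.optimize.minimize (the shapes and regularization value are illustrative, not from the original):

import numpy as np
from scipy.optimize import minimize

num_movies, num_users, num_features = 20, 15, 3
rng = np.random.RandomState(0)
Y = rng.rand(num_movies, num_users)
R = (rng.rand(num_movies, num_users) > 0.5).astype(float)

# Pack X and Theta into the single flat vector that cost() expects.
params0 = np.concatenate((np.ravel(rng.randn(num_movies, num_features)),
                          np.ravel(rng.randn(num_users, num_features))))

# jac=True tells minimize that cost returns (J, grad) as a pair.
res = minimize(cost, params0, args=(Y, R, num_features, 1.0),
               jac=True, method="L-BFGS-B", options={"maxiter": 50})
print(res.fun)   # final regularized cost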
Example #28
    def on_epoch_end(self, epoch, logs={}):
        model.save_weights(weightSavePath + "bestWeights_regressMOS_smallNetwork_latestModel.h5",overwrite=True)
        logging.info(" -- Epoch "+str(epoch)+" done, loss : "+ str(logs.get('loss')))

        predictedScoresVal = np.ravel(model.predict(valData,batch_size=batchSize))
        predictedScoresTest = np.ravel(model.predict(testData,batch_size=batchSize))
        sroccVal = scipy.stats.spearmanr(predictedScoresVal, valLabels)
        plccVal =  scipy.stats.pearsonr(predictedScoresVal, valLabels)
        sroccTest = scipy.stats.spearmanr(predictedScoresTest, testLabels)
        plccTest =  scipy.stats.pearsonr(predictedScoresTest, testLabels)
        t_str_val = '\nSpearman corr for validation set is ' + str(sroccVal[0]) + '\nPearson corr for validation set is '+ str(plccVal[0]) + '\nMean absolute error for validation set is ' + str(np.mean(np.abs(predictedScoresVal-valLabels)))
        t_str_test = '\nSpearman corr for test set is ' + str(sroccTest[0]) + '\nPearson corr for test set is '+ str(plccTest[0]) + '\nMean absolute error for test set is ' + str(np.mean(np.abs(predictedScoresTest-testLabels)))
        print(t_str_val)
        print(t_str_test)

        mean_corr = sroccVal[0] + plccVal[0]
        if mean_corr > self.best_mean_corr:
            self.best_mean_corr = mean_corr
            model.save_weights(weightSavePath + "bestWeights_regressMOS_smallNetwork_bestCorr.h5",overwrite=True)
            printing("Best correlation loss model saved at Epoch " + str(epoch) + "\n")

        self.metric.append(logs.get("val_loss"))
        if epoch % 5 == 0:
            model.optimizer.lr.set_value(round(Decimal(0.8*model.optimizer.lr.get_value()),8))
            learningRate = model.optimizer.lr.get_value()
            printing("")
            printing("The current learning rate is: " + str(learningRate))
Example #29
def test_cvxopt():
    mycvxopt.solvers().qp(0,0,0,0,0,0)
    path = '/Users/Admin/Dropbox/ml/MachineLearning_CS6140'
    with open(os.path.join(path, 'cvxopt.pkl'), 'rb') as f:
        arr = pickle.load(f)
    print('pickle loaded')
    P = arr[0]
    q = arr[1]
    G = arr[2]
    h = arr[3]
    A = arr[4]
    b = arr[5]
    print('input assigned')
    #     pcost       dcost       gap    pres   dres
    #0: -6.3339e+03 -5.5410e+05  2e+06  2e+00  2e-14
    #1:  5.8332e+02 -3.1277e+05  5e+05  2e-01  2e-14
    #2:  1.3585e+03 -1.3003e+05  2e+05  7e-02  2e-14
    #return np.ravel(solution['x'])
    with open(os.path.join(path, 'cvxopt_solution.pkl'), 'rb') as f:
        solution = pickle.load(f)
    print('solution pickle loaded')

    mysolution = cvxopt.solvers.qp(P, q, G, h, A, b)
    print('convex optimizer solved')
    if np.allclose(np.ravel(mysolution['x']), np.ravel(solution['x'])):
        print('EQUAL!!!')
    else:
        print('WRONG!!!')
Example #30
    def add_ij(self):
        self.imat,self.jmat = np.meshgrid(np.arange(self.i2-self.i1),
                                          np.arange(self.j2-self.j1))
        self.kdijvec = np.vstack((np.ravel(self.imat),
                                  np.ravel(self.jmat))).T

        self._ijvec = np.arange(np.prod(self.imat.shape))
Example #31
datalvl0=pd.concat([train_X10,test_X10],axis=0)
datalvl1=pd.get_dummies(datalvl0)
train_X1=datalvl1[0:train_X10.shape[0]]
test_X1=datalvl1[train_X10.shape[0]:]

#train_X1=train_data[train_data.columns[4]]
train_X22=train_data[train_data.columns[6]]
train_X3=train_data[train_data.columns[7]]

train_y1=train_label[train_label.columns[1:]]


train_X2=(train_X22 - train_X22.min()) / (train_X22.max() - train_X22.min())
train_X3=pd.get_dummies(train_X3)
train_X = pd.concat([train_X1, train_X2,train_X3,train_despo,train_ext,train_name,train_nameneg], axis=1)
train_y=np.ravel(train_y1)

X_train,X_test, y_train, y_test = train_test_split(train_X,train_y,test_size=0.4, random_state=0)
rf1 = RandomForestRegressor(n_estimators= 70,max_depth=40,  min_samples_split=55,
                                 min_samples_leaf=50,max_features='auto',oob_score=True, random_state=100,n_jobs=-1)
rf1.fit(X_train,y_train)
y_pred=rf1.predict(X_test)
print(log_loss(y_test, y_pred))




"""fill NA
test_X11=test_data[test_data.columns[3]]
test_X12=test_data[test_data.columns[4]]
test_X13=pd.concat([test_X11, test_X12], axis=1)
Example #32
    def attack_svm(self, server, predictor_name, kernel_type, attack_type, dimension,  query_budget, dataset=None, roundsize=5):
        if dataset is None or len(dataset) < 2:
            print("[!] Dataset too small")
            print("[*] Aborting attack...")
            raise ValueError
        if not isinstance(dataset, list):
            dataset = dataset.tolist()
        if attack_type == "retraining":
            my_model = svm.SVC(kernel=kernel_type)
            X = []
            y = []

            for datum in random.sample(dataset, query_budget):
                b = self.client.poll_server(server, predictor_name, [datum])
                X.append(datum)
                y.append(b)
            my_model.fit(X, numpy.ravel(y))
            return my_model

        elif attack_type == "adaptive retraining":
            if len(dataset) >= query_budget > roundsize:
                pool = random.sample(dataset, query_budget)
                x = []
                y = []
                n = roundsize
                t = math.ceil(query_budget / n)
                for i in range(0, n):
                    a = pool.pop(0)
                    b = self.client.poll_server(server, predictor_name, [a])[0]
                    x.append(a)
                    y.append(b)

                while min(y) == max(y):
                    for i in range(0, n):
                        a = pool.pop(0)
                        b = self.client.poll_server(server, predictor_name, [a])[0]
                        x.append(a)
                        y.append(b)
                    t -= 1
                    print("[*] Additional initial random round had to be done due to no variance")
                my_model = svm.SVC(kernel=kernel_type)
                for i in range(0, t-1):

                    my_model.fit(x, numpy.ravel(y))
                    for j in range(0, n):
                        if not pool:
                            break
                        distances = my_model.decision_function(pool).tolist()
                        closest = pool.pop(distances.index(min(distances)))
                        x.append(closest)
                        y.append(self.client.poll_server(server, predictor_name, [closest])[0])
                my_model.fit(x, numpy.ravel(y))
                return my_model
            else:
                print("[!] Error: dataset to small or roundsize bigger than query_budget")
                raise ValueError
        elif attack_type == "lowd-meek":
            if len(dataset) != 2:
                print("[!] Error: For Lowd-Meek attack, please provide exactly a positive and a negative sample")
                raise ValueError
            elif kernel_type != "linear":
                print("[!] Error: Unsupported Kernel by lowd-meek attack")
                raise ValueError
            else:
                print("[*] Initiating lowd-meek attack.")
                epsilon = 0.01
                d = 0.01
                vector1 = dataset[0]
                vector2 = dataset[1]
                vector1_category = numpy.ravel(self.client.poll_server(server, predictor_name, [vector1]))
                vector2_category = numpy.ravel(self.client.poll_server(server, predictor_name, [vector2]))
                if vector1_category == vector2_category:
                    print("[!] Error: Provided Samples are in same category")
                    raise ValueError
                else:
                    if vector1_category == [0]:
                        print(vector1_category, "is 0")
                        negative_instance = vector1
                        positive_instance = vector2
                    else:
                        print(vector2_category, "is 0")
                        negative_instance = vector2
                        positive_instance = vector1

                    #sign_witness_p = positive_instance
                    sign_witness_n = negative_instance
                    print("[+] Positive and  Negative Instance confirmed.")
                    for feature in range(0, len(sign_witness_n)):
                        print("[*] Finding Signwitness. Checking feature", feature)
                        f = sign_witness_n[feature]
                        sign_witness_n[feature] = positive_instance[feature]
                        if numpy.ravel(self.client.poll_server(server, predictor_name, [sign_witness_n])) == [1]:
                            sign_witness_p = sign_witness_n.copy()

                            sign_witness_n[feature] = f
                            f_index = feature
                            print("[+] Sign Witnesses found with feature index:", f_index)
                            break

                    weight_f = 1 * (sign_witness_p[feature] - sign_witness_n[feature]) / abs(sign_witness_p[feature] - sign_witness_n[feature])
                    # Find Negative Instance of x with gap(x) < epsilon/4
                    delta = sign_witness_p[feature] - sign_witness_n[feature]

                    seeker = sign_witness_n
                    #seeker[feature] = sign_witness_p[feature] - delta
                    #print(sign_witness_p)
                    #print(sign_witness_n)
                    while True:
                        #print("S - ", seeker)
                        pred = self.client.poll_server(server, predictor_name, [seeker])
                        #print("p:", pred)
                        if pred == [1]:
                            #print("Positive. delta", delta)
                            delta = delta / 2
                            seeker[feature] = seeker[feature] - delta
                        else:
                            #print("Negative. delta", delta)
                            if abs(delta) < epsilon/4:
                                print("[+] found hyperplane crossing", seeker)
                                break
                            delta = delta / 2
                            seeker[feature] = seeker[feature] + delta
                    # seeker should be that negative instance now.
                    crossing = seeker.copy()
                    seeker[feature] += 1
                    classification = numpy.ravel(self.client.poll_server(server, predictor_name, [seeker]))

                    dooble = seeker.copy()  # dooble is negative instance

                    weight = [0]*len(dooble)
                    #print("Weight on initieal feature", weight_f)

                    for otherfeature in range(0, len(dooble)):
                        if otherfeature == feature:
                            weight[otherfeature] = weight_f
                            continue
                        # line search on the other features
                        dooble[otherfeature] += 1/d
                        if numpy.ravel(self.client.poll_server(server, predictor_name, [dooble])) == classification:
                            #print("DIDNOTCHANGE")
                            doox = dooble.copy()
                            dooble[otherfeature] -= 2/d
                            if numpy.ravel(self.client.poll_server(server, predictor_name, [dooble])) == classification:  # if even though added 1/d class stays the same -> weight = 0
                                weight[otherfeature] = 0
                                dooble[otherfeature] = seeker[otherfeature]
                                #print("found weightless feature,", otherfeature)
                                continue
                            else:
                                distance_max = -1/d
                        else:

                            distance_max = 1/d

                        distance_min = 0
                        distance_mid = (distance_max + distance_min) / 2
                        dooble[otherfeature] = seeker[otherfeature] + distance_mid

                        while abs(distance_mid - distance_min) > epsilon / 4:

                            if numpy.ravel(self.client.poll_server(server, predictor_name, [dooble])) != classification:

                                distance_min = distance_min
                                distance_max = distance_mid
                                distance_mid = (distance_min + distance_max) / 2
                                dooble[otherfeature] = seeker[otherfeature] + distance_mid
                            else:
                                distance_min = distance_mid
                                distance_mid = (distance_min + distance_max) / 2
                                distance_max = distance_max
                                dooble[otherfeature] = seeker[otherfeature] + distance_mid
                        test = seeker[otherfeature]-dooble[otherfeature]
                        weight[otherfeature] = weight_f / test
                        continue
                    print("[+] Found Weights", weight)
                    a = -(weight[0] / weight[1])
                    intercept = crossing[1] - a * crossing[0]
                    print("[+] Found Intercept (2d)", intercept)

                    class LinearMockSVM:
                        def __init__(self, w__, b__):
                            self.w__ = w__
                            self.b__ = b__*w__[1]  # norm

                        def predict(self, val):
                            rv = []
                            for v in val:
                                #print(numpy.sign(numpy.dot(self.w__, v) - self.b__))
                                rv.append(0) if numpy.sign(numpy.dot(self.w__, v) - self.b__) == -1 else rv.append(1)
                            return rv
                    return LinearMockSVM(weight, intercept)
        else:
            print("Error: Unknown attack type")
            raise ValueError
Example #33
    def predict(self, X):
        """Perform classification on test vectors X.

        Parameters
        ----------
        X : {array-like, object with finite length or shape}
            Test data, requires length = n_samples

        Returns
        -------
        y : array, shape = [n_samples] or [n_samples, n_outputs]
            Predicted target values for X.
        """
        check_is_fitted(self, 'classes_')

        # numpy random_state expects Python int and not long as size argument
        # under Windows
        n_samples = _num_samples(X)
        rs = check_random_state(self.random_state)

        n_classes_ = self.n_classes_
        classes_ = self.classes_
        class_prior_ = self.class_prior_
        constant = self.constant
        if self.n_outputs_ == 1:
            # Get same type even for self.n_outputs_ == 1
            n_classes_ = [n_classes_]
            classes_ = [classes_]
            class_prior_ = [class_prior_]
            constant = [constant]
        # Compute probability only once
        if self.strategy == "stratified":
            proba = self.predict_proba(X)
            if self.n_outputs_ == 1:
                proba = [proba]

        if self.sparse_output_:
            class_prob = None
            if self.strategy in ("most_frequent", "prior"):
                classes_ = [np.array([cp.argmax()]) for cp in class_prior_]

            elif self.strategy == "stratified":
                class_prob = class_prior_

            elif self.strategy == "uniform":
                raise ValueError("Sparse target prediction is not "
                                 "supported with the uniform strategy")

            elif self.strategy == "constant":
                classes_ = [np.array([c]) for c in constant]

            y = random_choice_csc(n_samples, classes_, class_prob,
                                  self.random_state)
        else:
            if self.strategy in ("most_frequent", "prior"):
                y = np.tile([classes_[k][class_prior_[k].argmax()] for
                             k in range(self.n_outputs_)], [n_samples, 1])

            elif self.strategy == "stratified":
                y = np.vstack([classes_[k][proba[k].argmax(axis=1)] for
                               k in range(self.n_outputs_)]).T

            elif self.strategy == "uniform":
                ret = [classes_[k][rs.randint(n_classes_[k], size=n_samples)]
                       for k in range(self.n_outputs_)]
                y = np.vstack(ret).T

            elif self.strategy == "constant":
                y = np.tile(self.constant, (n_samples, 1))

            if self.n_outputs_ == 1 and not self.output_2d_:
                y = np.ravel(y)

        return y
Example #34
	from sklearn.ensemble import GradientBoostingRegressor
	from sklearn import linear_model
	import matplotlib.pyplot as plt
	import numpy as np
	
	classifier 		= 'GBR'
	crossval  		= 1;
	loc_train 		= 'train.vw'
	loc_test  		= 'test.vw'
	loc_submission 	= 'treePreds.txt' 
	
#	features,labels,ids 			= makeFeatureArray(loc_train)	# extract train features 
#	testFeatures,testLabels,testids 	= makeFeatureArray(loc_test)		# extract test features
	
	if crossval==1:
		x_train, x_test, y_train, y_test = train_test_split(features, np.ravel(labels), test_size=0.33, random_state=42) 	# separate train and test sets
	
		if classifier == 'RF':
			clf = RandomForestClassifier(n_estimators=400,n_jobs=4)		# create random forest
			clf.fit(x_train,y_train) 							# train classifier
			predictions = clf.predict_proba(x_test)[:, 1]		# generate predictions (probability of positive class)
			fpr,tpr,tr	= roc_curve(y_test,predictions,1)	
			score 	= roc_auc_score(y_test,predictions)
		if classifier == 'RR':
			clf = linear_model.Ridge(alpha = 0.5)					# create Ridge regression
			clf.fit(x_train,y_train) 							# train classifier
			predictions = clf.predict(x_test)						# generate predictions
			fpr,tpr,tr	= roc_curve(y_test,predictions,1)	
			score 	= roc_auc_score(y_test,predictions)	
		if classifier == 'GBR':	
			clf = GradientBoostingRegressor(n_estimators=100, loss='ls').fit(x_train, y_train)			
Example #35
     print ('Collecting files for {} experiment'.format(name))
     
     ZSCORE_CUT = 2.0
     
     # Get protocol type
     protocol = str(filelist.loc[[manip],['Protocol']].values.ravel()[0])
     print (protocol)
 
     global_reshape = global_reshape-1
     
     scanspot_list = ['Scanspot 1','Scanspot 2','Scanspot 3','Scanspot 4']
     
     files = []
     
     for i in range(global_reshape.size):
         idx_list = np.ravel(filelist.loc[[manip],[scanspot_list[i]]].values)
         record_list  = []
         
         for idx in idx_list :
             if isinstance(idx, str):
     
                 record_list.append(idx)
                 
         files.append(record_list)
         
     files = np.asarray(files)
     
     #----------------------ZEBRIN BANDS & ORIENTATION-------------------------------
     _BANDS_micron =  df.loc[['%s'%name],'P2- contra':'P2- ipsi']  #Selects values in the Excel file from P2- contra to P2- ipsi
         
     _BANDS_norm =  df.loc[['%s norm_P1-'%name],'P2- contra':'P2- ipsi']  #Selects values in the Excel file from P2- contra to P2- ipsi
Example #36
print("Loss val: " + str(values[0]))
print("Accuracy val: " + str(values[1]))

values_t = model.evaluate(x=[Q1_test, Q2_test], y=y_test)
print("Loss test: " + str(values_t[0]))
print("Accuracy test: " + str(values_t[1]))

#Model evaluation
"""
i/p:validation, test
o/p: results_val.txt,results_test.txt
"""

yhat_probs_val = model.predict([Q1_val, Q2_val], verbose=0)
yhat_probs_test = model.predict([Q1_test, Q2_test], verbose=0)
y_pred = np.ravel(yhat_probs_val).tolist()
df_pred = pd.DataFrame()
df_pred['pred_classes'] = y_pred
y_val = np.ravel(y_val).tolist()
df_pred['True Y_val'] = y_val
excel_ = df_pred.to_excel(path + "Pred_val_2.xlsx", index=None, header=True)

y_pred_t = np.ravel(yhat_probs_test).tolist()
df_pred_t = pd.DataFrame()
df_pred_t['pred_classes'] = y_pred_t
y_test = np.ravel(y_test).tolist()
df_pred_t['True Y_test'] = y_test
excel_t = df_pred_t.to_excel(path + "Pred_test.xlsx", index=None, header=True)

y_pr_val = (df_pred['pred_classes']).tolist()
y_val_tr = (df_pred['True Y_val']).tolist()
Example #37
def get_data(audio, eeg, audio_unatt=None, idx_eeg=None, num_batch=None, idx_sample=None, num_context=1, num_predict=1, dct_params=None):
    """Select a sequence of audio, audio_unattnd, and eeg data
    
    Reshape the selected data into num_batch frames for prediction.
    
    Arguments
    ---------
    audio : (num_part, num_samples)
    eeg : (num_part, num_ch, num_samples)
    idx_sample : row idx of audio and eeg data
        Defaults to a random sample if not specified
    num_context : scalar
        Total number of samples of input used to predict an output sample.
        If one-to-one mapping with no delay, num_context=1    
        
    num_predict : scalar
        Total number of time samples to be predicted in the output
        
    dct_params['idx_keep_audioTime']:  index of samples into the time vector
    Returns
    -------
    X : Variable (num_batch, num_ch * num_context + num_context) eeg + audio
    
    y : Variable (num_batch, class)
        
    z_unatt : None
    
    """
    
    if (dct_params is not None) and ('idx_keep_audioTime' in dct_params):
        idx_keep_audioTime = dct_params['idx_keep_audioTime']
    else:
        idx_keep_audioTime = None
               
    
    ######################################################################################################
    import numpy as np
    import scipy
    import scipy.signal
    import torch
    from torch.autograd import Variable
    import sklearn
    import sklearn.preprocessing
    import time
    
    a = audio[idx_sample] # selected audio part
    e = eeg[idx_sample] # selected eeg part
    au = audio_unatt[idx_sample] # selected unattended audio
    
    
    # Trim off NaNs
    idx_a = np.logical_not(np.isnan(a))
    idx_e = np.logical_not(np.isnan(e[1]))
    if np.abs(np.sum(idx_a) - np.sum(idx_e)) > 3:
        print('unequal samples')
    idx_keep = np.logical_and(idx_a, idx_e)
    a = a[idx_keep]
    e = e[:, idx_keep]
    au = au[idx_keep]

    if a.shape[0] >= num_context:
        # Make a conv matrix out of the eeg
        # Make a conv matrix out of the attended audio
        # Make a conv matrix out of the unattended audio
        
        # Cat [X_eeg, X_audio], y = 1
        # Cat [X_eeg, X_audio_unatt], y = 0
        # Return X, y
        
        # No frame shifts are needed.
        
        num_time = a.size - num_context + 1
        num_ch = e.shape[0]

        if idx_keep_audioTime is None:
            num_column_audio = num_context
            idx_keep_audioTime = np.arange(num_context)
        else:
            num_column_audio = np.size(idx_keep_audioTime)
        
        X_eeg = np.nan * np.ones((num_time, num_ch, num_column_audio))
        X_audio = np.nan * np.ones((num_time, num_column_audio))
        X_audio_unatt = np.nan * np.ones((num_time, num_column_audio))        
        
        print(X_eeg.shape)
        for idx in range(num_time):
            idx_keep = np.arange(num_context) + idx
            for idx_ch in range(num_ch):
                X_eeg[idx, idx_ch] = np.ravel(e[idx_ch, idx_keep])[idx_keep_audioTime]
            X_audio[idx] = np.ravel(a[idx_keep])[idx_keep_audioTime]
            X_audio_unatt[idx] = np.ravel(au[idx_keep])[idx_keep_audioTime]
        X_audio = X_audio[:, None, :]
        X_audio_unatt = X_audio_unatt[:, None, :]
        
        X1 = np.concatenate((X_eeg, X_audio), axis=1)
        X0 = np.concatenate((X_eeg, X_audio_unatt), axis=1)
        X = np.concatenate((X0, X1), axis=0)
        y = np.concatenate((np.zeros((num_time, 1)), np.ones((num_time, 1))), axis=0)

        X = Variable(torch.from_numpy(X).type('torch.FloatTensor'))
        y = Variable(torch.from_numpy(np.array(y)).type('torch.FloatTensor'))        
        z_unatt = None
        
    else:
        print('-warning, too little data-')
        X = None
        y = None
        z_unatt = None
        a = None
        a_unatt = None

    return X, y, z_unatt
Example #38
count = 0
for file in filelist[550:]:
    data = []
    with open('/projects/kumar-lab/mehtav/normalised_vd/' + file, 'rb') as f:
        f.seek(0)
        data = pickle.load(f)

    # data = [points/conf/vel, traj_number]
    # Each traj has shape (nrow x 12 x 2) or (nrow by 12)
    print(count)
    count = count + 1
    for t in range(len(data[1])):
        p_traj = data[0][t]
        c_traj = data[1][t]
        v_traj = data[2][t]
        op_traj = []
        for i in range(v_traj.shape[0]):
            s = np.ravel(np.delete(
                p_traj[i], CENTER_SPINE_INDEX,
                0))  # Removing center spine, as it is always at the origin
            a = np.ravel(
                np.delete(v_traj[i], CENTER_SPINE_INDEX,
                          0))  # Removing center spine, as it is always at rest
            s = np.delete(s, 17, 0)  # Removing x coordinate of base tail
            a = np.delete(a, 17, 0)  # Removing x coordinate of base tail
            op_traj.append((s, a))
        with open(
                '/projects/kumar-lab/mehtav/sa_traj/' + file[:-23] + '_' +
                str(t) + '_sa.pkl', 'wb') as f:
            pickle.dump(op_traj, f)
Example #39
    def calc_SS(self, smooth_jacobian=True):
        """
        now set up the secondary spectrum defined by:

        delay = theta^2, i.e. 0.5 L/c = 1
        doppler = theta, i.e. V/lambda = 1

        therefore differential delay (td), and differential doppler (fd) are:
            td = (thetax+thetagx)^2 +(thetay+thetagy)^2 - thetagx^2-thetagy^2
            fd = (thetax + thetagx) - thetagx = thetax
            Jacobian = 1/(thetay+thetagy)
        thetay + thetagy =
            sqrt(td - (thetax + thetagx)^2 + thetagx^2 + thetagy^2)

        the arc is defined by (thetay+thetagy) == 0 where there is a half order
        singularity.

        The singularity creates a problem in the code because the sampling in
        fd,td is not synchronized with the arc position, so there can be some
        very bright points if the sample happens to lie very close to the
        singularity.

        this is not a problem in interpreting the secondary spectrum, but it
        causes large artifacts when Fourier transforming it to get the ACF.

        So I [Bill Coles, in original Matlab code] have limited the Jacobian by
        not allowing (thetay+thetagy) to be less than half the step size in
        thetax and thetay.
        """

        fd = np.arange(-self.nf, self.nf, self.df)
        td = np.arange(-self.nt, self.nt, self.dt)
        self.fd = fd
        self.td = td
        # now get the thetax and thetay corresponding to fd and td
        # first initialize arrays all of same size
        amp = np.zeros((len(td), len(fd)))
        thetax = np.zeros((len(td), len(fd)))
        thetay = np.zeros((len(td), len(fd)))
        SS = np.zeros((len(td), len(fd)))
        for ifd in range(0, len(fd)):
            for itd in range(0, len(td)):
                thetax[itd, ifd] = fd[ifd] - self.thetagx + self.thetarx
                thetayplusthetagysq = td[itd] - \
                    (thetax[itd, ifd] + self.thetagx)**2 + self.thetarx**2 + \
                    self.thetary**2
                if thetayplusthetagysq > 0:
                    thymthgy = np.sqrt(thetayplusthetagysq)  # thetay+thetagy
                    thetay[itd, ifd] = thymthgy - self.thetagy
                    if thymthgy < 0.5*self.df:
                        if smooth_jacobian:
                            amp[itd, ifd] = (np.arcsin(1) -
                                             np.arcsin((thetax[itd, ifd] -
                                                        0.5*self.df) /
                                             thymthgy))/self.df
                        else:
                            amp[itd, ifd] = 2/self.df  # bound Jacobian
                    else:
                        amp[itd, ifd] = 1/thymthgy  # Jacobian
                else:
                    amp[itd, ifd] = 10**(-6)  # on or outside primary arc

        self.thetax = thetax
        self.thetay = thetay

        # now get secondary spectrum by interpolating in the brightness array
        # and multiplying by the Jacobian of the transformation from (td,fd) to
        # (thx,thy)

        SS = griddata((np.ravel(self.X), np.ravel(self.Y)), np.ravel(self.B),
                      (np.ravel(thetax), np.ravel(thetay)), method='linear') \
            * np.ravel(amp)
        SS = np.reshape(SS, (len(td), len(fd)))

        # now add the SS with the sign of td and fd changed
        # unfortunately that is not simply reversing the matrix
        # however if you take just SS(1:, 1:) then it can be reversed and
        # added to the original

        SSrev = np.flip(np.flip(SS[1:, 1:], axis=0), axis=1)
        SS[1:, 1:] += SSrev
        self.SS = SS
        self.LSS = 10*np.log10(SS)
        return
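For reference, a self-contained sketch of the scipy.interpolate.griddata call used above: linear interpolation of scattered samples at a query point.

import numpy as np
from scipy.interpolate import griddata

pts = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
vals = np.array([0.0, 1.0, 1.0, 2.0])        # samples of z = x + y
zi = griddata((pts[:, 0], pts[:, 1]), vals, (0.5, 0.5), method='linear')
print(zi)  # 1.0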
Exemple #40
data_path_merge = '/media/sf_2_PhD_2013_-2014/1PhD_WorkDocs/PhD_Data-calculations/data/sms-call-internet-mi/csv/csv/'
#data_path_merge = '/media/sf_2_PhD_2013_-2014/1PhD_WorkDocs/PhD_Data-calculations/data/OI_precipitazione_nov_dic_2013/csv2/'

data_files = glob.glob(data_path + '*.tif')

for data_file in data_files:
    ts = data_file[108:118]
    date_time = ''.join(
        [ts[0:4], '-', ts[4:6], '-', ts[6:8], 'T', ts[8:10], ':00:00+0100'])
    grid = gdal.Open(data_file)
    #grid = gdal.Open('/media/sf_2_PhD_2013_-2014/1PhD_WorkDocs/PhD_Data-calculations/data/OI_precipitazione_nov_dic_2013/gtiff/PR_2013111505UTCplus1.tif')
    array = np.array(grid.GetRasterBand(1).ReadAsArray())
    arrayt = array.T
    arrayflip = np.fliplr(arrayt)
    datavector = np.ravel(arrayflip, order='K')
    df = pd.DataFrame()
    df['sid'] = sid
    df['date_time'] = date_time
    #df.drop(['sid'],inplace=True,axis=1)
    df['rain'] = datavector
    df = df[['date_time', 'sid', 'rain']]
    df.to_csv(path_or_buf=data_path_out + ts + '.csv',
              sep='\t',
              index=False,
              header=False,
              columns=['date_time', 'sid', 'rain'])

# merging all created txt into one txt
data_files_out = glob.glob(data_path_out + '*.csv')
with open(data_path_merge + 'rain.csv', 'a') as out_file:
    for data_file_out in data_files_out:
        with open(data_file_out) as in_file:
            out_file.write(in_file.read())
    def train(self):

        if (self.status != 'init'):
            print("Please load train data and init W first.")
            return self.W

        self.status = 'train'

        # P = Q, q = p, G = -A, h = -c

        if (self.svm_kernel == 'soft_polynomial_kernel'
                or self.svm_kernel == 'soft_gaussian_kernel'):

            original_X = self.train_X[:, 1:]

            K = utility.Kernel.kernel_matrix(self, original_X)

            P = cvxopt.matrix(np.outer(self.train_Y, self.train_Y) * K)
            q = cvxopt.matrix(np.ones(self.data_num) * -1)
            constrain1 = np.diag(np.ones(self.data_num) * -1)
            constrain2 = np.identity(self.data_num)
            G = cvxopt.matrix(np.vstack((constrain1, constrain2)))
            constrain1 = np.zeros(self.data_num)
            constrain2 = np.ones(self.data_num) * self.C
            h = cvxopt.matrix(np.hstack((constrain1, constrain2)))
            A = cvxopt.matrix(self.train_Y, (1, self.data_num))
            b = cvxopt.matrix(0.0)
            cvxopt.solvers.options['show_progress'] = False
            solution = cvxopt.solvers.qp(P, q, G, h, A, b)

            # Lagrange multipliers
            a = np.ravel(solution['x'])
            self.alpha = a
            # Support vectors have non-zero Lagrange multipliers
            sv = a > 1e-5
            self.sv_index = np.arange(len(a))[sv]
            self.sv_alpha = a[sv]
            self.sv_X = original_X[sv]
            self.sv_Y = self.train_Y[sv]

            free_sv = np.logical_and(a > 1e-5, a < self.C)
            self.free_sv_index = np.arange(len(a))[free_sv]
            self.free_sv_alpha = a[free_sv]
            self.free_sv_X = original_X[free_sv]
            self.free_sv_Y = self.train_Y[free_sv]
            '''
            sum_short_b = 0
            for i in range(len(self.free_sv_alpha)):
                sum_short_b += self.free_sv_Y[i]
                for j in range(len(self.free_sv_alpha)):
                    if (self.svm_kernel == 'soft_polynomial_kernel'):
                        sum_short_b -= self.free_sv_alpha[j] * self.free_sv_Y[j] * utility.Kernel.polynomial_kernel(self, original_X[self.free_sv_index[j]], original_X[self.free_sv_index[i]])
                    elif (self.svm_kernel == 'soft_gaussian_kernel'):
                        sum_short_b -= self.free_sv_alpha[j] * self.free_sv_Y[j] * utility.Kernel.gaussian_kernel(self, original_X[self.free_sv_index[j]], original_X[self.free_sv_index[i]])
            short_b = sum_short_b / len(self.free_sv_alpha)
            '''

            short_b = (np.sum(self.free_sv_Y) - np.sum(
                np.ravel(self.free_sv_alpha * self.free_sv_Y *
                         utility.Kernel.kernel_matrix(self, self.free_sv_X)))
                       ) / len(self.free_sv_alpha)

            self.sv_avg_b = short_b

        elif (self.svm_kernel == 'polynomial_kernel'
              or self.svm_kernel == 'gaussian_kernel'):

            original_X = self.train_X[:, 1:]

            K = utility.Kernel.kernel_matrix(self, original_X)

            P = cvxopt.matrix(np.outer(self.train_Y, self.train_Y) * K)
            q = cvxopt.matrix(np.ones(self.data_num) * -1)
            G = cvxopt.matrix(np.diag(np.ones(self.data_num) * -1))
            h = cvxopt.matrix(np.zeros(self.data_num))
            A = cvxopt.matrix(self.train_Y, (1, self.data_num))
            b = cvxopt.matrix(0.0)
            cvxopt.solvers.options['show_progress'] = False
            solution = cvxopt.solvers.qp(P, q, G, h, A, b)

            # Lagrange multipliers
            a = np.ravel(solution['x'])
            self.alpha = a
            # Support vectors have non-zero Lagrange multipliers
            sv = a > 1e-5
            self.sv_index = np.arange(len(a))[sv]
            self.sv_alpha = a[sv]
            self.sv_X = original_X[sv]
            self.sv_Y = self.train_Y[sv]
            '''
            sum_short_b = 0
            for i in range(len(self.sv_alpha)):
                sum_short_b += self.sv_Y[i]
                for j in range(len(self.sv_alpha)):
                    if (self.svm_kernel == 'polynomial_kernel'):
                        sum_short_b -= self.sv_alpha[j] * self.sv_Y[j] * utility.Kernel.polynomial_kernel(self, original_X[self.sv_index[j]], original_X[self.sv_index[i]])
                    elif (self.svm_kernel == 'gaussian_kernel'):
                        sum_short_b -= self.sv_alpha[j] * self.sv_Y[j] * utility.Kernel.gaussian_kernel(self, original_X[self.sv_index[j]], original_X[self.sv_index[i]])
            short_b = sum_short_b / len(self.sv_alpha)
            '''

            short_b = (np.sum(self.sv_Y) - np.sum(
                np.ravel(self.sv_alpha * self.sv_Y *
                         utility.Kernel.kernel_matrix(self, self.sv_X)))
                       ) / len(self.sv_alpha)

            self.sv_avg_b = short_b

        elif (self.svm_kernel == 'dual_hard_margin'):

            original_X = self.train_X[:, 1:]

            P = cvxopt.matrix(
                np.outer(self.train_Y, self.train_Y) *
                np.dot(original_X, np.transpose(original_X)))
            q = cvxopt.matrix(np.ones(self.data_num) * -1)
            G = cvxopt.matrix(np.diag(np.ones(self.data_num) * -1))
            h = cvxopt.matrix(np.zeros(self.data_num))
            A = cvxopt.matrix(self.train_Y, (1, self.data_num))
            b = cvxopt.matrix(0.0)
            cvxopt.solvers.options['show_progress'] = False
            solution = cvxopt.solvers.qp(P, q, G, h, A, b)

            # Lagrange multipliers
            a = np.ravel(solution['x'])
            self.alpha = a
            # Support vectors have non-zero Lagrange multipliers
            sv = a > 1e-5
            self.sv_index = np.arange(len(a))[sv]
            self.sv_alpha = a[sv]
            self.sv_X = original_X[sv]
            self.sv_Y = self.train_Y[sv]

            short_w = np.zeros(self.data_demension - 1)
            for i in range(len(self.sv_alpha)):
                short_w += self.sv_alpha[i] * self.sv_Y[i] * self.sv_X[i]

            sum_short_b = 0
            for i in range(len(self.sv_alpha)):
                sum_short_b += self.sv_Y[i] - np.dot(
                    np.transpose(short_w), original_X[self.sv_index[i]])
            short_b = sum_short_b / len(self.sv_alpha)

            self.sv_avg_b = short_b

            self.W = np.insert(short_w, 0, short_b)

        else:
            # primal_hard_margin
            eye_process = np.eye(self.data_demension)
            eye_process[0][0] = 0
            P = cvxopt.matrix(eye_process)
            q = cvxopt.matrix(np.zeros(self.data_demension))
            G = cvxopt.matrix(
                np.reshape(self.train_Y, (-1, 1)) * self.train_X * -1)
            h = cvxopt.matrix(np.ones(self.data_num) * -1)
            cvxopt.solvers.options['show_progress'] = False
            solution = cvxopt.solvers.qp(P, q, G, h)
            self.W = np.array(solution['x'])
            self.W = np.ravel(self.W)

        return self.W
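A toy cvxopt QP in the same standard form the branches above assemble (minimize 1/2 x'Px + q'x subject to Gx <= h); the numbers are illustrative.

import numpy as np
import cvxopt

P = cvxopt.matrix([[2.0, 0.0], [0.0, 2.0]])
q = cvxopt.matrix([-2.0, -5.0])
G = cvxopt.matrix([[-1.0, 0.0], [0.0, -1.0]])  # encodes x >= 0
h = cvxopt.matrix([0.0, 0.0])
cvxopt.solvers.options['show_progress'] = False
sol = cvxopt.solvers.qp(P, q, G, h)
print(np.ravel(sol['x']))  # ~[1.0, 2.5]; the unconstrained optimum is feasible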
Exemple #42
def _min_or_max_filter(input, size, footprint, structure, output, mode,
                       cval, origin, minimum):
    if structure is None:
        if footprint is None:
            if size is None:
                raise RuntimeError("no footprint provided")
            separable = True
        else:
            footprint = numpy.asarray(footprint)
            footprint = footprint.astype(bool)
            if numpy.all(numpy.ravel(footprint)):
                size = footprint.shape
                footprint = None
                separable = True
            else:
                separable = False
    else:
        structure = numpy.asarray(structure, dtype=numpy.float64)
        separable = False
        if footprint is None:
            footprint = numpy.ones(structure.shape, bool)
        else:
            footprint = numpy.asarray(footprint)
            footprint = footprint.astype(bool)
    input = numpy.asarray(input)
    if numpy.iscomplexobj(input):
        raise TypeError('Complex type not supported')
    output, return_value = _ni_support._get_output(output, input)
    origins = _ni_support._normalize_sequence(origin, input.ndim)
    if separable:
        sizes = _ni_support._normalize_sequence(size, input.ndim)
        axes = list(range(input.ndim))
        axes = [(axes[ii], sizes[ii], origins[ii])
                               for ii in range(len(axes)) if sizes[ii] > 1]
        if minimum:
            filter_ = minimum_filter1d
        else:
            filter_ = maximum_filter1d
        if len(axes) > 0:
            for axis, size, origin in axes:
                filter_(input, int(size), axis, output, mode, cval, origin)
                input = output
        else:
            output[...] = input[...]
    else:
        fshape = [ii for ii in footprint.shape if ii > 0]
        if len(fshape) != input.ndim:
            raise RuntimeError('footprint array has incorrect shape.')
        for origin, lenf in zip(origins, fshape):
            if (lenf // 2 + origin < 0) or (lenf // 2 + origin >= lenf):
                raise ValueError('invalid origin')
        if not footprint.flags.contiguous:
            footprint = footprint.copy()
        if structure is not None:
            if len(structure.shape) != input.ndim:
                raise RuntimeError('structure array has incorrect shape')
            if not structure.flags.contiguous:
                structure = structure.copy()
        mode = _ni_support._extend_mode_to_code(mode)
        _nd_image.min_or_max_filter(input, footprint, structure, output,
                                    mode, cval, origins, minimum)
    return return_value
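A usage sketch of the public scipy.ndimage filters this helper backs: a full square footprint takes the separable fast path, while a cross-shaped footprint forces the general path.

import numpy as np
from scipy.ndimage import minimum_filter

a = np.arange(25.0).reshape(5, 5)
print(minimum_filter(a, size=3)[2, 2])  # 6.0 (full 3x3 window)
cross = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=bool)
print(minimum_filter(a, footprint=cross)[2, 2])  # 7.0 (cross-shaped window)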
Exemple #43
def plot_density(
    ax,
    all_labels,
    to_plot,
    colors,
    bw,
    figsize,
    length_plotters,
    rows,
    cols,
    titlesize,
    xt_labelsize,
    linewidth,
    markersize,
    credible_interval,
    point_estimate,
    hpd_markers,
    outline,
    shade,
    n_data,
    data_labels,
    backend_kwargs,
    show,
):
    """Matplotlib densityplot."""
    if ax is None:
        _, ax = _create_axes_grid(
            length_plotters,
            rows,
            cols,
            figsize=figsize,
            squeeze=False,
            backend="matplotlib",
            backend_kwargs=backend_kwargs,
        )
    else:
        ax = np.atleast_2d(ax)

    axis_map = {label: ax_ for label, ax_ in zip(all_labels, np.ravel(ax))}

    for m_idx, plotters in enumerate(to_plot):
        for var_name, selection, values in plotters:
            label = make_label(var_name, selection)
            _d_helper(
                values.flatten(),
                label,
                colors[m_idx],
                bw,
                titlesize,
                xt_labelsize,
                linewidth,
                markersize,
                credible_interval,
                point_estimate,
                hpd_markers,
                outline,
                shade,
                axis_map[label],
            )

    if n_data > 1:
        for m_idx, label in enumerate(data_labels):
            ax[0].plot([], label=label, c=colors[m_idx], markersize=markersize)
        ax[0].legend(fontsize=xt_labelsize)

    if backend_show(show):
        plt.show()

    return ax
def numpy_ravel_array(a):
    return np.ravel(a)
    def kneighbors_graph(self, X=None, n_neighbors=None,
                         mode='connectivity'):
        """Computes the (weighted) graph of k-Neighbors for points in X

        Parameters
        ----------
        X : array-like, shape (n_queries, n_features), \
                or (n_queries, n_indexed) if metric == 'precomputed'
            The query point or points.
            If not provided, neighbors of each indexed point are returned.
            In this case, the query point is not considered its own neighbor.

        n_neighbors : int
            Number of neighbors for each sample.
            (default is value passed to the constructor).

        mode : {'connectivity', 'distance'}, optional
            Type of returned matrix: 'connectivity' will return the
            connectivity matrix with ones and zeros, in 'distance' the
            edges are Euclidean distance between points.

        Returns
        -------
        A : sparse graph in CSR format, shape = [n_queries, n_samples_fit]
            n_samples_fit is the number of samples in the fitted data
            A[i, j] is assigned the weight of edge that connects i to j.

        Examples
        --------
        >>> X = [[0], [3], [1]]
        >>> from sklearn.neighbors import NearestNeighbors
        >>> neigh = NearestNeighbors(n_neighbors=2)
        >>> neigh.fit(X)
        NearestNeighbors(n_neighbors=2)
        >>> A = neigh.kneighbors_graph(X)
        >>> A.toarray()
        array([[1., 0., 1.],
               [0., 1., 1.],
               [1., 0., 1.]])

        See also
        --------
        NearestNeighbors.radius_neighbors_graph
        """
        check_is_fitted(self)
        if n_neighbors is None:
            n_neighbors = self.n_neighbors

        # check the input only in self.kneighbors

        # construct CSR matrix representation of the k-NN graph
        if mode == 'connectivity':
            A_ind = self.kneighbors(X, n_neighbors, return_distance=False)
            n_queries = A_ind.shape[0]
            A_data = np.ones(n_queries * n_neighbors)

        elif mode == 'distance':
            A_data, A_ind = self.kneighbors(
                X, n_neighbors, return_distance=True)
            A_data = np.ravel(A_data)

        else:
            raise ValueError(
                'Unsupported mode, must be one of "connectivity" '
                'or "distance" but got "%s" instead' % mode)

        n_queries = A_ind.shape[0]
        n_samples_fit = self.n_samples_fit_
        n_nonzero = n_queries * n_neighbors
        A_indptr = np.arange(0, n_nonzero + 1, n_neighbors)

        kneighbors_graph = csr_matrix((A_data, A_ind.ravel(), A_indptr),
                                      shape=(n_queries, n_samples_fit))

        return kneighbors_graph
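The A_indptr construction is easiest to see on a tiny example: with a fixed number of neighbours per row, row i owns entries indptr[i]:indptr[i+1] of the data and indices arrays.

import numpy as np
from scipy.sparse import csr_matrix

data = np.ones(6)                       # 3 queries, 2 neighbours each
indices = np.array([0, 2, 1, 2, 2, 0])  # neighbour column ids, row by row
indptr = np.arange(0, 7, 2)             # [0, 2, 4, 6]
A = csr_matrix((data, indices, indptr), shape=(3, 4))
print(A.toarray())
# [[1. 0. 1. 0.]
#  [0. 1. 1. 0.]
#  [1. 0. 1. 0.]]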
 def to_str(row):
     return ''.join(map(str, np.ravel(row.astype(int))))
Exemple #47
 def setForces(self, force):
     self.force = force
     self.nodal.n_f[:] = np.ravel(force)
 def _f(x):
     return -np.ravel(np.cos(x) + np.sin(3 * x))
Exemple #49
    def __mul__(self, other):
        """interpret other and call one of the following

        self._mul_scalar()
        self._mul_vector()
        self._mul_multivector()
        self._mul_sparse_matrix()
        """

        M, N = self.shape

        if other.__class__ is np.ndarray:
            # Fast path for the most common case
            if other.shape == (N, ):
                return self._mul_vector(other)
            elif other.shape == (N, 1):
                return self._mul_vector(other.ravel()).reshape(M, 1)
            elif other.ndim == 2 and other.shape[0] == N:
                return self._mul_multivector(other)

        if isscalarlike(other):
            # scalar value
            return self._mul_scalar(other)

        if issparse(other):
            if self.shape[1] != other.shape[0]:
                raise ValueError('dimension mismatch')
            return self._mul_sparse_matrix(other)

        try:
            other.shape
        except AttributeError:
            # If it's a list or whatever, treat it like a matrix
            other = np.asanyarray(other)

        other = np.asanyarray(other)

        if other.ndim == 1 or other.ndim == 2 and other.shape[1] == 1:
            # dense row or column vector
            if other.shape != (N, ) and other.shape != (N, 1):
                raise ValueError('dimension mismatch')

            result = self._mul_vector(np.ravel(other))

            if isinstance(other, np.matrix):
                result = np.asmatrix(result)

            if other.ndim == 2 and other.shape[1] == 1:
                # If 'other' was an (nx1) column vector, reshape the result
                result = result.reshape(-1, 1)

            return result

        elif other.ndim == 2:
            ##
            # dense 2D array or matrix ("multivector")

            if other.shape[0] != self.shape[1]:
                raise ValueError('dimension mismatch')

            result = self._mul_multivector(np.asarray(other))

            if isinstance(other, np.matrix):
                result = np.asmatrix(result)

            return result
        else:
            raise ValueError('could not interpret dimensions')
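A quick shape check of the dispatch above on a concrete csr_matrix (* on scipy's spmatrix classes is matrix multiplication): 1-D input hits _mul_vector, an (N, 1) column is raveled and reshaped back, and a 2-D block hits _mul_multivector.

import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[1.0, 0.0, 2.0], [0.0, 3.0, 0.0]]))  # M, N = 2, 3
v = np.ones(3)
print((A * v).shape)                # (2,)
print((A * v.reshape(3, 1)).shape)  # (2, 1)
print((A * np.ones((3, 2))).shape)  # (2, 2)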
Exemple #50
 def add_sample(self, sample):
     s = []
     for k in range(len(sample)):
         s.append(np.ravel(sample[k]))
     self.samples.append(s)
Exemple #51
    def fit(self, X, y, sample_weight=None):
        """
        Fit linear model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample

            .. versionadded:: 0.17
               parameter *sample_weight* support to LinearRegression.

        Returns
        -------
        self : returns an instance of self.
        """

        n_jobs_ = self.n_jobs
        X, y = self._validate_data(X,
                                   y,
                                   accept_sparse=['csr', 'csc', 'coo'],
                                   y_numeric=True,
                                   multi_output=True)

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight,
                                                 X,
                                                 dtype=X.dtype)

        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=sample_weight,
            return_mean=True)

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)

        if sp.issparse(X):
            X_offset_scale = X_offset / X_scale

            def matvec(b):
                return X.dot(b) - b.dot(X_offset_scale)

            def rmatvec(b):
                return X.T.dot(b) - X_offset_scale * np.sum(b)

            X_centered = sparse.linalg.LinearOperator(shape=X.shape,
                                                      matvec=matvec,
                                                      rmatvec=rmatvec)

            if y.ndim < 2:
                out = sparse_lsqr(X_centered, y)
                self.coef_ = out[0]
                self._residues = out[3]
            else:
                # sparse_lsqr cannot handle y with shape (M, K)
                outs = Parallel(n_jobs=n_jobs_)(
                    delayed(sparse_lsqr)(X_centered, y[:, j].ravel())
                    for j in range(y.shape[1]))
                self.coef_ = np.vstack([out[0] for out in outs])
                self._residues = np.vstack([out[3] for out in outs])
        else:
            self.coef_, self._residues, self.rank_, self.singular_ = \
                linalg.lstsq(X, y)
            self.coef_ = self.coef_.T

        if y.ndim == 1:
            self.coef_ = np.ravel(self.coef_)
        self._set_intercept(X_offset, y_offset, X_scale)
        return self
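A minimal sketch of the implicit-centering trick in the sparse branch: a LinearOperator whose matvec subtracts column offsets on the fly agrees with the explicitly centered dense matrix (column means are used as offsets purely for illustration).

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import LinearOperator

X = sparse.random(5, 3, density=0.8, random_state=0, format='csr')
offset = np.asarray(X.mean(axis=0)).ravel()
op = LinearOperator(shape=X.shape,
                    matvec=lambda b: X.dot(b) - b.dot(offset),
                    rmatvec=lambda b: X.T.dot(b) - offset * np.sum(b))
b = np.ones(3)
print(np.allclose(op.matvec(b), (X.toarray() - offset).dot(b)))  # True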
def main():
    usage = 'usage: %prog [options] <params_file> <model_file> <data_file>'
    parser = OptionParser(usage)
    parser.add_option(
        '-a',
        dest='act_t',
        default=0.5,
        type='float',
        help=
        'Activation threshold (as proportion of max) to consider for PWM [Default: %default]'
    )
    parser.add_option('-d',
                      dest='model_hdf5_file',
                      default=None,
                      help='Pre-computed model output as HDF5.')
    parser.add_option('-o', dest='out_dir', default='.')
    parser.add_option('-m',
                      dest='meme_db',
                      default='%s/data/motifs/Homo_sapiens.meme' %
                      os.environ['BASSETDIR'],
                      help='MEME database used to annotate motifs')
    parser.add_option(
        '-p',
        dest='plot_heats',
        default=False,
        action='store_true',
        help=
        'Plot heat maps describing filter activations in the test sequences [Default: %default]'
    )
    parser.add_option(
        '-s',
        dest='sample',
        default=None,
        type='int',
        help='Sample sequences from the test set [Default:%default]')
    parser.add_option(
        '-t',
        dest='trim_filters',
        default=False,
        action='store_true',
        help=
        'Trim uninformative positions off the filter ends [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 3:
        parser.error(
            'Must provide Basenji parameters and model files and test data in HDF5'
            ' format.')
    else:
        params_file = args[0]
        model_file = args[1]
        data_file = args[2]

    if not os.path.isdir(options.out_dir):
        os.mkdir(options.out_dir)

    #################################################################
    # load data

    data_open = h5py.File(data_file, 'r')

    test_seqs1 = data_open['test_in']
    test_targets = data_open['test_out']

    try:
        target_names = list(data_open['target_labels'])
    except KeyError:
        target_names = ['t%d' % ti for ti in range(test_targets.shape[1])]

    if options.sample is not None:
        # choose sampled indexes
        sample_i = sorted(
            random.sample(range(test_seqs1.shape[0]), options.sample))

        # filter
        test_seqs1 = test_seqs1[sample_i]
        test_targets = test_targets[sample_i]

    # convert to letters
    test_seqs = basenji.dna_io.hot1_dna(test_seqs1)

    #################################################################
    # model parameters and placeholders

    job = basenji.dna_io.read_job_params(params_file)

    job['seq_length'] = test_seqs1.shape[1]
    job['seq_depth'] = test_seqs1.shape[2]
    job['num_targets'] = test_targets.shape[2]
    job['target_pool'] = int(np.array(data_open.get('pool_width', 1)))

    t0 = time.time()
    dr = basenji.seqnn.SeqNN()
    dr.build(job)
    print('Model building time %ds' % (time.time() - t0))

    # adjust for fourier
    job['fourier'] = 'train_out_imag' in data_open
    if job['fourier']:
        test_targets_imag = data_open['test_out_imag']
        if getattr(options, 'valid', False):  # no '-v' flag is defined above
            test_targets_imag = data_open['valid_out_imag']

    #################################################################
    # predict

    # initialize batcher
    if job['fourier']:
        batcher_test = basenji.batcher.BatcherF(test_seqs1,
                                                test_targets,
                                                test_targets_imag,
                                                batch_size=dr.batch_size,
                                                pool_width=job['target_pool'])
    else:
        batcher_test = basenji.batcher.Batcher(test_seqs1,
                                               test_targets,
                                               batch_size=dr.batch_size,
                                               pool_width=job['target_pool'])

    # initialize saver
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # load variables into session
        saver.restore(sess, model_file)

        # get weights
        filter_weights = sess.run(dr.filter_weights[0])
        filter_weights = np.transpose(np.squeeze(filter_weights), [2, 1, 0])
        print(filter_weights.shape)

        # test
        t0 = time.time()
        layer_filter_outs, _ = dr.hidden(sess, batcher_test, layers=[0])
        filter_outs = layer_filter_outs[0]
        print(filter_outs.shape)

    # store useful variables
    num_filters = filter_weights.shape[0]
    filter_size = filter_weights.shape[2]

    #################################################################
    # individual filter plots
    #################################################################
    # also save information contents
    filters_ic = []
    meme_out = meme_intro('%s/filters_meme.txt' % options.out_dir, test_seqs)

    for f in range(num_filters):
        print('Filter %d' % f)

        # plot filter parameters as a heatmap
        plot_filter_heat(filter_weights[f, :, :],
                         '%s/filter%d_heat.pdf' % (options.out_dir, f))

        # write possum motif file
        filter_possum(filter_weights[f, :, :], 'filter%d' % f,
                      '%s/filter%d_possum.txt' % (options.out_dir, f),
                      options.trim_filters)

        # plot weblogo of high scoring outputs
        plot_filter_logo(filter_outs[:, :, f],
                         filter_size,
                         test_seqs,
                         '%s/filter%d_logo' % (options.out_dir, f),
                         maxpct_t=options.act_t)

        # make a PWM for the filter
        filter_pwm, nsites = make_filter_pwm('%s/filter%d_logo.fa' %
                                             (options.out_dir, f))

        if nsites < 10:
            # no information
            filters_ic.append(0)
        else:
            # compute and save information content
            filters_ic.append(info_content(filter_pwm))

            # add to the meme motif file
            meme_add(meme_out, f, filter_pwm, nsites, options.trim_filters)

    meme_out.close()

    #################################################################
    # annotate filters
    #################################################################
    # run tomtom
    subprocess.call(
        'tomtom -dist pearson -thresh 0.1 -oc %s/tomtom %s/filters_meme.txt %s'
        % (options.out_dir, options.out_dir, options.meme_db),
        shell=True)

    # read in annotations
    filter_names = name_filters(num_filters,
                                '%s/tomtom/tomtom.txt' % options.out_dir,
                                options.meme_db)

    #################################################################
    # print a table of information
    #################################################################
    table_out = open('%s/table.txt' % options.out_dir, 'w')

    # print header for later pandas reading
    header_cols = ('', 'consensus', 'annotation', 'ic', 'mean', 'std')
    print('%3s  %19s  %10s  %5s  %6s  %6s' % header_cols, file=table_out)

    for f in range(num_filters):
        # collapse to a consensus motif
        consensus = filter_motif(filter_weights[f, :, :])

        # grab annotation
        annotation = '.'
        name_pieces = filter_names[f].split('_')
        if len(name_pieces) > 1:
            annotation = name_pieces[1]

        # plot density of filter output scores
        fmean, fstd = plot_score_density(
            np.ravel(filter_outs[:, :, f]),
            '%s/filter%d_dens.pdf' % (options.out_dir, f))

        row_cols = (f, consensus, annotation, filters_ic[f], fmean, fstd)
        print('%-3d  %19s  %10s  %5.2f  %6.4f  %6.4f' % row_cols,
              file=table_out)

    table_out.close()

    #################################################################
    # global filter plots
    #################################################################
    if options.plot_heats:
        # plot filter-sequence heatmap
        plot_filter_seq_heat(filter_outs,
                             '%s/filter_seqs.pdf' % options.out_dir)

        # plot filter-segment heatmap
        plot_filter_seg_heat(filter_outs,
                             '%s/filter_segs.pdf' % options.out_dir)
        plot_filter_seg_heat(filter_outs,
                             '%s/filter_segs_raw.pdf' % options.out_dir,
                             whiten=False)

        # plot filter-target correlation heatmap
        # (test_targets stands in for seq_targets, which is undefined in this
        # snippet)
        plot_target_corr(filter_outs, test_targets, filter_names, target_names,
                         '%s/filter_target_cors_mean.pdf' % options.out_dir,
                         'mean')
        plot_target_corr(filter_outs, test_targets, filter_names, target_names,
                         '%s/filter_target_cors_max.pdf' % options.out_dir,
                         'max')
def lerp(sdata, condition):
    xx, yy = np.meshgrid(np.arange(sdata.shape[1]), np.arange(sdata.shape[0]))
    xym = np.vstack((np.ravel(xx[condition]), np.ravel(yy[condition]))).T
    values = np.ravel(sdata[:, :][condition])
    interp = scipy.interpolate.LinearNDInterpolator(xym, values)
    return interp(np.ravel(xx), np.ravel(yy)).reshape(xx.shape)
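Usage sketch for lerp, assuming it and scipy are in scope: passing the mask of finite entries fills gaps by linear interpolation over the grid. The toy grid below is exactly planar, so the hole comes back as 2.0.

import numpy as np

sdata = np.array([[0.0, 1.0, 2.0],
                  [1.0, np.nan, 3.0],
                  [2.0, 3.0, 4.0]])
filled = lerp(sdata, ~np.isnan(sdata))
print(filled[1, 1])  # 2.0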
Exemple #54
    def measure_fock(self, modes, select=None):
        """
        Measures a list of modes.
        """
        # pylint: disable=singleton-comparison
        if select is not None and np.any(np.array(select) == None):
            raise NotImplementedError(
                "Post-selection lists must only contain numerical values.")

        # Make sure the state is mixed
        if self._pure:
            state = ops.mix(self._state, self._num_modes)
        else:
            state = self._state

        if select is not None:
            # perform post-selection

            # make sure modes and select are the same length
            if len(select) != len(modes):
                raise ValueError(
                    "When performing post-selection, the number of "
                    "selected values (including None) must match the number of measured modes"
                )

            # make sure the select values are all integers or nones
            if not all(isinstance(s, int) or s is None for s in select):
                raise TypeError(
                    "The post-select list elements either be integers or None")

            # modes to measure
            measure = [i for i, s in zip(modes, select) if s is None]

            # modes already post-selected:
            selected = [i for i, s in zip(modes, select) if s is not None]
            select_values = [s for s in select if s is not None]

            # project out postselected modes
            self._state = ops.project_reset(selected, select_values,
                                            self._state, self._pure,
                                            self._num_modes, self._trunc)

            if self.norm() == 0:
                raise ZeroDivisionError("Measurement has zero probability.")

            self._state = self._state / self.norm()

        else:
            # no post-selection; modes to measure are the modes provided
            measure = modes

        if len(measure) > 0:
            # sampling needs to be performed
            # Compute distribution by tracing out modes not measured, then computing the diagonal
            unmeasured = [
                i for i in range(self._num_modes) if i not in measure
            ]
            reduced = ops.partial_trace(state, self._num_modes, unmeasured)
            dist = np.ravel(ops.diagonal(reduced, len(measure)).real)

            # Make a random choice
            if sum(dist) != 1:
                # WARNING: distribution is not normalized, could hide errors
                i = np.random.choice(list(range(len(dist))),
                                     p=dist / sum(dist))
            else:
                i = np.random.choice(list(range(len(dist))), p=dist)

            permuted_outcome = ops.unIndex(i, len(measure), self._trunc)

            # Permute the outcome to match the order of the modes in 'measure'
            permutation = np.argsort(measure)
            outcome = [0] * len(measure)
            for i in range(len(measure)):
                outcome[permutation[i]] = permuted_outcome[i]

            # Project the state onto the measurement outcome & reset in vacuum
            self._state = ops.project_reset(measure, outcome, self._state,
                                            self._pure, self._num_modes,
                                            self._trunc)

            if self.norm() == 0:
                raise ZeroDivisionError("Measurement has zero probability.")

            self._state = self._state / self.norm()

        # include post-selected values in measurement outcomes
        if select is not None:
            outcome = copy.copy(select)

        return outcome
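ops.unIndex is not shown here; assuming it decodes a flat index of the joint distribution into one outcome per measured mode under a uniform cutoff, numpy's unravel_index does the same job on a hypercube.

import numpy as np

trunc, n_measured = 4, 3
print(np.unravel_index(27, (trunc,) * n_measured))  # (1, 2, 3): 27 = 1*16 + 2*4 + 3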
    def display_split_metrics(rf, Xt, yt, Xv, yv, target_names = None):
        if len(rf.classes_) == 2:
            numpy_yt = np.ravel(yt)
            numpy_yv = np.ravel(yv)
            if type(numpy_yt[0])==str:
                classes_ = rf.classes_
            else:
                classes_ = [str(int(rf.classes_[0])), str(int(rf.classes_[1]))]
            zt = np.zeros(len(yt))
            zv = np.zeros(len(yv))
            #zt = deepcopy(yt)
            for i in range(len(yt)):
                if numpy_yt[i] == 1:
                    zt[i] = 1
            for i in range(len(yv)):
                if numpy_yv[i] == 1:
                    zv[i] = 1
    
            predict_t = rf.predict(Xt)
            predict_v = rf.predict(Xv)
            conf_matt = confusion_matrix(y_true=yt, y_pred=predict_t)
            conf_matv = confusion_matrix(y_true=yv, y_pred=predict_v)
            prob_t = rf.predict_proba(Xt)
            prob_v = rf.predict_proba(Xv)
            print("\n")
            print("{:.<23s}{:>15s}{:>15s}".format('Model Metrics', 
                                          'Training', 'Validation'))
            print("{:.<23s}{:15d}{:15d}".format('Observations', 
                                              Xt.shape[0], Xv.shape[0]))
            
            print("{:.<23s}{:15d}{:15d}".format('Features', Xt.shape[1], 
                                                              Xv.shape[1]))
            if rf.max_depth is None:
                print("{:.<23s}{:>15s}{:>15s}".format('Maximum Tree Depth',
                                  "None", "None"))
            else:
                print("{:.<23s}{:15d}{:15d}".format('Maximum Tree Depth',
                                  rf.max_depth, rf.max_depth))
            print("{:.<23s}{:15d}{:15d}".format('Minimum Leaf Size', 
                                  rf.min_samples_leaf, rf.min_samples_leaf))
            print("{:.<23s}{:15d}{:15d}".format('Minimum split Size', 
                                  rf.min_samples_split, rf.min_samples_split))
    
            print("{:.<23s}{:15.4f}{:15.4f}".format('Mean Absolute Error', 
                          mean_absolute_error(zt,prob_t[:,1]), 
                          mean_absolute_error(zv,prob_v[:,1])))
            print("{:.<23s}{:15.4f}{:15.4f}".format('Avg Squared Error', 
                          mean_squared_error(zt,prob_t[:,1]), 
                          mean_squared_error(zv,prob_v[:,1])))
            
            acct = accuracy_score(yt, predict_t)
            accv = accuracy_score(yv, predict_v)
            print("{:.<23s}{:15.4f}{:15.4f}".format('Accuracy', acct, accv))
            if type(numpy_yt[0])==str:
                pre_t = precision_score(yt, predict_t, pos_label=classes_[1])
                tpr_t = recall_score(yt, predict_t, pos_label=classes_[1])
                f1_t  = f1_score(yt,predict_t, pos_label=classes_[1])
                pre_v = precision_score(yv, predict_v, pos_label=classes_[1])
                tpr_v = recall_score(yv, predict_v, pos_label=classes_[1])
                f1_v  = f1_score(yv,predict_v, pos_label=classes_[1])
            else:
                pre_t = precision_score(yt, predict_t)
                tpr_t = recall_score(yt, predict_t)
                f1_t  = f1_score(yt,predict_t)
                pre_v = precision_score(yv, predict_v)
                tpr_v = recall_score(yv, predict_v)
                f1_v  = f1_score(yv,predict_v)
                
            print("{:.<27s}{:11.4f}{:15.4f}".format('Precision', pre_t, pre_v))
            print("{:.<27s}{:11.4f}{:15.4f}".format('Recall (Sensitivity)', 
                  tpr_t, tpr_v))
            print("{:.<27s}{:11.4f}{:15.4f}".format('F1-score', f1_t, f1_v))
            misct_ = conf_matt[0][1]+conf_matt[1][0]
            miscv_ = conf_matv[0][1]+conf_matv[1][0]
            misct = 100*misct_/len(yt)
            miscv = 100*miscv_/len(yv)
            n_t   = [conf_matt[0][0]+conf_matt[0][1], \
                     conf_matt[1][0]+conf_matt[1][1]]
            n_v   = [conf_matv[0][0]+conf_matv[0][1], \
                     conf_matv[1][0]+conf_matv[1][1]]
            misc_ = [[0,0], [0,0]]
            misc_[0][0] = 100*conf_matt[0][1]/n_t[0]
            misc_[0][1] = 100*conf_matt[1][0]/n_t[1]
            misc_[1][0] = 100*conf_matv[0][1]/n_v[0]
            misc_[1][1] = 100*conf_matv[1][0]/n_v[1]
            print("{:.<27s}{:11d}{:15d}".format(\
                    'Total Misclassifications', misct_, miscv_))
            print("{:.<27s}{:10.1f}{:s}{:14.1f}{:s}".format(\
                    'MISC (Misclassification)', misct, '%', miscv, '%'))
            for i in range(2):
                print("{:s}{:.<16s}{:>10.1f}{:<1s}{:>14.1f}{:<1s}".format(
                      '     class ', classes_[i], 
                      misc_[0][i], '%', misc_[1][i], '%'))
            print("\n\nTraining                  Class     Class")
            print("{:<21s}{:>10s}{:>10s}".format("Confusion Matrix", 
                          classes_[0], classes_[1]) )
            for i in range(2):
                print("{:6s}{:.<15s}".format('Class ', classes_[i]), end="")
                for j in range(2):
                    print("{:>10d}".format(conf_matt[i][j]), end="")
                print("")
            
            print("\n\nValidation                Class     Class")
            print("{:<21s}{:>10s}{:>10s}".format("Confusion Matrix", 
                          classes_[0], classes_[1]) )
            for i in range(2):
                print("{:6s}{:.<15s}".format('Class ', classes_[i]), end="")
                for j in range(2):
                    print("{:>10d}".format(conf_matv[i][j]), end="")
                print("")
            # In the binary case, the classification report is incorrect
            #cr = classification_report(yv, predict_v, rf.classes_)
            #print("\n",cr)
        else:
            if len(rf.classes_) < 2:
                raise RuntimeError("  Call to display_nominal_split_metrics "
                                   "invalid.\n  Target has less than two classes.\n")
            predict_t = rf.predict(Xt)
            predict_v = rf.predict(Xv)
            conf_mat_t = confusion_matrix(y_true=yt, y_pred=predict_t)
            conf_mat_v = confusion_matrix(y_true=yv, y_pred=predict_v)
            prob_t = rf.predict_proba(Xt) # or is this rf._predict_proba_dt ?
            prob_v = rf.predict_proba(Xv)
            
            n_classes = len(rf.classes_)
            ase_sumt = 0
            ase_sumv = 0
            misc_t = 0
            misc_v = 0
            misct  = []
            miscv  = []
            n_t    = []
            n_v    = []
            nt_obs = yt.shape[0]
            nv_obs = yv.shape[0]
            conf_matt = []
            conf_matv = []
            for i in range(n_classes):
                conf_matt.append(np.zeros(n_classes))
                conf_matv.append(np.zeros(n_classes))
            y_t = np.ravel(yt) # necessary because yt is a df with row keys
            y_v = np.ravel(yv) # likewise
            for i in range(n_classes):
                misct.append(0)
                n_t.append(0)
                miscv.append(0)
                n_v.append(0)
            for i in range(nt_obs):
                for j in range(n_classes):
                    if y_t[i] == rf.classes_[j]:
                        ase_sumt += (1-prob_t[i,j])*(1-prob_t[i,j])
                        idx = j
                    else:
                        ase_sumt += prob_t[i,j]*prob_t[i,j]
                for j in range(n_classes):
                    if predict_t[i] == rf.classes_[j]:
                        conf_matt[idx][j] += 1
                        break
                n_t[idx] += 1
                if predict_t[i] != y_t[i]:
                    misc_t     += 1
                    misct[idx] += 1
                    
            for i in range(nv_obs):
                for j in range(n_classes):
                    if y_v[i] == rf.classes_[j]:
                        ase_sumv += (1-prob_v[i,j])*(1-prob_v[i,j])
                        idx = j
                    else:
                        ase_sumv += prob_v[i,j]*prob_v[i,j]
                for j in range(n_classes):
                    if predict_v[i] == rf.classes_[j]:
                        conf_matv[idx][j] += 1
                        break
                n_v[idx] += 1
                if predict_v[i] != y_v[i]:
                    misc_v     += 1
                    miscv[idx] += 1
            misct_ = misc_t
            miscv_ = misc_v
            misc_t = 100*misc_t/nt_obs
            misc_v = 100*misc_v/nv_obs
            aset   = ase_sumt/(n_classes*nt_obs)
            asev   = ase_sumv/(n_classes*nv_obs)
            print("\n")
            print("{:.<23s}{:>15s}{:>15s}".format('Model Metrics', 
                                          'Training', 'Validation'))
            print("{:.<23s}{:15d}{:15d}".format('Observations', \
                                              Xt.shape[0], Xv.shape[0]))
            
            print("{:.<23s}{:15d}{:15d}".format('Features', Xt.shape[1], 
                                                            Xv.shape[1]))
            if type(rf) == RandomForestClassifier:
                print("{:.<23s}{:15d}{:15d}".format(\
                      'Trees in Forest', \
                      rf.n_estimators, rf.n_estimators))
            if rf.max_depth is None:
                print("{:.<23s}{:>15s}{:>15s}".format('Maximum Tree Depth',
                                  "None", "None"))
            else:
                print("{:.<23s}{:15d}{:15d}".format('Maximum Tree Depth',
                             rf.max_depth, rf.max_depth))
            print("{:.<23s}{:15d}{:15d}".format('Minimum Leaf Size', 
                             rf.min_samples_leaf, rf.min_samples_leaf))
            print("{:.<23s}{:15d}{:15d}".format('Minimum split Size', 
                             rf.min_samples_split, rf.min_samples_split))
    
            print("{:.<23s}{:15.4f}{:15.4f}".format('Avg Squared Error', 
                          aset, asev))
            
            print("{:.<23s}{:15.4f}{:15.4f}".format(\
                                    'Root ASE', sqrt(aset), sqrt(asev)))
            
            acct = accuracy_score(yt, predict_t)
            accv = accuracy_score(yv, predict_v)
            print("{:.<23s}{:15.4f}{:15.4f}".format('Accuracy', acct, accv))
            
            print("{:.<23s}{:15.4f}{:15.4f}".format('Precision', 
                          precision_score(yt,predict_t, average='macro'), 
                          precision_score(yv,predict_v, average='macro')))
            print("{:.<23s}{:15.4f}{:15.4f}".format('Recall (Sensitivity)', 
                          recall_score(yt,predict_t, average='macro'), 
                          recall_score(yv,predict_v, average='macro')))
            print("{:.<23s}{:15.4f}{:15.4f}".format('F1-score', 
                          f1_score(yt,predict_t, average='macro'), 
                          f1_score(yv,predict_v, average='macro')))
            print("{:.<27s}{:11d}{:15d}".format(\
                    'Total Misclassifications', misct_, miscv_))
            print("{:.<27s}{:10.1f}{:s}{:14.1f}{:s}".format(\
                    'MISC (Misclassification)', misc_t, '%', misc_v, '%'))

            classes_ = []
            if type(rf.classes_[0])==str:
                classes_ = rf.classes_
            else:
                for i in range(n_classes):
                    classes_.append(str(int(rf.classes_[i])))
            for i in range(n_classes):
                misct[i] = 100*misct[i]/n_t[i]
                miscv[i] = 100*miscv[i]/n_v[i]
                print("{:s}{:.<16s}{:>10.1f}{:<1s}{:>14.1f}{:<1s}".format(
                            '     class ', classes_[i], misct[i], 
                            '%', miscv[i], '%'))
    
            print("\n\nTraining")
            print("Confusion Matrix ", end="")
            for i in range(n_classes):
                print("{:>7s}{:<3s}".format('Class ', classes_[i]), 
                      end="")
            print("")
            for i in range(n_classes):
                print("{:s}{:.<6s}".format('Class ', classes_[i]), 
                      end="")
                for j in range(n_classes):
                    print("{:>10d}".format(conf_mat_t[i][j]), end="")
                print("")
                
            ct = classification_report(yt, predict_t, target_names=target_names)
            print("\nTraining \nMetrics:\n",ct)
            
            print("\n\nValidation")
            print("Confusion Matrix ", end="")
            for i in range(n_classes):
                print("{:>7s}{:<3s}".format('Class ', classes_[i]), 
                      end="")
            print("")
            for i in range(n_classes):
                print("{:s}{:.<6s}".format('Class ', classes_[i]), 
                      end="")
                for j in range(n_classes):
                    print("{:>10d}".format(conf_mat_v[i][j]), end="")
                print("")
            cv = classification_report(yv, predict_v, target_names=target_names)
            print("\nValidation \nMetrics:\n",cv)
Exemple #56
def precon_norm(v, ml):
    ''' helper function to calculate preconditioner norm of v '''
    v = ravel(v)
    w = ml.aspreconditioner() * v
    return sqrt(dot(v.conjugate(), w))
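Usage sketch, assuming pyamg is installed and precon_norm lives in a module where numpy's ravel, sqrt and dot are imported unqualified.

import numpy as np
from pyamg import ruge_stuben_solver
from pyamg.gallery import poisson

A = poisson((10, 10), format='csr')
ml = ruge_stuben_solver(A)
v = np.random.rand(A.shape[0])
print(precon_norm(v, ml))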
Exemple #57
constraints = []
for i in range(len(matrices)):
    constraints.append(quad_form(q_var, matrices[i]) <= t_var)
constraints.append(s_vec.T * q_var == s_vec.T * s_vec)
constraints.append(quad_form(q_var, I) <= alpha * s_vec.T * s_vec)

#===Solve System===#
obj = Minimize(t_var)
prob = Problem(obj, constraints)
prob.solve()

#===Extract===#
print "f0* =", obj.value
xopt = prob.variables()[1].value
print "q* =", xopt
q = numpy.ravel(xopt)
q_vec = numpy.asmatrix(q).T

#===Convolve===#
rss = numpy.convolve(s, s[::-1])
csq = numpy.convolve(s, q[::-1])
print "s*s = ", rss
print "s*q = ", csq

#===Make F Matrix===#
ones = numpy.r_[numpy.ones(N - 1), 0, numpy.ones(N - 1)]
F = numpy.asmatrix(numpy.diag(ones))

#===Print M/S Ratio Improvement===#
sidelobes = F * numpy.asmatrix(rss).T
mainlobe = numpy.asmatrix(rss).T - sidelobes
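The snippet ends before the advertised printout; a hedged completion (the exact figure of merit used by the original script is an assumption) compares the mainlobe-to-sidelobe ratio of s*s against that of s*q.

csq_side = F * numpy.asmatrix(csq).T
csq_main = numpy.asmatrix(csq).T - csq_side
ms_before = float(numpy.max(numpy.abs(mainlobe)) / numpy.max(numpy.abs(sidelobes)))
ms_after = float(numpy.max(numpy.abs(csq_main)) / numpy.max(numpy.abs(csq_side)))
print("M/S before =", ms_before, " M/S after =", ms_after)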
Exemple #58
    def get_energy_dependent_integration_weights(self, spin, energy):
        integration_weights = np.zeros(self._ir_weights_shape[spin])
        tetrahedra_mask = self.get_intersecting_tetrahedra(spin, energy)

        if not np.any(tetrahedra_mask):
            return integration_weights

        energies = self.ir_tetrahedra_energies[spin][tetrahedra_mask]
        e21 = self.e21[spin][tetrahedra_mask]
        e31 = self.e31[spin][tetrahedra_mask]
        e41 = self.e41[spin][tetrahedra_mask]
        e32 = self.e32[spin][tetrahedra_mask]
        e42 = self.e42[spin][tetrahedra_mask]
        e43 = self.e43[spin][tetrahedra_mask]

        cond_a_mask = (energies[:, 0] < energy) & (energy < energies[:, 1])
        cond_b_mask = (energies[:, 1] <= energy) & (energy < energies[:, 2])
        cond_c_mask = (energies[:, 2] <= energy) & (energy < energies[:, 3])

        ee1 = energy - energies[:, 0]
        ee2 = energy - energies[:, 1]
        ee3 = energy - energies[:, 2]
        e2e = energies[:, 1] - energy
        e3e = energies[:, 2] - energy
        e4e = energies[:, 3] - energy

        kpoints_idx = self.ir_tetrahedra[spin][tetrahedra_mask]
        ir_kpoints_idx = self.ir_kpoint_mapping[kpoints_idx]

        # calculate the integrand for each vertices
        vert_weights = np.zeros_like(energies)
        vert_weights[cond_a_mask] = _get_energy_dependent_weight_a(
            ee1[cond_a_mask],
            e2e[cond_a_mask],
            e3e[cond_a_mask],
            e4e[cond_a_mask],
            e21[cond_a_mask],
            e31[cond_a_mask],
            e41[cond_a_mask],
        )

        vert_weights[cond_b_mask] = _get_energy_dependent_weight_b(
            ee1[cond_b_mask],
            ee2[cond_b_mask],
            e3e[cond_b_mask],
            e4e[cond_b_mask],
            e31[cond_b_mask],
            e41[cond_b_mask],
            e32[cond_b_mask],
            e42[cond_b_mask],
        )

        vert_weights[cond_c_mask] = _get_energy_dependent_weight_c(
            ee1[cond_c_mask],
            ee2[cond_c_mask],
            ee3[cond_c_mask],
            e4e[cond_c_mask],
            e41[cond_c_mask],
            e42[cond_c_mask],
            e43[cond_c_mask],
        )

        # finally, get the integrand for each ir_kpoint by summing over all
        # tetrahedra and multiplying by the tetrahedra multiplicity and
        # tetrahedra weight; Finally, divide by the k-point multiplicity
        # to get the final weight
        band_idx, tetrahedra_idx = np.where(tetrahedra_mask)

        # include tetrahedra multiplicity
        vert_weights *= self.ir_tetrahedra_weights[tetrahedra_idx][:, None]

        flat_ir_kpoints = np.ravel(ir_kpoints_idx)
        flat_ir_weights = np.ravel(vert_weights)
        flat_bands = np.repeat(band_idx, 4)

        # sum integrand, note this sums in place and is insanely fast
        np.add.at(integration_weights, (flat_bands, flat_ir_kpoints),
                  flat_ir_weights)
        integration_weights *= (self._tetrahedron_volume /
                                self.ir_kpoint_weights[None, :])

        return integration_weights
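The np.add.at call earns the comment it gets: unlike fancy-indexed +=, which writes each repeated bucket only once, np.add.at accumulates every occurrence.

import numpy as np

idx = np.array([0, 1, 1, 3])
w = np.zeros(4)
w[idx] += 1.0
print(w)  # [1. 1. 0. 1.]  (index 1 counted once)
w = np.zeros(4)
np.add.at(w, idx, 1.0)
print(w)  # [1. 2. 0. 1.]  (index 1 counted twice)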
def read_class_npvar_red(datadir='./data',
                         pfile='pvar.dat',
                         proc=0,
                         verbose=False,
                         reduce_to=-1,
                         set_reduce=-1):

    dims = pc.read_dim(datadir, proc)
    pdims = pc.read_pdim(datadir)
    npar_loc = read_npar_loc(datadir=datadir, pfile=pfile, proc=proc)
    #
    # The next bit calculates how many particles are written for all but
    # the last processor. The last processor is assigned a number of particles
    # to write so that the required number of particles is obtained
    #
    if (reduce_to > 0):
        if (set_reduce <= 0):
            reductionfactor = float(reduce_to) / float(pdims.npar)
            npar_red = int(round(npar_loc * reductionfactor))
        else:
            npar_red = set_reduce
        if (verbose):
            #print 'reducing '+str(npar_loc)+' to '+str(npar_red)+ ' on proc'+str(proc)
            print('reducing {} to {} on proc {}'.format(
                npar_loc, npar_red, proc))
        written_parts = npar_red
    else:
        written_parts = set_reduce

    if (verbose):
        #print npar_loc,' particles on processor: ',proc # Python 2
        print(str(npar_loc) + ' particles on processor: ' + str(proc))
    mvars = pdims.mpaux + pdims.mpvar
    ltot = npar_loc * mvars
    if (dims.precision == 'S'):
        REAL = '<f4'
    else:
        REAL = '<f8'

    array_shape = np.dtype([('header', '<i4'), ('npar_loc', '<i4'),
                            ('footer', '<i4'), ('header2', '<i4'),
                            ('ipar', '<i4', npar_loc), ('footer2', '<i4'),
                            ('header3', '<i4'), ('fp', REAL, ltot),
                            ('footer3', '<i4'), ('header4', '<i4'),
                            ('t', REAL), ('x', REAL, dims.mx),
                            ('y', REAL, dims.my), ('z', REAL, dims.mz),
                            ('dx', REAL), ('dy', REAL), ('dz', REAL),
                            ('footer4', '<i4')])

    p_data = np.fromfile(datadir + '/proc' + str(proc) + '/' + pfile,
                         dtype=array_shape)
    partpars = np.array(p_data['fp'].reshape(mvars, npar_loc))

    if (reduce_to > 0):
        # Python 3: map() returns a lazy iterator, which numpy cannot use as a
        # fancy index, so materialize the indices directly
        particle_list = np.linspace(0.0, npar_loc, num=npar_red,
                                    endpoint=False).astype(int)
        red_parts = partpars[:, particle_list]
        red_shape = np.dtype([('header', '<i4'), ('npar_loc', '<i4'),
                              ('footer', '<i4'), ('header2', '<i4'),
                              ('ipar', '<i4', npar_red), ('footer2', '<i4'),
                              ('header3', '<i4'),
                              ('fp', REAL, npar_red * mvars),
                              ('footer3', '<i4'), ('header4', '<i4'),
                              ('t', REAL), ('x', REAL, dims.mx),
                              ('y', REAL, dims.my), ('z', REAL, dims.mz),
                              ('dx', REAL), ('dy', REAL), ('dz', REAL),
                              ('footer4', '<i4')])

        # Fortran record markers hold the payload size in bytes; use rsize
        # rather than a hard-coded 8 so single precision is written correctly
        p_red = np.array(
            [(4, npar_red, 4, npar_red * 4,
              np.squeeze(p_data['ipar'][0, :npar_red]), npar_red * 4,
              npar_red * mvars * rsize, np.squeeze(np.ravel(red_parts)),
              npar_red * mvars * rsize, p_data['header4'][0], p_data['t'],
              p_data['x'], p_data['y'], p_data['z'], p_data['dx'],
              p_data['dy'], p_data['dz'], p_data['footer4'][0])],
            dtype=red_shape)

        p_red.tofile(datadir + '/proc' + str(proc) + '/' + str(reduce_to) +
                     '_' + pfile)

    ipar = np.squeeze(p_data['ipar'].reshape(p_data['ipar'].size))
    return ipar, partpars, written_parts
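
A hedged usage sketch for the reader above: it assumes a Pencil Code run directory whose data/proc0/pvar.dat exists and whose module helpers (pc.read_dim, pc.read_pdim, read_npar_loc) are importable; the target particle count is illustrative only.

# Illustrative call; the paths and reduce_to value are assumptions.
ipar, partpars, written = read_class_npvar_red(datadir='./data',
                                               pfile='pvar.dat',
                                               proc=0,
                                               verbose=True,
                                               reduce_to=1000)
# partpars has shape (mpvar + mpaux, npar_loc); a reduced snapshot is also
# written to ./data/proc0/1000_pvar.dat as a side effect.
print(ipar.shape, partpars.shape, written)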
from math import sqrt

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix, f1_score,
                             mean_absolute_error, mean_squared_error,
                             precision_score, recall_score)


def display_metrics(rf, X, y):
     if len(rf.classes_) == 2:
         numpy_y = np.ravel(y)
         if isinstance(numpy_y[0], str):  # also matches numpy str_ scalars
             classes_ = rf.classes_
         else:
             classes_ = [str(int(rf.classes_[0])), str(int(rf.classes_[1]))]
         z = np.zeros(len(y))
         predictions = rf.predict(X) # get binary class predictions
         conf_mat = confusion_matrix(y_true=y, y_pred=predictions)
         tmisc = conf_mat[0][1]+conf_mat[1][0]
         misc = 100*(tmisc)/(len(y))
         for i in range(len(y)):
             if numpy_y[i] == 1:
                 z[i] = 1
         probability = rf.predict_proba(X) # get binary probabilities
         print("\nModel Metrics")
         print("{:.<27s}{:10d}".format('Observations', X.shape[0]))
         print("{:.<27s}{:10d}".format('Features', X.shape[1]))
         if rf.max_depth is None:
             print("{:.<27s}{:>10s}".format('Maximum Tree Depth',\
                               "None"))
         else:
             print("{:.<27s}{:10d}".format('Maximum Tree Depth',\
                               rf.max_depth))
         print("{:.<27s}{:10d}".format('Minimum Leaf Size', \
                               rf.min_samples_leaf))
         print("{:.<27s}{:10d}".format('Minimum split Size', \
                               rf.min_samples_split))
         print("{:.<27s}{:10.4f}".format('Mean Absolute Error', \
                       mean_absolute_error(z,probability[:, 1])))
         print("{:.<27s}{:10.4f}".format('Avg Squared Error', \
                       mean_squared_error(z,probability[:, 1])))
         acc = accuracy_score(y, predictions)
         print("{:.<27s}{:10.4f}".format('Accuracy', acc))
         if isinstance(numpy_y[0], str):
             pre = precision_score(y, predictions, pos_label=classes_[1])
             tpr = recall_score(y, predictions, pos_label=classes_[1])
             f1  =  f1_score(y,predictions, pos_label=classes_[1])
         else:
             pre = precision_score(y, predictions)
             tpr = recall_score(y, predictions)
             f1 =  f1_score(y,predictions)
         print("{:.<27s}{:10.4f}".format('Precision', pre))
         print("{:.<27s}{:10.4f}".format('Recall (Sensitivity)', tpr))
         print("{:.<27s}{:10.4f}".format('F1-Score', f1))
         print("{:.<27s}{:10d}".format(\
                 'Total Misclassifications', tmisc))
         print("{:.<27s}{:9.1f}{:s}".format(\
                 'MISC (Misclassification)', misc, '%'))
         n_    = [conf_mat[0][0]+conf_mat[0][1], conf_mat[1][0]+conf_mat[1][1]]
         miscc = [100*conf_mat[0][1]/n_[0], 100*conf_mat[1][0]/n_[1]]
         for i in range(2):
             print("{:s}{:<16s}{:>9.1f}{:<1s}".format(\
                   '     class ', classes_[i], miscc[i], '%'))
         print("\n\n     Confusion     Class     Class")
         print("       Matrix", end="")
         print("{:1s}{:>10s}{:>10s}".format(" ", classes_[0], classes_[1]))
         
         for i in range(2):
             print("{:s}{:.<6s}".format('  Class ', classes_[i]), end="")
             for j in range(2):
                 print("{:>10d}".format(conf_mat[i][j]), end="")
             print("")
         print("")
     
     else:
         n_classes = len(rf.classes_)
         n_obs     = len(y)
         if n_classes < 2:
             raise RuntimeError("  Call to display_metrics invalid.\n"
                                "  Target has less than two classes.\n")
 
         np_y = np.ravel(y)
         classes_ = [" "]*len(rf.classes_)
         if type(np_y[0])==str:
             classes_ = rf.classes_
         else:
             for i in range(len(rf.classes_)):
                 classes_[i] = str(int(rf.classes_[i]))
         probability = rf.predict_proba(X) # get class probabilities
         predictions = rf.predict(X) # get nominal class predictions
         conf_mat = confusion_matrix(y_true=y, y_pred=predictions)
         misc  = 0
         miscc = []
         n_    = []
         for i in range(n_classes):
             miscc.append(0)
             n_.append(0)
             for j in range(n_classes):
                 n_[i] = n_[i] + conf_mat[i][j]
                 if i != j:
                     misc = misc + conf_mat[i][j]
                     miscc[i] = miscc[i] + conf_mat[i][j]
             miscc[i] = 100*miscc[i]/n_[i]
         tmisc    = misc
         misc     = 100*misc/n_obs
         ase_sum  = 0
         mase_sum = 0
         for i in range(n_obs):
             for j in range(n_classes):
                 if np_y[i] == rf.classes_[j]:
                     ase_sum  += (1-probability[i,j])*(1-probability[i,j])
                     mase_sum += 1-probability[i,j]
                 else:
                     ase_sum  += probability[i,j]*probability[i,j]
                     mase_sum += probability[i,j]
         ase  = ase_sum/(n_classes*n_obs)
         mase = mase_sum/(n_classes*n_obs)
         print("\nModel Metrics")
         print("{:.<27s}{:10d}".format('Observations', X.shape[0]))
         print("{:.<27s}{:10d}".format('Features', X.shape[1]))
         if type(rf) == RandomForestClassifier:
             print("{:.<27s}{:10d}".format('Trees in Forest', \
                               rf.n_estimators))
         if rf.max_depth is None:
             print("{:.<27s}{:>10s}".format('Maximum Tree Depth',\
                               "None"))
         else:
             print("{:.<27s}{:10d}".format('Maximum Tree Depth',\
                               rf.max_depth))
         print("{:.<27s}{:10d}".format('Minimum Leaf Size', \
                               rf.min_samples_leaf))
         print("{:.<27s}{:10d}".format('Minimum split Size', \
                               rf.min_samples_split))
         
         print("{:.<27s}{:10.4f}".format('ASE', ase))
         print("{:.<27s}{:10.4f}".format('Root ASE', sqrt(ase)))
         print("{:.<27s}{:10.4f}".format('Mean Absolute Error', mase))
         acc = accuracy_score(np_y, predictions)
         print("{:.<27s}{:10.4f}".format('Accuracy', acc))
         pre = precision_score(np_y, predictions, average='macro')
         print("{:.<27s}{:10.4f}".format('Precision', pre))
         tpr = recall_score(np_y, predictions, average='macro')
         print("{:.<27s}{:10.4f}".format('Recall (Sensitivity)', tpr))
         f1 =  f1_score(np_y,predictions, average='macro')
         print("{:.<27s}{:10.4f}".format('F1-Score', f1))
         print("{:.<27s}{:10d}".format(\
                 'Total Misclassifications', tmisc))
         print("{:.<27s}{:9.1f}{:s}".format(\
                 'MISC (Misclassification)', misc, '%'))
         
         if isinstance(rf.classes_[0], str):
             fstr = "{:s}{:.<16s}{:>9.1f}{:<1s}"
         else:
             fstr = "{:s}{:.<16.0f}{:>9.1f}{:<1s}"
         for i in range(len(rf.classes_)):
             print(fstr.format(\
                   '     class ', rf.classes_[i], miscc[i], '%'))      
             
         print("\n\n     Confusion")
         print("       Matrix    ", end="")
         
         if isinstance(rf.classes_[0], str):
             fstr1 = "{:>7s}{:<3s}"
             fstr2 = "{:s}{:.<6s}"
         else:
             fstr1 = "{:>7s}{:<3.0f}"
             fstr2 = "{:s}{:.<6.0f}"
         for i in range(n_classes):
             print(fstr1.format('Class ', rf.classes_[i]), 
                   end="")
         print("")
         for i in range(n_classes):
             print(fstr2.format('Class ', rf.classes_[i]), 
                   end="")
             for j in range(n_classes):
                 print("{:>10d}".format(conf_mat[i][j]), end="")
             print("")
         print("")
         
         cr = classification_report(np_y, predictions, labels=rf.classes_)
         print("\n",cr)