Example #1
def find_convex_hull(X, num_iter, num_points=None):
    """
        If num_points is None, find_convex_hull returns all the convex-hull points
        that have been found, sorted by sharpness. Otherwise it returns the
        num_points sharpest points.
    """
    (N, D) = X.shape
    if num_points is None:
        num_points = N

    # randomly choose 'num_iter' directions on the unit sphere.
    # find the maximal point in the chosen direction, and add 1 to its counter.
    # only points on the convex hull will be hit, and 'sharp' corners will
    # have more hits than 'smooth' corners.
    hits = p.zeros((N, 1))
    for j in range(num_iter):
        a = p.randn(D)
        a = a / p.norm(a)
        i = p.dot(X, a).argmax()
        hits[i] += 1

    # don't take points with 0 hits
    num_points = min(num_points, int((hits > 0).sum()))

    # the indices of the n-best points
    o = list(p.argsort(hits, 0)[::-1][:num_points].flat)

    return X[o, :]
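A minimal usage sketch (not part of the original snippet): it assumes pylab is imported as p, as the function expects, and uses made-up random data.

import pylab as p

X = p.randn(500, 2)                                    # hypothetical (N, D) point cloud
corners = find_convex_hull(X, num_iter=2000, num_points=10)
print(corners.shape)                                   # at most (10, 2): the sharpest hull points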
Example #2
def shiftgrid(lon0,datain,lonsin,start=True):
    """ 
 shift global lat/lon grid east or west.
 assumes wraparound (or cyclic point) is included.

 lon0:  starting longitude for shifted grid 
        (ending longitude if start=False). lon0 must be on
        input grid (within the range of lonsin).
 datain:  original data.
 lonsin:  original longitudes.
 start[True]: if True, lon0 represents the starting longitude
 of the new grid. if False, lon0 is the ending longitude.

 returns dataout,lonsout (data and longitudes on shifted grid).
    """
    if pylab.fabs(lonsin[-1]-lonsin[0]-360.) > 1.e-4:
        raise ValueError('cyclic point not included')
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        raise ValueError('lon0 outside of range of lonsin')
    i0 = pylab.argsort(pylab.fabs(lonsin-lon0))[0]
    dataout = pylab.zeros(datain.shape, datain.dtype)
    lonsout = pylab.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:len(lonsin)-i0] = lonsin[i0:]
    else:
        lonsout[0:len(lonsin)-i0] = lonsin[i0:]-360.
    dataout[:,0:len(lonsin)-i0] = datain[:,i0:]
    if start:
        lonsout[len(lonsin)-i0:] = lonsin[1:i0+1]+360.
    else:
        lonsout[len(lonsin)-i0:] = lonsin[1:i0+1]
    dataout[:,len(lonsin)-i0:] = datain[:,1:i0+1]
    return dataout,lonsout
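A small hypothetical call (assuming plain NumPy arrays and the dtype form above): the longitude axis must include the cyclic point, i.e. lonsin[-1] - lonsin[0] == 360.

import pylab

lons = pylab.arange(0., 361., 30.)          # 0 ... 360, cyclic point included
data = pylab.rand(5, len(lons))             # e.g. 5 latitudes x 13 longitudes
data_sh, lons_sh = shiftgrid(180., data, lons, start=True)
print(lons_sh[0], lons_sh[-1])              # 180.0 540.0 (grid now starts at lon0)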
Example #3
    def classify(self, x, k):
        d = self.X - tile(x.reshape(self.n, 1), self.N)
        dsq = sum(d * d, 0)
        minindex = argmin(dsq)
        temp = argsort(dsq)

        ### Custom code starting here ###

        # Tally of votes for each of the three classes around the point
        score = [0, 0, 0]

        # With the help of the k nearest points, score each class
        for j in range(0, k):
            if self.c[temp[j]] == 1.0:
                score[0] += 1
            elif self.c[temp[j]] == 2.0:
                score[1] += 1
            elif self.c[temp[j]] == 3.0:
                score[2] += 1

        # Check to which class the point is classified
        if (score[0] > score[1] and score[0] > score[2]):
            return 1.0
        elif (score[1] > score[2]):
            return 2.0

        # If all three classes are tied, fall back to the class of the nearest neighbour.
        elif (score[0] == score[1] and score[0] == score[2]):
            return self.c[minindex]
        else:
            return 3.0
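The method above relies on self.X, self.c and self.n; a self-contained sketch of the same majority-vote idea with argsort, using made-up points and labels:

from pylab import array, argsort

X = array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.0], [5.1, 4.9]]).T   # 2 x N training points
c = array([1.0, 1.0, 2.0, 2.0])                                 # class labels
x = array([0.05, 0.1])                                          # query point
dsq = ((X - x.reshape(2, 1)) ** 2).sum(axis=0)                  # squared distances
nearest = argsort(dsq)[:3]                                      # 3 closest training points
print(c[nearest])                                               # majority of [1. 1. 2.] -> class 1.0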
Example #4
    def rank_by_distance_bhatt(self, qkeys, ikeys, rkeys, dists):
        """
        ::

            Reduce timbre-channel distances to ranks list by ground-truth key indices
            Bhattacharyya distance on timbre-channel probabilities and Kullback distances
        """
        # timbre-channel search using pre-computed distances
        ranks_list = []
        t_keys, t_lens = self.get_adb_lists(0) 
        rdists=pylab.ones(len(t_keys))*float('inf')
        qk = self._get_probs_tc(qkeys)
        for i in range(len(ikeys[0])): # number of include keys
            ikey=[]
            dk = pylab.zeros(self.timbre_channels)
            for t_chan in range(self.timbre_channels): # timbre channels
                ikey.append(ikeys[t_chan][i])
                try: 
                    # find dist of key i for query
                    i_idx = rkeys[t_chan].index( ikey[t_chan] ) # dataset include-key match
                    # the reduced distance function in include_keys order
                    # distance is Bhattacharyya distance on probs and dists
                    dk[t_chan] = dists[t_chan][i_idx]
                except ValueError:
                    print("Key not found in result list: ", ikey, "for query:", qkeys[t_chan])
                    raise error.BregmanError()
            rk = self._get_probs_tc(ikey)
            a_idx = t_keys.index( ikey[0] ) # audiodb include-key index
            rdists[a_idx] = distance.bhatt(pylab.sqrt(pylab.absolute(dk)), pylab.sqrt(pylab.absolute(qk*rk)))
        #search for the index of the relevant keys
        rdists = pylab.absolute(rdists)
        sort_idx = pylab.argsort(rdists)   # Sort fields into database order
        for r in self.ground_truth: # relevant keys
            ranks_list.append(pylab.where(sort_idx==r)[0][0]) # Rank of the relevant key
        return ranks_list, rdists
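The last few lines reduce distances to ranks; a tiny standalone illustration of that argsort/where pattern with made-up distances:

import pylab

rdists = pylab.array([0.7, 0.1, 0.4, 0.9])
sort_idx = pylab.argsort(rdists)                    # database indices in distance order
rank_of_item_2 = pylab.where(sort_idx == 2)[0][0]   # item 2 is the second closest -> rank 1
print(rank_of_item_2)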
Example #5
def plot_elecs_and_neurons(neuron_dict, ext_sim_dict, neural_sim_dict):
    pl.close('all')
    fig_all = pl.figure(figsize=[15,15])
    ax_all = fig_all.add_axes([0.1, 0.1, 0.8, 0.8], frameon=False)
    for elec in range(len(ext_sim_dict['elec_z'])):
        ax_all.plot(ext_sim_dict['elec_z'][elec], ext_sim_dict['elec_y'][elec], color='b',\
                marker='$E%i$'%elec, markersize=20 )    
    legends = []
    for i, neur in enumerate(neuron_dict):
        folder = os.path.join(neural_sim_dict['output_folder'], neuron_dict[neur]['name'])
        coor = np.load(os.path.join(folder,'coor.npy'))
        x,y,z = coor
        n_compartments = len(x)
        fig = pl.figure(figsize=[10, 10])
        ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], frameon=False)
        # Plot the electrodes
        for elec in range(len(ext_sim_dict['elec_z'])):
            ax.plot(ext_sim_dict['elec_z'][elec], ext_sim_dict['elec_y'][elec], color='b',\
                   marker='$%i$'%elec, markersize=20 )
        # Plot the neuron
        xmid, ymid, zmid = np.load(folder + '/coor.npy')
        xstart, ystart,zstart = np.load(folder + '/coor_start.npy')
        xend, yend, zend = np.load(folder + '/coor_end.npy')
        diam = np.load(folder + '/diam.npy')
        length = np.load(folder + '/length.npy')
        n_compartments = len(diam)
        for comp in range(n_compartments):
            if comp == 0:
                xcoords = pl.array([xmid[comp]])
                ycoords = pl.array([ymid[comp]])
                zcoords = pl.array([zmid[comp]])
                diams = pl.array([diam[comp]])    
            else:
                if zmid[comp] < 0.400 and zmid[comp] > -.400:  
                    xcoords = pl.r_[xcoords, pl.linspace(xstart[comp],
                                                         xend[comp], length[comp]*3*1000)]   
                    ycoords = pl.r_[ycoords, pl.linspace(ystart[comp],
                                                         yend[comp], length[comp]*3*1000)]   
                    zcoords = pl.r_[zcoords, pl.linspace(zstart[comp],
                                                         zend[comp], length[comp]*3*1000)]   
                    diams = pl.r_[diams, pl.linspace(diam[comp], diam[comp],
                                                length[comp]*3*1000)]
        argsort = pl.argsort(-xcoords)
        ax.scatter(zcoords[argsort], ycoords[argsort], s=20*(diams[argsort]*1000)**2,
                       c=xcoords[argsort], edgecolors='none', cmap='gray')
        ax_all.plot(zmid[0], ymid[0], marker='$%i$'%i, markersize=20, label='%i: %s' %(i, neur))
        #legends.append('%i: %s' %(i, neur))
        ax.axis(ext_sim_dict['plot_range'])
        ax.axis('equal')
        ax.axis(ext_sim_dict['plot_range'])
        ax.set_xlabel('z [mm]')
        ax.set_ylabel('y [mm]')
        fig.savefig(os.path.join(neural_sim_dict['output_folder'],\
                  'neuron_figs', '%s.png' % neur))
    ax_all.axis('equal')
    ax.axis(ext_sim_dict['plot_range'])
    ax_all.set_xlabel('z [mm]')
    ax_all.set_ylabel('y [mm]')
    ax_all.legend()
    fig_all.savefig(os.path.join(neural_sim_dict['output_folder'], 'fig.png'))
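The argsort trick in this plot is drawing the compartments ordered by decreasing x, so nearer points overplot farther ones; a minimal standalone sketch with hypothetical coordinates:

import pylab as pl

x, y, z = pl.randn(3, 100)                 # made-up 3-D point coordinates
order = pl.argsort(-x)                     # far-to-near along the x axis
pl.scatter(z[order], y[order], c=x[order], cmap='gray')
pl.show()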
Example #6
    def rank_by_distance_avg(self, qkeys, ikeys, rkeys, dists):
        """
        ::

            Reduce timbre-channel distances to ranks list by ground-truth key indices
            Kullback distances
        """
        # timbre-channel search using pre-computed distances
        ranks_list = []
        t_keys, t_lens = self.get_adb_lists(0) 
        rdists=pylab.ones(len(t_keys))*float('inf')
        for t_chan in range(self.timbre_channels): # timbre channels
            t_keys, t_lens = self.get_adb_lists(t_chan) 
            for i, ikey in enumerate(ikeys[t_chan]): # include keys, results
                try: 
                    # find dist of key i for query
                    i_idx = rkeys[t_chan].index( ikey ) # lower_bounded include-key index
                    a_idx = t_keys.index( ikey ) # audiodb include-key index
                    # the reduced distance function in include_keys order
                    # distance is the sum for now
                    if t_chan:
                        rdists[a_idx] += dists[t_chan][i_idx]
                    else:
                        rdists[a_idx] = dists[t_chan][i_idx]
                except ValueError:
                    print("Key not found in result list: ", ikey, "for query:", qkeys[t_chan])
                    raise error.BregmanError()
        #search for the index of the relevant keys
        rdists = pylab.absolute(rdists)
        sort_idx = pylab.argsort(rdists)   # Sort fields into database order
        for r in self.ground_truth: # relevant keys
            ranks_list.append(pylab.where(sort_idx==r)[0][0]) # Rank of the relevant key
        return ranks_list, rdists
Example #8
def main():
    plt.ion()

    fil = FletcherFilter()
    Niter = 12
    logp = plt.zeros((Niter,2))
    for k in range(Niter):
        while True:
            #print k
            p = plt.rand(2)
            if not fil.dominated(p):
                break
        logp[k] = p
        fil.add(p, 0.0, 0.0)
        ff = fil.values[fil.valid]
        ff = plt.r_[[[1e-6,1]], ff[plt.argsort(ff[:,0])], [[1,1e-6]]]
        ww = plt.zeros((ff.shape[0] * 2 - 1, 2))
        ww[::2] = ff
        ww[1::2,0] = ff[1:,0]
        ww[1::2,1] = ff[:-1,1]
        plt.loglog(ww[:,0], ww[:,1], '-')
    plt.loglog(logp[:,0], logp[:,1], 'ys-', lw=2)
    plt.axis([0,1,0,1])
    plt.axis('equal')
    plt.grid()
        
    code.interact()
def plot_uhecrs(phi, theta, values, cmap='jet', **kwargs):
    f = kwargs.get("figure", pylab.figure(figsize=(10, 10 * 0.75)))
    s = f.add_subplot(111, projection='hammer')
    pylab.grid(lw=1, color='0.5', alpha=0.5)
    # s.invert_xaxis()
    if values is None:
        pylab.scatter(-phi,
                      theta,
                      edgecolors=kwargs.get('edge', 'none'),
                      marker='.',
                      label="a",
                      s=kwargs.get('size', 10))
    else:
        srt = pylab.argsort(values)
        pylab.scatter(-phi[srt],
                      theta[srt],
                      c=values[srt],
                      vmin=min(values),
                      vmax=max(values),
                      edgecolors=kwargs.get('edge', '0.7'),
                      lw=0.1,
                      marker='.',
                      label="a",
                      s=kwargs.get('size', 10),
                      cmap=cmap)
        cb = pylab.colorbar(format='%g',
                            orientation='horizontal',
                            aspect=30,
                            shrink=0.8,
                            pad=0.1,
                            label=kwargs.get('valuelabel', None))
        cb.solids.set_edgecolor("face")
    pylab.gca().set_xticklabels([])
    pylab.tight_layout(pad=0.1)
    return f, s
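A hypothetical call with random directions (phi in [-pi, pi], theta in [-pi/2, pi/2], as the Hammer projection expects):

import pylab

phi = (pylab.rand(1000) - 0.5) * 2 * pylab.pi      # made-up event directions
theta = (pylab.rand(1000) - 0.5) * pylab.pi
vals = pylab.rand(1000)                            # made-up values to colour by
fig, ax = plot_uhecrs(phi, theta, vals, cmap='viridis')
fig.savefig('uhecrs.png')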
Example #10
    def rank_by_distance_bhatt(self, qkeys, ikeys, rkeys, dists):
        """
        ::

            Reduce timbre-channel distances to ranks list by ground-truth key indices
            Bhattacharyya distance on timbre-channel probabilities and Kullback distances
        """
        # timbre-channel search using pre-computed distances
        ranks_list = []
        t_keys, t_lens = self.get_adb_lists(0) 
        rdists=pylab.ones(len(t_keys))*float('inf')
        qk = self._get_probs_tc(qkeys)
        for i in range(len(ikeys[0])): # number of include keys
            ikey=[]
            dk = pylab.zeros(self.timbre_channels)
            for t_chan in range(self.timbre_channels): # timbre channels
                ikey.append(ikeys[t_chan][i])
                try: 
                    # find dist of key i for query
                    i_idx = rkeys[t_chan].index( ikey[t_chan] ) # dataset include-key match
                    # the reduced distance function in include_keys order
                    # distance is Bhattacharyya distance on probs and dists
                    dk[t_chan] = dists[t_chan][i_idx]
                except ValueError:
                    print "Key not found in result list: ", ikey, "for query:", qkeys[t_chan]
                    raise error.BregmanError()
            rk = self._get_probs_tc(ikey)
            a_idx = t_keys.index( ikey[0] ) # audiodb include-key index
            rdists[a_idx] = distance.bhatt(pylab.sqrt(pylab.absolute(dk)), pylab.sqrt(pylab.absolute(qk*rk)))
        #search for the index of the relevant keys
        rdists = pylab.absolute(rdists)
        sort_idx = pylab.argsort(rdists)   # Sort fields into database order
        for r in self.ground_truth: # relevant keys
            ranks_list.append(pylab.where(sort_idx==r)[0][0]) # Rank of the relevant key
        return ranks_list, rdists
Example #12
    def rank_by_distance_avg(self, qkeys, ikeys, rkeys, dists):
        """
        ::

            Reduce timbre-channel distances to ranks list by ground-truth key indices
            Kullback distances
        """
        # timbre-channel search using pre-computed distances
        ranks_list = []
        t_keys, t_lens = self.get_adb_lists(0) 
        rdists=pylab.ones(len(t_keys))*float('inf')
        for t_chan in range(self.timbre_channels): # timbre channels
            t_keys, t_lens = self.get_adb_lists(t_chan) 
            for i, ikey in enumerate(ikeys[t_chan]): # include keys, results
                try: 
                    # find dist of key i for query
                    i_idx = rkeys[t_chan].index( ikey ) # lower_bounded include-key index
                    a_idx = t_keys.index( ikey ) # audiodb include-key index
                    # the reduced distance function in include_keys order
                    # distance is the sum for now
                    if t_chan:
                        rdists[a_idx] += dists[t_chan][i_idx]
                    else:
                        rdists[a_idx] = dists[t_chan][i_idx]
                except ValueError:
                    print "Key not found in result list: ", ikey, "for query:", qkeys[t_chan]
                    raise error.BregmanError()
        #search for the index of the relevant keys
        rdists = pylab.absolute(rdists)
        sort_idx = pylab.argsort(rdists)   # Sort fields into database order
        for r in self.ground_truth: # relevant keys
            ranks_list.append(pylab.where(sort_idx==r)[0][0]) # Rank of the relevant key
        return ranks_list, rdists
def sorted_points(df, col1, col2):
    x = pylab.array(df[col1])
    y = pylab.array(df[col2])
    order = pylab.argsort(x)
    x = x[order]
    y = y[order]
    return x, y
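A small made-up example of the helper above (the column names are hypothetical):

import pandas as pd

df = pd.DataFrame({'time': [3.0, 1.0, 2.0], 'signal': [30.0, 10.0, 20.0]})
x, y = sorted_points(df, 'time', 'signal')
print(x, y)        # [1. 2. 3.] [10. 20. 30.]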
Example #14
    def FDR_BH(self, p):
        """Benjamini-Hochberg p-value correction for multiple hypothesis testing."""
        p = np.asfarray(p)
        by_descend = p.argsort()[::-1]
        by_orig = by_descend.argsort()
        steps = float(len(p)) / np.arange(len(p), 0, -1)
        q = np.minimum(1, np.minimum.accumulate(steps * p[by_descend]))
        return q[by_orig]
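The method never touches self, so it can be exercised directly if it is available at module level as shown here; a sketch with made-up p-values (outputs are approximate):

import numpy as np

q = FDR_BH(None, np.array([0.01, 0.04, 0.03, 0.20]))   # None stands in for the unused self
print(q)                                               # ~[0.04, 0.0533, 0.0533, 0.2]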
def gen_topic(result, t, W0, word_list, k0):
    if t in result.topics:
        return result.topics[t]

    word_weight = W0[:,t]
    word_ind = argsort(word_weight)[::-1]

    topic_node = [(word_list[w], W0[w,t]) for w in word_ind[:k0]]
    result.topics[t] = topic_node
    return topic_node
def gen_group(result, g, W1, W0, word_list, k1, k0):
    if g in result.groups:
        return result.groups[g]
    topic_weight = W1[:,g]
    topic_ind = argsort(topic_weight)[::-1]
    for t in topic_ind[:k1]:
        gen_topic(result, t, W0, word_list, k0)

    group_node = [(t, W1[t,g]) for t in topic_ind[:k1]]
    result.groups[g] = group_node
    return group_node
def gen_super(result, s, W2, W1, W0, word_list, k2, k1, k0):
    if s in result.supers:
        return result.supers[s]

    group_weight = W2[:,s]
    group_ind = argsort(group_weight)[::-1]
    for g in group_ind[:k2]:
        gen_group(result, g, W1, W0, word_list, k1, k0)

    super_node = [(g, W2[g,s]) for g in group_ind[:k2]]
    result.supers[s] = super_node
    return super_node
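gen_topic, gen_group and gen_super all share the same top-k pattern: argsort a weight column, reverse it, and keep the first k indices. A made-up miniature:

from pylab import argsort, rand

W0 = rand(50, 4)                      # hypothetical 50 words x 4 topics
t = 2
top5 = argsort(W0[:, t])[::-1][:5]    # indices of the 5 heaviest words in topic t
print(top5)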
Example #18
    def __init__(self, logger=None, debug=True):
        hostname = socket.gethostname()
        self.port = '/dev/arduinoElAxis'
        self.baudrate = 57600
        self.debug = debug
        self.lock = threading.Lock()
        self.ser = serial.Serial()
        self.setLogger(logger)
        #prefix = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        #if logger == None:
        #    self.lw = logWriter.logWriter(prefix, verbose=False)
        #else:
        #    self.lw = logger
        self.posD = -9999.999

        stepPole = array([
            0., 100., 200., 300., 400., 500., 600., 700., 800., 900., 1000.,
            1100., 1200., 1300., 1400., 1500., 1600., 1700., 1800., 1900.,
            2000., 2100., 2200., 2300., 2400., 2500., 2600., 2700., 2800.,
            2900., 3000., 3100., 3200., 3300., 3400.
        ])
        anglePole = array([
            93.75, 90.45, 87.8, 84.95, 82.15, 79.45, 76.85, 74.3, 71.85, 69.4,
            67., 64.65, 62.25, 60.05, 57.65, 55.35, 53.15, 50.85, 48.55, 46.2,
            43.95, 41.7, 39.3, 36.95, 34.5, 32.2, 29.6, 27.05, 24.6, 21.8,
            19.2, 16.55, 13.8, 11.15, 8.94
        ])
        stepSummit = array([
            0., 100., 200., 300., 400., 500., 600., 700., 800., 900., 1000.,
            1100., 1200., 1300., 1400., 1500., 1600., 1700., 1800., 1900.,
            2000., 2100., 2200., 2300., 2400., 2500., 2600., 2700., 2800.,
            2900., 3000., 3100., 3200.
        ])
        angleSummit = array([
            92.6, 88.85, 85.35, 82.2, 79.15, 76.35, 73.7, 70.7, 67.95, 65.25,
            62.85, 60.35, 57.7, 55.25, 52.65, 50.25, 47.6, 45.2, 42.7, 40.15,
            37.5, 35.0, 32.5, 29.55, 27.2, 24.3, 21.25, 18.5, 15.6, 12.5, 9.45,
            6.0, 2.65
        ])
        if 'wvr1' in hostname:
            angle = anglePole
            step = stepPole
        elif 'wvr2' in hostname:
            angle = angleSummit
            step = stepSummit
        q = argsort(angle)
        self.step2Angle = scipy.interpolate.interp1d(step,
                                                     angle,
                                                     kind='linear')
        self.angle2Step = scipy.interpolate.interp1d(angle[q],
                                                     step[q],
                                                     kind='linear')
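The argsort(angle) step matters because the calibration angles decrease with step while the inverse interpolator is built from x values in increasing order; a tiny illustration with made-up numbers:

import scipy.interpolate
from pylab import array, argsort

step = array([0., 100., 200., 300.])
angle = array([90., 80., 70., 60.])                  # decreasing with step
q = argsort(angle)                                   # reorder so angle is increasing
angle2Step = scipy.interpolate.interp1d(angle[q], step[q], kind='linear')
print(angle2Step(75.))                               # 150.0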
Example #19
    def plot_frequencies_hyperedges(self):
        """Plot frequencies of the hyperedges amonsgt all models


        .. plot::

            >>> from cellnopt.optimiser import ASPBool
            >>> from cellnopt.data import cnodata
            >>> a = ASPBool(cnodata("PKN-ToyMMB.sif"), cnodata("MD-ToyMMB.csv"))
            >>> a.run(fit=2, gtts=False, size=2)
            >>> a.plot_frequencies_hyperedges()


        .. todo:: refine plotting to cope with xlabels being too long.

        """
        from pylab import clf, plot, xlim, grid, xlabel, ylabel, vlines, title
        from pylab import xticks, arange, argsort
        _freq = list(self.family.frequencies)
        values = [x[1] for x in _freq]
        names  = [x[0] for x in _freq]

        # sort the values in decreasing order and reorder the names accordingly
        order = argsort(values)[::-1]
        sorted_names = [names[i] for i in order]
        values = [values[i] for i in order]

        N = len(values)
        Nzeros = values.count(0)
        Nones = values.count(1)

        clf()
        plot(range(1, N+1), values, 'o-')
        xlim([1, N])
        grid()
        vlines(Nones, 0, 1, linestyles="--")
        vlines(Nones+(N-Nzeros), 0,1, linestyles="--")
        xlabel("Hyperedges")
        xticks(arange(1,1+len(names)), sorted_names, rotation=90, fontsize=8)
        ylabel("Frequency")
        title("Frequencies of hyperedges over %s sub-optimal models \nwithin %s %% tolerance" % (len(self.models), self.fit*100))
def val_distribution(df, val, categories, plot_func, plot_descriptor,\
                     outdir=FIGS_DIR, fig_suffix=None, category_subset=None):
    for category in categories:
        subset_cols = ['neuron name', 'neuron type', 'alpha']
        if category != 'neuron type':
            subset_cols.append(category)
        df2 = df.drop_duplicates(subset=subset_cols)

        if category_subset is not None:
            df2 = df2[df2[category].isin(category_subset)]
        else:
            df2 = remove_small_counts(df2, category,\
                                      min_count=CATEGORY_MIN_COUNTS[category])

        cat_vals = []
        medians = []
        for name, group in df2.groupby(category):
            cat_vals.append(name)
            medians.append(pylab.median(group[val]))
        
        cat_vals = pylab.array(cat_vals)
        medians = pylab.array(medians)
        order = pylab.argsort(medians)
        order = cat_vals[order]

        pylab.figure()
        sns.set()
        dist_plot = plot_func(x=val, y=category, data=df2, orient='h', order=order)
        dist_plot.tick_params(axis='y', labelsize=20)
        pylab.tight_layout()
        pylab.xlabel(val, fontsize=20)
        pylab.ylabel(category, fontsize=20)

        fname = '%s_%ss_%s' % (category.replace(' ', '_'),\
                              val.replace(' ', '_'), plot_descriptor)
        if fig_suffix is not None:
            fname += '_%s' % fig_suffix
        pylab.savefig('%s/%s.pdf' % (outdir, fname), format='pdf')
        pylab.close()
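The ordering step above boils down to argsort-ing the per-category medians and using the result to reorder the category labels; a compact example with made-up categories:

import pylab

cat_vals = pylab.array(['axon', 'basal dendrite', 'apical dendrite'])
medians = [4.2, 1.1, 2.7]
order = cat_vals[pylab.argsort(medians)]
print(order)       # ['basal dendrite' 'apical dendrite' 'axon']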
def category_dists(df, categories, outdir=FIGS_DIR, fig_suffix=None,\
                   category_subset=None):
    for category in categories:
        df2 = df.drop_duplicates(subset=list(set(['neuron name', 'neuron type', category])))
        
        if category_subset is not None:
            df2 = df2[df2[category].isin(category_subset)]
        else:
            df2 = remove_small_counts(df2, category,\
                                      min_count=CATEGORY_MIN_COUNTS[category])


        df2['dist'] = pylab.log10(df2['dist'])
        
        
        cat_vals = []
        cat_means = []
        for cat_val, group in df2.groupby(category):
            cat_vals.append(cat_val)
            cat_mean = pylab.mean(group['dist'])
            cat_means.append(cat_mean)
        order = pylab.argsort(cat_means)
        cat_vals = pylab.array(cat_vals)
        sorted_vals = cat_vals[order]
        pylab.figure()
        sns.set()
        dist_plot = sns.barplot(x=category, y='dist', data=df2, order=sorted_vals)
        pylab.xticks(rotation='vertical', size=20)
        pylab.xlabel(category, size=20)
        pylab.ylabel('log-distance to Pareto front', size=20)
        #pylab.tight_layout()
        fname = 'pareto_dists_%s' % category.replace(' ', '_')
        if fig_suffix is not None:
            fname += '_%s' % fig_suffix
        pylab.savefig('%s/%s.pdf' % (outdir, fname), bbox_inches='tight')
Example #22
def scatter_dists(df):
    df2 = df.drop_duplicates(subset='name')
    neural_dist = df2['neural_dist']
    centroid_dist = df2['centroid_dist']
    random_dist = df2['random_dist']

    assert len(neural_dist) == len(centroid_dist) == len(random_dist)

    order = pylab.argsort(neural_dist)
    neural_dist = neural_dist[order]
    centroid_dist = centroid_dist[order]
    random_dist = random_dist[order]

    x = range(len(neural_dist))
    pylab.figure()
    pylab.scatter(x, random_dist, c='m', label='random')
    pylab.scatter(x, centroid_dist, c='g', label='centroid')
    pylab.scatter(x, neural_dist, c='r', label='neural')
    pylab.ylabel('distance')
    pylab.title('Distance to Pareto Front')
    pylab.legend()
    pylab.savefig('%s/pareto_dists.pdf' % OUTDIR, format='pdf')
    pylab.close()
def zipf_law(df):
    # Plot of absolute frequency
    from pylab import arange, argsort, loglog, logspace, log10, text
    counts = df.total
    tokens = df.index
    ranks = arange(1, len(counts)+1)
    indices = argsort(-counts)
    frequencies = counts[indices]
    fig, ax = plt.subplots(figsize=(8,6))
    ax.set_ylim(1,10**6)
    ax.set_xlim(1,10**6)
    loglog(ranks, frequencies, marker=".")
    ax.plot([1,frequencies[0]],[frequencies[0],1],color='r')
    #ax.set_title("Zipf plot for phrases tokens")
    ax.set_xlabel("Frequency rank of token")
    ax.set_ylabel("Absolute frequency of token")
    ax.grid(True)
    for n in list(logspace(-0.5, log10(len(counts)-2), 15).astype(int)):
        dummy = text(ranks[n], frequencies[n], " " + tokens[indices[n]], 
                     verticalalignment="bottom",
                     horizontalalignment="left")
    ax.figure.savefig(figOutputPath / '2_zipf_law.png', format='png')
    print('Exported 2_zipf_law.png')
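The core of the Zipf plot is sorting counts in decreasing order with argsort(-counts); a self-contained miniature with made-up token counts:

from pylab import arange, argsort, array

counts = array([120, 45, 300, 10])                 # hypothetical token counts
ranks = arange(1, len(counts) + 1)
frequencies = counts[argsort(-counts)]             # [300 120  45  10]
print(list(zip(ranks, frequencies)))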
Example #24
def get_density_stats(dirname="sample"):
    img_stats = []
    chr_density_sort = []
    tree = os.walk(dirname)
    
    for subfol in tree:
        img_path = subfol[0]
        img_files = subfol[2]
        for img in img_files:
            img_char = img.split(".")[0]
            img_sum = sum(imread(os.path.join(img_path,img)))
#            print (os.path.join(img_path,img))
            img_stats.append(img_sum)

#            print img_sum, img_char
            chr_density_sort.append(int(img_char))

    chr_density_sort = array(chr_density_sort)
    chr_density_sort = chr_density_sort[argsort(img_stats)]

    # print chr_density_sort, len(chr_density_sort)
    print "Visual density sort obtained."
    
    return chr_density_sort
Example #25
def save_density_stats(dirname="sample"):
    img_stats = []
    chr_density_sort = []
    tree = os.walk(dirname)
    
    for subfol in tree:
        img_path = subfol[0]
        img_files = subfol[2]
        for img in img_files:
            img_char = img.split(".")[0]
            img_sum = sum(imread(os.path.join(img_path,img)))
            img_stats.append(img_sum)

            chr_density_sort.append(int(img_char))

    chr_density_sort = array(chr_density_sort)
    chr_density_sort = list(chr_density_sort[argsort(img_stats)])

    # print chr_density_sort, len(chr_density_sort)
    print "Visual density sort obtained."

    f = open('density_stats.txt', 'w')
    f.writelines("".join(map(chr, chr_density_sort)))
    f.close()
def plot_uhecrs(phi, theta, values, cmap='jet', **kwargs):
    f = kwargs.get("figure", pylab.figure(figsize=(10, 10 * 0.75)))
    s = f.add_subplot(111, projection='hammer')
    pylab.grid(lw=1, color='0.5', alpha=0.5)
    # s.invert_xaxis()
    if values is None:
        pylab.scatter(-phi, theta, edgecolors=kwargs.get('edge', 'none'),
                      marker='.', label="a", s=kwargs.get('size', 10))
    else:
        srt = pylab.argsort(values)
        pylab.scatter(-phi[srt], theta[srt], c=values[srt], vmin=min(values),
                      vmax=max(values), edgecolors=kwargs.get('edge', '0.7'),
                      lw=0.1, marker='.', label="a", s=kwargs.get('size', 10), cmap=cmap)
        cb = pylab.colorbar(
            format='%g',
            orientation='horizontal',
            aspect=30,
            shrink=0.8,
            pad=0.1,
            label=kwargs.get('valuelabel', None))
        cb.solids.set_edgecolor("face")
    pylab.gca().set_xticklabels([])
    pylab.tight_layout(pad=0.1)
    return f, s
def raster_tuning(ax):

    fullbehaviorDir = behaviorDir+subject+'/'
    behavName = subject+'_tuning_curve_'+tuningBehavior+'.h5'
    tuningBehavFileName=os.path.join(fullbehaviorDir, behavName)


    tuning_bdata = loadbehavior.BehaviorData(tuningBehavFileName,readmode='full')
    freqEachTrial = tuning_bdata['currentFreq']
    possibleFreq = np.unique(freqEachTrial)
    numberOfTrials = len(freqEachTrial)

    # -- The old way of sorting (useful for plotting sorted raster) --
    sortedTrials = []
    numTrialsEachFreq = []  #Used to plot lines after each group of sorted trials
    for indf,oneFreq in enumerate(possibleFreq): #indf is index of this freq and oneFreq is the frequency
        indsThisFreq = np.flatnonzero(freqEachTrial==oneFreq) #this gives indices of this frequency
        sortedTrials = np.concatenate((sortedTrials,indsThisFreq)) #adds all indices to a list called sortedTrials
        numTrialsEachFreq.append(len(indsThisFreq)) #finds number of trials each frequency has
    sortingInds = argsort(sortedTrials) #gives array of indices that would sort the sortedTrials

    # -- Load event data and convert event timestamps to ms --
    tuning_ephysDir = os.path.join(settings.EPHYS_PATH, subject,tuningEphys)
    tuning_eventFilename=os.path.join(tuning_ephysDir, 'all_channels.events')
    tuning_ev=loadopenephys.Events(tuning_eventFilename) #load ephys data (like bdata structure)
    tuning_eventTimes=np.array(tuning_ev.timestamps)/SAMPLING_RATE #get array of timestamps for each event and convert to seconds by dividing by sampling rate (Hz). matches with eventID and 
    tuning_evID=np.array(tuning_ev.eventID)  #loads the onset times of events (matches up with eventID to say if event 1 went on (1) or off (0)
    tuning_eventOnsetTimes=tuning_eventTimes[tuning_evID==1] #array that is a time stamp for when the chosen event happens.
    #ev.eventChannel would load array of events like trial start and sound start and finish times (sound event is 0 and trial start is 1 for example). There is only one event though and it's sound start
    while (numberOfTrials < len(tuning_eventOnsetTimes)):
        tuning_eventOnsetTimes = tuning_eventOnsetTimes[:-1]

    #######################################################################################################
    ###################THIS IS SUCH A HACK TO GET SPKDATA FROM EPHYSCORE###################################
    #######################################################################################################

    thisCell = celldatabase.CellInfo(animalName=subject,############################################
                 ephysSession = tuningEphys,
                 tuningSession = 'DO NOT NEED THIS',
                 tetrode = tetrode,
                 cluster = cluster,
                 quality = 1,
                 depth = 0,
                 tuningBehavior = 'DO NOT NEED THIS',
                 behavSession = tuningBehavior)
    
    tuning_spkData = ephyscore.CellData(thisCell)
    tuning_spkTimeStamps = tuning_spkData.spikes.timestamps

    (tuning_spikeTimesFromEventOnset,tuning_trialIndexForEachSpike,tuning_indexLimitsEachTrial) = spikesanalysis.eventlocked_spiketimes(tuning_spkTimeStamps,tuning_eventOnsetTimes,tuning_timeRange)

    #print 'numTrials ',max(tuning_trialIndexForEachSpike)#####################################
    '''
        Create a vector with the spike timestamps w.r.t. events onset.

        (spikeTimesFromEventOnset,trialIndexForEachSpike,indexLimitsEachTrial) = 
            eventlocked_spiketimes(timeStamps,eventOnsetTimes,timeRange)

        timeStamps: (np.array) the time of each spike.
        eventOnsetTimes: (np.array) the time of each instance of the event to lock to.
        timeRange: (list or np.array) two-element array specifying time-range to extract around event.

        spikeTimesFromEventOnset: 1D array with time of spikes locked to event.
        trialIndexForEachSpike: 1D array with the trial corresponding to each spike.
           The first spike index is 0.
        indexLimitsEachTrial: [2,nTrials] range of spikes for each trial. Note that
           the range is from firstSpike to lastSpike+1 (like in python slices)
        spikeIndices
    '''

    tuning_sortedIndexForEachSpike = sortingInds[tuning_trialIndexForEachSpike] #Takes values of trialIndexForEachSpike and finds value of sortingInds at that index and makes array. This array gives an array with the sorted index of each trial for each spike


    # -- Calculate tuning --
    #nSpikes = spikesanalysis.spiketimes_to_spikecounts(spikeTimesFromEventOnset,indexLimitsEachTrial,responseRange) #array of the number of spikes in range for each trial
    '''Count number of spikes on each trial in a given time range.

           spikeTimesFromEventOnset: vector of spikes timestamps with respect
             to the onset of the event.
           indexLimitsEachTrial: each column contains [firstInd,lastInd+1] of the spikes on a trial.
           timeRange: time range to evaluate. Spike times exactly at the limits are not counted.

           returns nSpikes
    '''
    '''
    meanSpikesEachFrequency = np.empty(len(possibleFreq)) #make empty array of same size as possibleFreq

    # -- This part will be replace by something like behavioranalysis.find_trials_each_type --
    trialsEachFreq = []
    for indf,oneFreq in enumerate(possibleFreq):
        trialsEachFreq.append(np.flatnonzero(freqEachTrial==oneFreq)) #finds indices of each frequency. Appends them to get an array of indices of trials sorted by freq

    # -- Calculate average firing for each freq --
    for indf,oneFreq in enumerate(possibleFreq):
        meanSpikesEachFrequency[indf] = np.mean(nSpikes[trialsEachFreq[indf]])
    '''
    #clf()
    #if (len(tuning_spkTimeStamps)>0):
        #ax1 = plt.subplot2grid((4,4), (3, 0), colspan=1)
        #spikesorting.plot_isi_loghist(spkData.spikes.timestamps)
        #ax3 = plt.subplot2grid((4,4), (3, 3), colspan=1)
        #spikesorting.plot_events_in_time(tuning_spkTimeStamps)
        #samples = tuning_spkData.spikes.samples.astype(float)-2**15
        #samples = (1000.0/tuning_spkData.spikes.gain[0,0]) *samples
        #ax2 = plt.subplot2grid((4,4), (3, 1), colspan=2)
        #spikesorting.plot_waveforms(samples)
    #ax4 = plt.subplot2grid((4,4), (0, 0), colspan=3,rowspan = 3)
    plot(tuning_spikeTimesFromEventOnset, tuning_sortedIndexForEachSpike, '.', ms=3)
    #axvline(x=0, ymin=0, ymax=1, color='r')

    #The cumulative sum of the list of specific frequency presentations, 
    #used below for plotting the lines across the figure. 
    numTrials = cumsum(numTrialsEachFreq)

    #Plot the lines across the figure in between each group of sorted trials
    for indf, num in enumerate(numTrials):
        ax.axhline(y = num, xmin = 0, xmax = 1, color = '0.90', zorder = 0)
       
    
    tickPositions = numTrials - mean(numTrialsEachFreq)/2
    tickLabels = ["%0.2f" % (possibleFreq[indf]/1000) for indf in range(len(possibleFreq))]
    ax.set_yticks(tickPositions)
    ax.set_yticklabels(tickLabels)
    ax.set_ylim([-1,numberOfTrials])
    ylabel('Frequency Presented (kHz), {} total trials'.format(numTrials[-1]))
    #title(ephysSession+' T{}c{}'.format(tetrodeID,clusterID))
    xlabel('Time (sec)')
    '''

    ax5 = plt.subplot2grid((4,4), (0, 3), colspan=1,rowspan=3)
    ax5.set_xscale('log')
    plot(possibleFreq,meanSpikesEachFrequency,'o-')
    ylabel('Avg spikes in window {0}-{1} sec'.format(*responseRange))
    xlabel('Frequency')
    '''
    #show()

Example #28
def main():
  import optparse
  from numpy import sum

  # Parse command line
  parser = optparse.OptionParser(usage=USAGE)
  parser.add_option("-p", "--plot", action="store_true",
                    help="Generate pdf with IR-spectrum, broadened with Lorentzian")
  parser.add_option("-i", "--info", action="store_true",
                      help="Set up/ Calculate vibrations & quit")
  parser.add_option("-s", "--suffix", action="store",
                    help="Call suffix for binary e.g. 'mpirun -n 4 '",
                    default='')
  parser.add_option("-r", "--run", action="store",
                    help="path to FHI-aims binary",default='')
  parser.add_option("-x", "--relax", action="store_true",
                    help="Relax initial geometry")
  parser.add_option("-m", "--molden", action="store_true",
                    help="Output in molden format")
  parser.add_option("-w", "--distort", action="store_true",
                    help="Output geometry distorted along imaginary modes")
  parser.add_option("-t", "--submit", action="store",
                    help="""\
Path to submission script, string <jobname>
will be replaced by name + counter, string 
                            <outfile> will be replaced by filename""")
  parser.add_option("-d", "--delta", action="store", type="float",
                    help="Displacement", default=0.0025)
  parser.add_option("-b", "--broadening", action="store", type="float",
                    help="Broadening for IR-spectrum in cm^{-1}", default=5)

  options, args = parser.parse_args()
  if options.info:
      print __doc__
      sys.exit(0)
  if len(args) != 2:
      parser.error("Need exactly two arguments")
  
  AIMS_CALL=options.suffix+' '+options.run
  hessian_thresh = -1
  name=args[0]
  mode=args[1] 
  delta=options.delta
  broadening=options.broadening

  run_aims=False
  if options.run!='': run_aims=True
  
  submit_script = options.submit is not None
  
  if options.plot:
    import matplotlib as mpl
    mpl.use('Agg') 
    from pylab import figure

  if options.plot or mode=='1' or mode=='2':
    from pylab import savetxt, transpose, eig, argsort, sort,\
		      sign, pi, dot, sum, linspace, argmin, r_, convolve
		 
  # Constant from scipy.constants
  bohr=constants.value('Bohr radius')*1.e10
  hartree=constants.value('Hartree energy in eV')
  at_u=constants.value('atomic mass unit-kilogram relationship')
  eV=constants.value('electron volt-joule relationship')
  c=constants.value('speed of light in vacuum')
  Ang=1.0e-10
  hbar=constants.value('Planck constant over 2 pi')
  Avo=constants.value('Avogadro constant')
  kb=constants.value('Boltzmann constant in eV/K')
  pi=constants.pi
  hessian_factor   = eV/(at_u*Ang*Ang) 
  grad_dipole_factor=(eV/(1./(10*c)))/Ang  #(eV/Ang -> D/Ang)
  ir_factor = 1
  
  # Assign all filenames
  inputgeomerty = 'geometry.in.'+name
  inputcontrol  = 'control.in.'+name
  atomicmasses  = 'masses.'+name+'.dat'; 
  xyzfile       = name+'.xyz';
  moldenname    =name+'.molden';
  hessianname   = 'hessian.'+name+'.dat'; 
  graddipolename   = 'grad_dipole.'+name+'.dat'; 
  irname   = 'ir.'+name+'.dat'; 
  deltas=array([-delta,delta])
  coeff=array([-1,1])
  c_zero = - 1. / (2. * delta)


  f=open('control.in','r')                   # read control.in template
  template_control=f.read()
  f.close()

  if submit_script:
    f=open(options.submit,'r')               # read submission script template
    template_job=f.read()
    f.close()

  folder=''                                  # Dummy
  ########### Central Point ##################################################
  if options.relax and (mode=='0' or mode=='2'):
    # First relax input geometry
    filename=name+'.out'
    folder=name+'_relaxation' 
    if not os.path.exists(folder): os.mkdir(folder)            # Create folder
    shutil.copy('geometry.in', folder+'/geometry.in')          # Copy geometry
    new_control=open(folder+'/control.in','w')
    new_control.write(template_control+'relax_geometry trm 1E-3\n') # Relax!
    new_control.close()
    os.chdir(folder)                             # Change directory
    print 'Central Point'
    if run_aims:
      os.system(AIMS_CALL+' > '+filename)       # Run aims and pipe the output 
						#  into a file named 'filename'
    if submit_script: replace_submission(template_job, name, 0, filename)
    os.chdir('..') 

  ############################################################################
  # Check for relaxed geometry
  if os.path.exists(folder+'/geometry.in.next_step'):  
    geometry=open(folder+'/geometry.in.next_step','r')
  else:
    geometry=open('geometry.in','r')
    
  # Read input geometry  
  n_line=0
  struc=structure()
  lines=geometry.readlines()

  for line in lines:
    n_line= n_line+1
    if line.rfind('set_vacuum_level')!=-1:   # Vacuum Level
      struc.vacuum_level=float(split_line(line)[-1])
    if line.rfind('lattice_vector')!=-1:    # Lattice vectors and periodic
      lat=split_line(line)[1:]
      struc.lattice_vector=append(struc.lattice_vector,float64(array(lat))
			  [newaxis,:],axis=0)
      struc.periodic=True
    if line.rfind('atom')!=-1:              # Set atoms
      line_vals=split_line(line)
      at=Atom(line_vals[-1],line_vals[1:-1])
      if n_line<len(lines):
	  nextline=lines[n_line]
	  if nextline.rfind('constrain_relaxation')!=-1: # constrained?
	    at=Atom(line_vals[-1],line_vals[1:-1],True)
	  else:
	    at=Atom(line_vals[-1],line_vals[1:-1])
      struc.join(at)         
  geometry.close()  
  n_atoms= struc.n()
  n_constrained=n_atoms-sum(struc.constrained)

  # Atomic mass file
  mass_file=open(atomicmasses,'w')
  mass_vector=zeros([0])
  for at_unconstrained in struc.atoms[struc.constrained==False]:
      mass_vector=append(mass_vector,ones(3)*1./sqrt(at_unconstrained.mass()))
      line='{0:10.5f}'.format(at_unconstrained.mass())
      for i in range(3):
	line=line+'{0:11.4f}'.format(at_unconstrained.coord[i])
      line=line+'{0:}\n'.format(at_unconstrained.kind)
      mass_file.writelines(line)
  mass_file.close()

  # Init
  dip = zeros([n_constrained*3,3])
  hessian = zeros([n_constrained*3,n_constrained*3])
  index=0
  counter=1
  
  # Set up / Read folders for displaced atoms
  for atom in arange(n_atoms)[struc.constrained==False]:
    for coord in arange(3):
      for delta in deltas:
	filename=name+'.i_atom_'+str(atom)+'.i_coord_'+str(coord)+'.displ_'+\
		str(delta)+'.out'
	folder=name+'.i_atom_'+str(atom)+'.i_coord_'+str(coord)+'.displ_'+\
		str(delta)       
	if mode=='0' or mode=='2':   # Put new geometry and control.in into folder
	  struc_new=copy.deepcopy(struc)
	  struc_new.atoms[atom].coord[coord]=\
	                              struc_new.atoms[atom].coord[coord]+delta
	  geoname='geometry.i_atom_'+str(atom)+'.i_coord_'+str(coord)+\
	          '.displ_'+str(delta)+'.in'
	  if not os.path.exists(folder): os.mkdir(folder)
	  new_geo=open(folder+'/geometry.in','w')
	  newline='#\n# temporary structure-file for finite-difference '+\
		  'calculation of forces\n'
	  newline=newline+'# displacement {0:8.4f} of \# atom '.format(delta)+\
			  '{0:5} direction {1:5}\n#\n'.format(atom,coord)
	  new_geo.writelines(newline+struc_new.to_str())
	  new_geo.close()
	  new_control=open(folder+'/control.in','w')
	  template_control=template_control.replace('relax_geometry',
	                                           '#relax_geometry')
	  new_control.write(template_control+'compute_forces .true. \n'+\
			    'final_forces_cleaned '+\
			    '.true. \n')
	  new_control.close()
	  os.chdir(folder)                                   # Change directoy
	  print 'Processing atom: '+str(atom+1)+'/'+str(n_atoms)+', coord.: '+\
				 str(coord+1)+'/'+str(3)+', delta: '+str(delta)
	  if run_aims:                           
	    os.system(AIMS_CALL+' > '+filename)# Run aims and pipe the output 
						#  into a file named 'filename'
	  if submit_script: replace_submission(template_job, name, counter, 
	                                       filename)
	  # os.system('qsub job.sh') # Mind the environment variables
	  os.chdir('..') 

	if mode=='1' or mode=='2':   # Read output 
	  forces_reached=False
	  atom_count=0
	  data=open(folder+'/'+filename)
	  for line in data.readlines():
	    if line.rfind('Dipole correction potential jump')!=-1:
	      dip_jump = float(split_line(line)[-2]) # Periodic
	    if line.rfind('| Total dipole moment [eAng]')!=-1:
	      dip_jump = float64(split_line(line)[-3:]) # Cluster
	    if forces_reached and atom_count<n_atoms: # Read Forces
	      struc.atoms[atom_count].force=float64(split_line(line)[2:])
	      atom_count=atom_count+1
	      if atom_count==n_atoms:
		forces_reached=False
	    if line.rfind('Total atomic forces')!=-1:
	      forces_reached=True
	  data.close()
	  if struc.periodic:
	    pass
      #dip[index,2]=dip[index,2]+dip_jump*coeff[deltas==delta]*c_zero
	  else:
	    dip[index,:]=dip[index,:]+dip_jump*coeff[deltas==delta]*c_zero
	  forces=array([])
	  for at_unconstrained in struc.atoms[struc.constrained==False]:
	    forces=append(forces,coeff[deltas==delta]*at_unconstrained.force)
	  hessian[index,:]=hessian[index,:]+forces*c_zero
	counter=counter+1
      index=index+1  
  if mode=='1' or mode=='2': # Calculate vibrations
    print 'Entering hessian diagonalization'
    print 'Number of atoms                = '+str(n_atoms)
    print 'Name of Hessian input file     = '+hessianname
    print 'Name of grad dipole input file = '+graddipolename
    print 'Name of Masses  input file     = '+atomicmasses
    print 'Name of XYZ output file        = '+xyzfile
    print 'Threshold for Matrix elements  = '+str(hessian_thresh)
    if (hessian_thresh < 0.0): print '     All matrix elements are taken'+\
				    ' into account by default\n'
    savetxt(hessianname,hessian)
    savetxt(graddipolename,dip)

    mass_mat=mass_vector[:,newaxis]*mass_vector[newaxis,:]
    hessian[abs(hessian)<hessian_thresh]=0.0
    hessian=hessian*mass_mat*hessian_factor
    hessian=(hessian+transpose(hessian))/2.
    # Diagonalize hessian (scipy)
    print 'Solving eigenvalue system for Hessian Matrix'
    freq, eig_vec = eig(hessian)
    print 'Done ... '
    eig_vec=eig_vec[:,argsort(freq)]
    freq=sort(sign(freq)*sqrt(abs(freq)))
    ZPE=hbar*(freq)/(2.0*eV)
    freq = (freq)/(200.*pi*c)
    
    
    grad_dipole = dip * grad_dipole_factor
    eig_vec = eig_vec*mass_vector[:,newaxis]*ones(len(mass_vector))[newaxis,:]
    infrared_intensity = sum(dot(transpose(grad_dipole),eig_vec)**2,axis=0)*\
                         ir_factor
    reduced_mass=sum(eig_vec**2,axis=0)
    norm = sqrt(reduced_mass)
    eig_vec = eig_vec/norm
    
    # The rest is output, xyz, IR,...
    print 'Results\n'
    print 'List of all frequencies found:'
    print 'Mode number      Frequency [cm^(-1)]   Zero point energy [eV]   '+\
          'IR-intensity [D^2/Ang^2]'
    for i in range(len(freq)):
      print '{0:11}{1:25.8f}{2:25.8f}{3:25.8f}'.format(i+1,freq[i],ZPE[i],
                                                       infrared_intensity[i])
    print '\n'
    print 'Summary of zero point energy for entire system:'
    print '| Cumulative ZPE               = {0:15.8f} eV'.format(sum(ZPE))
    print '| without first six eigenmodes = {0:15.8f} eV\n'.format(sum(ZPE)-
                                                                 sum(ZPE[:6]))
    print 'Stability checking - eigenvalues should all be positive for a '+\
           'stable structure. '
    print 'The six smallest frequencies should be (almost) zero:'
    string=''
    for zz in ZPE[:6]: string=string+'{0:25.8f}'.format(zz)
    print string
    print 'Compare this with the largest eigenvalue, '
    print '{0:25.8f}'.format(freq[-1])
    
    nums=arange(n_atoms)[struc.constrained==False]
    nums2=arange(n_atoms)[struc.constrained]
    newline=''
    newline_ir='[INT]\n'
    if options.molden:
      newline_molden='[Molden Format]\n[GEOMETRIES] XYZ\n'
      newline_molden=newline_molden+'{0:6}\n'.format(n_atoms)+'\n'
      for i_atoms in range(n_constrained):
	newline_molden=newline_molden+'{0:6}'.format(
	                                      struc.atoms[nums[i_atoms]].kind)
	for i_coord in range(3):
	  newline_molden=newline_molden+'{0:10.4f}'.format(
	                            struc.atoms[nums[i_atoms]].coord[i_coord])
	newline_molden=newline_molden+'\n'
      newline_molden=newline_molden+'[FREQ]\n'   
      for i in range(len(freq)):
	newline_molden=newline_molden+'{0:10.3f}\n'.format(freq[i])
      newline_molden=newline_molden+'[INT]\n' 
      for i in range(len(freq)):
	newline_molden=newline_molden+'{0:17.6e}\n'.format(
	                                                infrared_intensity[i])
      newline_molden=newline_molden+'[FR-COORD]\n'
      newline_molden=newline_molden+'{0:6}\n'.format(n_atoms)+'\n'
      for i_atoms in range(n_constrained):
	newline_molden=newline_molden+'{0:6}'.format(
	                                      struc.atoms[nums[i_atoms]].kind)
	for i_coord in range(3):
	  newline_molden=newline_molden+'{0:10.4f}'.format(
	                       struc.atoms[nums[i_atoms]].coord[i_coord]/bohr)
	newline_molden=newline_molden+'\n'
      newline_molden=newline_molden+'[FR-NORM-COORD]\n'
    
    for i in range(len(freq)):
      newline=newline+'{0:6}\n'.format(n_atoms)
      if freq[i]>0:
	newline=newline+'stable frequency at '
      elif freq[i]<0:
	newline=newline+'unstable frequency at '
	if options.distort and freq[i]<-50:
	  struc_new=copy.deepcopy(struc)
	  for i_atoms in range(n_constrained):
	    for i_coord in range(3):
	      struc_new.atoms[i_atoms].coord[i_coord]=\
	      struc_new.atoms[i_atoms].coord[i_coord]+\
		    eig_vec[(i_atoms)*3+i_coord,i]                        
	  geoname=name+'.distorted.vibration_'+str(i+1)+'.geometry.in'
	  new_geo=open(geoname,'w')
	  newline_geo='#\n# distorted structure-file for based on eigenmodes\n'
	  newline_geo=newline_geo+\
	          '# vibration {0:5} :{1:10.3f} 1/cm\n#\n'.format(i+1,freq[i])
	  new_geo.writelines(newline_geo+struc_new.to_str())
	  new_geo.close()
      elif freq[i]==0:
	newline=newline+'translation or rotation '
      newline=newline+'{0:10.3f} 1/cm IR int. is '.format(freq[i])
      newline=newline+'{0:10.4e} D^2/Ang^2; red. mass is '.format(
                                                        infrared_intensity[i])
      newline=newline+'{0:5.3f} a.m.u.; force const. is '.format(
                                                          1.0/reduced_mass[i])
      newline=newline+'{0:5.3f} mDyne/Ang.\n'.format(((freq[i]*(200*pi*c))**2)*
	      (1.0/reduced_mass[i])*at_u*1.e-2)
      if options.molden: newline_molden=newline_molden+\
                                               'vibration {0:6}\n'.format(i+1)
      for i_atoms in range(n_constrained):
	newline=newline+'{0:6}'.format(struc.atoms[nums[i_atoms]].kind)
	for i_coord in range(3):
	  newline=newline+'{0:10.4f}'.format(
	                            struc.atoms[nums[i_atoms]].coord[i_coord])
	for i_coord in range(3):
	  newline=newline+'{0:10.4f}'.format(eig_vec[(i_atoms)*3+i_coord,i])
	  if options.molden: newline_molden=newline_molden+'{0:10.4f}'.format(
	                     eig_vec[(i_atoms)*3+i_coord,i]/bohr)
	newline=newline+'\n'
	if options.molden: newline_molden=newline_molden+'\n'
      for i_atoms in range(n_atoms-n_constrained):
	newline=newline+'{0:6}'.format(struc.atoms[nums2[i_atoms]].kind)
	for i_coord in range(3):
	  newline=newline+'{0:10.4f}'.format(
	                           struc.atoms[nums2[i_atoms]].coord[i_coord])
	for i_coord in range(3):
	  newline=newline+'{0:10.4f}'.format(0.0)
	newline=newline+'\n'
      newline_ir=newline_ir+'{0:10.4e}\n'.format(infrared_intensity[i])
    xyz=open(xyzfile,'w')
    xyz.writelines(newline)
    xyz.close()
    ir=open(irname,'w')
    ir.writelines(newline_ir)
    ir.close()
    if options.molden:
      molden=open(moldenname,'w')
      molden.writelines(newline_molden)
      molden.close()
    
    if (mode=='1' or mode=='2') and options.plot:
      x=linspace(freq.min()-500,freq.max()+500,1000)
      z=zeros(len(x))
      for i in range(len(freq)):
	z[argmin(abs(x-freq[i]))]=infrared_intensity[i]
      window_len=150
      lorentzian=lorentz(pi,broadening,arange(250))#signal.gaussian(window_len,broadening)
      s=r_[z[window_len-1:0:-1],z,z[-1:-window_len:-1]]
      z_convolve=convolve(lorentzian/lorentzian.sum(),s,mode='same')[
	                                           window_len-1:-window_len+1]
      fig=figure(0)
      ax=fig.add_subplot(111)
      ax.plot(x,z_convolve,'r',lw=2)
      ax.set_xlim([freq.min()-500,freq.max()+500])
      ax.set_ylim([-0.01,ax.get_ylim()[1]])
      ax.set_yticks([])
      ax.set_xlabel('Frequency [1/cm]',size=20)
      ax.set_ylabel('Intensity [a.u.]',size=20)
      fig.savefig(name+'_IR_spectrum.pdf')
      
    print '\n Done. '
Example #29
def plot_mea(neuron_dict, ext_sim_dict, neural_sim_dict):
    pl.close('all')
    fig_all = pl.figure(figsize=[15, 15])
    ax_all = fig_all.add_axes([0.1, 0.1, 0.8, 0.8], frameon=False)
    for elec in range(len(ext_sim_dict['elec_z'])):
        ax_all.plot(ext_sim_dict['elec_z'][elec], ext_sim_dict['elec_y'][elec], color='b',\
                marker='$E%i$'%elec, markersize=20 )
    legends = []
    for i, neur in enumerate(neuron_dict):
        folder = os.path.join(neural_sim_dict['output_folder'],
                              neuron_dict[neur]['name'])
        coor = np.load(os.path.join(folder, 'coor.npy'))
        x, y, z = coor
        n_compartments = len(x)
        fig = pl.figure(figsize=[10, 10])
        ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], frameon=False)
        # Plot the electrodes
        for elec in range(len(ext_sim_dict['elec_z'])):
            ax.plot(ext_sim_dict['elec_z'][elec], ext_sim_dict['elec_y'][elec], color='b',\
                   marker='$%i$'%elec, markersize=20 )
        # Plot the neuron
        xmid, ymid, zmid = np.load(folder + '/coor.npy')
        xstart, ystart, zstart = np.load(folder + '/coor_start.npy')
        xend, yend, zend = np.load(folder + '/coor_end.npy')
        diam = np.load(folder + '/diam.npy')
        length = np.load(folder + '/length.npy')
        n_compartments = len(diam)
        for comp in xrange(n_compartments):
            if comp == 0:
                xcoords = pl.array([xmid[comp]])
                ycoords = pl.array([ymid[comp]])
                zcoords = pl.array([zmid[comp]])
                diams = pl.array([diam[comp]])
            else:
                if zmid[comp] < 0.400 and zmid[comp] > -.400:
                    xcoords = pl.r_[
                        xcoords,
                        pl.linspace(xstart[comp], xend[comp], length[comp] *
                                    3 * 1000)]
                    ycoords = pl.r_[
                        ycoords,
                        pl.linspace(ystart[comp], yend[comp], length[comp] *
                                    3 * 1000)]
                    zcoords = pl.r_[
                        zcoords,
                        pl.linspace(zstart[comp], zend[comp], length[comp] *
                                    3 * 1000)]
                    diams = pl.r_[
                        diams,
                        pl.linspace(diam[comp], diam[comp], length[comp] * 3 *
                                    1000)]
        argsort = pl.argsort(-xcoords)
        ax.scatter(zcoords[argsort],
                   ycoords[argsort],
                   s=20 * (diams[argsort] * 1000)**2,
                   c=xcoords[argsort],
                   edgecolors='none',
                   cmap='gray')
        ax_all.plot(zmid[0],
                    ymid[0],
                    marker='$%i$' % i,
                    markersize=20,
                    label='%i: %s' % (i, neur))
        #legends.append('%i: %s' %(i, neur))
        ax.axis(ext_sim_dict['plot_range'])
        ax.axis('equal')
        ax.axis(ext_sim_dict['plot_range'])
        ax.set_xlabel('z [mm]')
        ax.set_ylabel('y [mm]')
        fig.savefig(os.path.join(neural_sim_dict['output_folder'],\
                  'neuron_figs', '%s.png' % neur))
    ax_all.axis('equal')
    ax_all.axis(ext_sim_dict['plot_range'])  # apply the plot range to the summary axes
    ax_all.set_xlabel('z [mm]')
    ax_all.set_ylabel('y [mm]')
    ax_all.legend()
    fig_all.savefig(os.path.join(neural_sim_dict['output_folder'], 'fig.png'))
    def __init__(self, logger=None, debug=True):
        hostname = S.socket.gethostname()
        self.ip = '192.168.168.233'  # IP address of az/el arduino
        self.port = 4321
        self.debug = debug
        self.lock = threading.Lock()
        self.setLogger(logger)

        # az conversion
        self.step2AngleAz = lambda step: 360. * step / 4800.
        self.angle2StepAz = lambda angle: int(angle * 4800. / 360)
        # for reference Az
        #spd = 12.  # nominal observing speed in deg/s. hardwired in Arduino
        #usteps_per_step = 8.
        #steps_per_motor_rev = 200.
        #gear_ratio = 3.
        #usteps_per_peri_rev = usteps_per_step * steps_per_motor_rev * gear_ratio

        # el conversion
        stepPole = array([
            0., 100., 200., 300., 400., 500., 600., 700., 800., 900., 1000.,
            1100., 1200., 1300., 1400., 1500., 1600., 1700., 1800., 1900.,
            2000., 2100., 2200., 2300., 2400., 2500., 2600., 2700., 2800.,
            2900.
        ])

        # new Nov 2017 calibration results
        # measured up and down going. Using only down going
        # fit a cubic polynomial to angle and interpolated to stepPole positions
        # to remove measurement error.
        anglePole = array([
            90.2, 87., 83.9, 80.9, 78., 75.2, 72.4, 69.7, 67., 64.4, 61.8,
            59.3, 56.8, 54.3, 51.8, 49.4, 46.9, 44.5, 42., 39.5, 37.1, 34.5,
            32., 29.4, 26.8, 24.1, 21.4, 18.6, 15.8, 12.9
        ])

        # used for 2015 and 2016. New angle calibration in Nov 2017.
        # anglePole = array([93.75, 90.45, 87.8, 84.95, 82.15,
        #                   79.45  , 76.85, 74.3, 71.85, 69.4, 67.,
        #                   64.65, 62.25, 60.05, 57.65, 55.35, 53.15,
        #                   50.85, 48.55, 46.2, 43.95, 41.7, 39.3, 36.95,
        #                   34.5, 32.2, 29.6, 27.05, 24.6,
        #                    21.8, 19.2, 16.55, 13.8, 11.15, 8.94])

        stepSummit = array([
            0., 100., 200., 300., 400., 500., 600., 700., 800., 900., 1000.,
            1100., 1200., 1300., 1400., 1500., 1600., 1700., 1800., 1900.,
            2000., 2100., 2200., 2300., 2400., 2500., 2600., 2700., 2800.,
            2900., 3000., 3100., 3200.
        ])
        angleSummit = array([
            92.6, 88.85, 85.35, 82.2, 79.15, 76.35, 73.7, 70.7, 67.95, 65.25,
            62.85, 60.35, 57.7, 55.25, 52.65, 50.25, 47.6, 45.2, 42.7, 40.15,
            37.5, 35.0, 32.5, 29.55, 27.2, 24.3, 21.25, 18.5, 15.6, 12.5, 9.45,
            6.0, 2.65
        ])
        if 'wvr1' in hostname:
            angle = anglePole
            step = stepPole
        elif 'wvr2' in hostname:
            angle = angleSummit
            step = stepSummit
        else:
            angle = angleSummit
            step = stepSummit
        q = argsort(angle)
        self.step2AngleEl = scipy.interpolate.interp1d(step,
                                                       angle,
                                                       kind='linear')
        self.angle2StepEl = scipy.interpolate.interp1d(angle[q],
                                                       step[q],
                                                       kind='linear')
        self.initPort()
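
A detail worth pulling out of the elevation calibration above: scipy.interpolate.interp1d expects increasing x values, and the measured angles decrease with step, so the inverse map (angle -> step) is built on argsort-ed arrays. The sketch below shows just that pattern with made-up calibration numbers; it is not the instrument's real calibration.

import numpy as np
import scipy.interpolate

step = np.arange(0.0, 3000.0, 100.0)    # hypothetical encoder steps
angle = 90.0 - 0.027 * step             # hypothetical angles, decreasing with step

step2angle = scipy.interpolate.interp1d(step, angle, kind='linear')
q = np.argsort(angle)                   # reorder so angle is increasing
angle2step = scipy.interpolate.interp1d(angle[q], step[q], kind='linear')

print(float(step2angle(1234.0)))        # step -> angle
print(float(angle2step(45.0)))          # angle -> step, valid because x is sorted
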
def scatter_dists(models_df, outdir=FIGS_DIR):
    df = models_df[['plant', 'model', 'dist']]
    model_dists = defaultdict(list)
    unique_plants = 0
    for name, group in df.groupby('plant'):
        if len(group['model'].unique()) != 4:
            print group
        for model, group2 in group.groupby('model'):
            #group2 = group2.head(n=20)
            model_dists[model].append(pylab.mean(group2['dist']))
        unique_plants += 1

    print "-------------"
    print "unique scatter plants", unique_plants
    print "-------------"
    
    order = pylab.argsort(model_dists['plant'])
    
    model_colors = {'plant' : 'r', 'centroid' : 'g', 'random' : 'm', 'barabasi' : 'c'}
    model_markers = {'plant' : 'x', 'centroid' : 'o', 'random' : '^', 'barabasi' : 's'}
    model_labels = {'plant': 'Plant arbor', 'centroid' : 'Centroid', 'random' : 'Random', 'barabasi' : 'Barabasi-Albert'}
   
    max_dist = float('-inf')
    min_dist = float('inf')
    
    pylab.figure()
    sns.set()
    
    for model, dists in model_dists.iteritems():
        dists = pylab.array(dists)
        y = dists[order]
        if LOG_DIST:
            y = pylab.log10(y)

        #y = y[::5]
        x = pylab.arange(len(y))

        max_dist = max(max_dist, max(y))
        min_dist = min(min_dist, min(y))

        color = model_colors[model]
        marker = model_markers[model]
        label = model_labels[model]
        pylab.scatter(x, y, label=label, c=color, marker=marker)
    
    
    pylab.xlabel('plant index', fontsize=20)
    ylab = 'distance to Pareto front'
    if LOG_DIST:
        ylab = 'log(' + ylab + ')'
    pylab.ylabel(ylab, fontsize=20)
    pylab.xticks(fontsize=20)
    pylab.yticks(fontsize=20)
    leg = pylab.legend(ncol=2, frameon=True)
    leg.get_frame().set_linewidth(5)
    leg.get_frame().set_edgecolor('k')
    ax = pylab.gca()
    pylab.setp(ax.get_legend().get_texts(), fontsize=20) # for legend text
    pylab.ylim(min_dist - 0.1, max_dist + 0.6)

    pylab.tight_layout()
    pylab.savefig('%s/pareto_dists.pdf' % outdir, format='pdf')
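
The ordering trick in scatter_dists is that every model's distances are reindexed by the argsort of the reference ('plant') model, so index i on the x-axis refers to the same plant in every series. A minimal sketch of that reindexing with made-up numbers:

import numpy as np

model_dists = {
    'plant':    np.array([0.5, 0.1, 0.9, 0.3]),
    'centroid': np.array([1.5, 1.1, 1.9, 1.3]),
}

order = np.argsort(model_dists['plant'])     # plants sorted by their own distance
for model, dists in model_dists.items():
    # every series is printed (or plotted) in the same plant order
    print('%s %s' % (model, dists[order]))
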
def Main():
    options, _ = MakeOpts().parse_args(sys.argv)
    assert options.experiment_id and options.plate_ids and options.reading_label
    plates = map(str.strip, options.plate_ids.split(','))
    
    print 'Reading plates %s from experiment %s' % (', '.join(plates),
                                                    options.experiment_id)
    
    db = MySQLDatabase(host='hldbv02', user='******', 
                       passwd='a1a1a1', db='tecan')

    print 'Calculating growth rates'
    growth_calc = growth.SlidingWindowGrowthCalculator(window_size=options.window_size,
                                                       minimum_level=options.lower_bound,
                                                       maximum_level=options.upper_bound)
    
    plate_names = {'1': 'Glucose',
                   '2': 'Gluconate'}
    colormap = {'1': 'k',
                '2': 'r'}
    f1 = pylab.figure(0)
    for plate_id in plates:
        p = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        rates, unused_stationaries = growth_calc.CalculatePlateGrowth(
            p, options.reading_label)
        mean_rates = MeanWithConfidenceIntervalDict(rates)
        
        means = []
        errors = []
        concs = []
        for label in mean_rates.keys():
            conc = TryFloat(label)
            if conc is False:
                continue
            
            mean, error = mean_rates[label]
            means.append(mean)
            errors.append(error)
            concs.append(conc)
        
        means = pylab.array(means)
        errors = pylab.array(errors)
        concs = pylab.array(concs)
        
        max_mean = max(means)
        norm_means = means / max_mean
        norm_errors = errors / max_mean
        
        label = plate_names[plate_id]
        color = colormap[plate_id]
        """
        pylab.subplot(121)
        pylab.plot(concs, norm_means, color=color, linestyle='None',
                   marker='.', label=label)
        pylab.errorbar(concs, norm_means, yerr=norm_errors, ecolor=color,
                       fmt=None)
        """
        
        #pylab.subplot(122)
        pcts = concs*100
        idx = pylab.find(pcts > 1e-4)
        my_pcts = pcts[idx]
        my_means = means[idx]
        my_errs = errors[idx]
        
        order = pylab.argsort(my_pcts)
        my_pcts = my_pcts[order]
        my_means = my_means[order]
        my_errs = my_errs[order]
        
        pylab.plot(my_pcts, my_means, color=color, linestyle='None',
                   linewidth=4, marker='.', markersize=15, label=label)
        pylab.errorbar(my_pcts, my_means, yerr=my_errs, ecolor=color,
                       fmt=None, linewidth=1)
        
        smoothed = smoothing.WeightedAverageSmoother(pylab.log(my_pcts), my_means,
                                                     sigma=0.7)
        log_xs = pylab.arange(pylab.log(1e-4), pylab.log(2.2), 1e-3)
        xs = pylab.exp(log_xs)
        ys = smoothed(log_xs)
        pylab.plot(xs, ys, color=color, linewidth=3, linestyle='--')
        
        
    
    """            
    pylab.subplot(121)
    pylab.xlabel('CAP Concentration (fraction of standard concentration)')
    pylab.ylabel('Relative Specific Growth Rate (/hour)')
    pylab.xlim(-0.1,0.2)
    """
    
    """
    pylab.subplot(122)
    pylab.xlabel('CAP Concentration (fraction of standard concentration)')
    pylab.ylabel('Absolute Specific Growth Rate (/hour)')
    """
    
    pylab.xscale('log')
    pylab.xlabel('Substrate concentration (m/v %)')
    pylab.ylabel('Specific Growth Rate (/hour)')
    
    #pylab.xlim(-0.1,0.2)
    pylab.legend(loc='upper left')
    pylab.show()
def Main():
    options, _ = MakeOpts().parse_args(sys.argv)
    assert options.experiment_id and options.plate_ids and options.reading_label
    plates = map(str.strip, options.plate_ids.split(','))

    print 'Reading plates %s from experiment %s' % (', '.join(plates),
                                                    options.experiment_id)

    db = MySQLDatabase(host='hldbv02',
                       user='******',
                       passwd='a1a1a1',
                       db='tecan')

    print 'Calculating growth rates'
    growth_calc = growth.SlidingWindowGrowthCalculator(
        window_size=options.window_size,
        minimum_level=options.lower_bound,
        maximum_level=options.upper_bound)

    plate_names = {'1': 'Glucose', '2': 'Gluconate'}
    colormap = {'1': 'k', '2': 'r'}
    f1 = pylab.figure(0)
    for plate_id in plates:
        p = Plate96.FromDatabase(db, options.experiment_id, plate_id)
        rates, unused_stationaries = growth_calc.CalculatePlateGrowth(
            p, options.reading_label)
        mean_rates = MeanWithConfidenceIntervalDict(rates)

        means = []
        errors = []
        concs = []
        for label in mean_rates.keys():
            conc = TryFloat(label)
            if conc is False:
                continue

            mean, error = mean_rates[label]
            means.append(mean)
            errors.append(error)
            concs.append(conc)

        means = pylab.array(means)
        errors = pylab.array(errors)
        concs = pylab.array(concs)

        max_mean = max(means)
        norm_means = means / max_mean
        norm_errors = errors / max_mean

        label = plate_names[plate_id]
        color = colormap[plate_id]
        """
        pylab.subplot(121)
        pylab.plot(concs, norm_means, color=color, linestyle='None',
                   marker='.', label=label)
        pylab.errorbar(concs, norm_means, yerr=norm_errors, ecolor=color,
                       fmt=None)
        """

        #pylab.subplot(122)
        pcts = concs * 100
        idx = pylab.find(pcts > 1e-4)
        my_pcts = pcts[idx]
        my_means = means[idx]
        my_errs = errors[idx]

        order = pylab.argsort(my_pcts)
        my_pcts = my_pcts[order]
        my_means = my_means[order]
        my_errs = my_errs[order]

        pylab.plot(my_pcts,
                   my_means,
                   color=color,
                   linestyle='None',
                   linewidth=4,
                   marker='.',
                   markersize=15,
                   label=label)
        pylab.errorbar(my_pcts,
                       my_means,
                       yerr=my_errs,
                       ecolor=color,
                       fmt=None,
                       linewidth=1)

        smoothed = smoothing.WeightedAverageSmoother(pylab.log(my_pcts),
                                                     my_means,
                                                     sigma=0.7)
        log_xs = pylab.arange(pylab.log(1e-4), pylab.log(2.2), 1e-3)
        xs = pylab.exp(log_xs)
        ys = smoothed(log_xs)
        pylab.plot(xs, ys, color=color, linewidth=3, linestyle='--')
    """            
    pylab.subplot(121)
    pylab.xlabel('CAP Concentration (fraction of standard concentration)')
    pylab.ylabel('Relative Specific Growth Rate (/hour)')
    pylab.xlim(-0.1,0.2)
    """
    """
    pylab.subplot(122)
    pylab.xlabel('CAP Concentration (fraction of standard concentration)')
    pylab.ylabel('Absolute Specific Growth Rate (/hour)')
    """

    pylab.xscale('log')
    pylab.xlabel('Substrate concentration (m/v %)')
    pylab.ylabel('Specific Growth Rate (/hour)')

    #pylab.xlim(-0.1,0.2)
    pylab.legend(loc='upper left')
    pylab.show()
Exemple #34
0
def cluster_imgs_pca_kmeans(imgpaths, bb_map=None, k=2, N=3, do_align=True):
    """ Using PCA and K-means, cluster the imgpaths into 'k' clusters,
    using the first 'N' principal components.
    Algorithm details:
        Input: Set of image patches A, of size NxM
        0.) Discretize the image patch into K N'xM' equally-sized slices.
        1.) Using the discretized image patches A', run PCA to extract
            the slices S that maximize the variance
        2.) Run k-means (k=2) on the slices S.
    Input:
        list imgpaths: (imgpath_i, ...)
        dict bb_map: If you want to only cluster based on a sub-region
                     of each image, pass in 'bb_map', which is:
                         {str imgpath: (y1,y2,x1,x2)}
        int k: number of clusters
        int N: Number of principle components to use. (NOT USED)
    Output:
        dict clusters, maps {str clusterID: [imgpath_i, ...]}
    """
    data = imgpaths_to_mat(imgpaths, bb_map=bb_map, do_align=do_align)
    '''
    if bb_map is None:
        bb_map = {}
        h_big, w_big = get_largest_img_dims(imgpaths)
    else:
        bb_big = get_largest_bb(bb_map.values())
        h_big = int(abs(bb_big[0] - bb_big[1]))
        w_big = int(abs(bb_big[2] - bb_big[3]))
    # TEMP: Cut off first half, last quarter
    # w_big = (w_big / 2) - (w_big / 4)
    # 0.) First, convert images into MxN array, where M is the number
    #     of images, and N is the number of pixels of each image.
    data = np.zeros((len(imgpaths), h_big*w_big))
    for row, imgpath in enumerate(imgpaths):
        img = scipy.misc.imread(imgpath, flatten=True)
        # TEMP: Cut off first half
        # img = img[:, img.shape[1]/2:(3*img.shape[1])/4]
        # img = util_gui.autothreshold_numpy(img, method="otsu")

        bb = bb_map.get(imgpath, None)
        if bb is None:
            patch = resize_mat(img, (h_big, w_big))
        else:
            # Must make sure that all patches are the same shape.
            patch = resize_mat(img[bb[0]:bb[1], bb[2]:bb[3]], (h_big, w_big))
        # Reshape 'patch' to be a single row of pixels, instead of rows
        # of pixels.
        patch = patch.reshape(1, patch.shape[0]*patch.shape[1])
        data[row,:] = patch
    '''
    # Inspiration for PCA-related code comes from:
    #     http://glowingpython.blogspot.it/2011/07/pca-and-image-compression-with-numpy.html

    # 1.) Call PCA on the data matrix, extract first N principle comps
    M = (data - np.mean(data.T, axis=1))  # subtract mean, along cols
    (latent, coeff) = np.linalg.eig(np.cov(M))
    p = np.size(coeff, axis=1)

    idx = pylab.argsort(latent)  # sort eigenvalues
    idx = idx[::-1]  # descending order (most 'relevant' first)
    # idx is a sorted list of indices into imgpaths, i.e. if there
    # are 5 images, and idx is:
    #   idx := [4, 1, 3, 2, 0]
    # then this means that imgpaths[4] most explains the variance,
    # followed by imgpaths[1], etc.
    idx = idx[:k]
    cluster_centers = data[idx, :]
    clustering = {}  # maps {int clusterID: [imgpath_i, ...]}

    # 2.) Nearest-Neighbors to cluster_centers
    for i, imgarray in enumerate(data):
        best_dist, best_j = None, None
        for j, clustercenter in enumerate(cluster_centers):
            dist = np.linalg.norm(imgarray - clustercenter)
            if best_dist is None or dist < best_dist:
                best_dist = dist
                best_j = j
        clustering.setdefault(best_j, []).append(imgpaths[i])
    return clustering
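
The part of cluster_imgs_pca_kmeans that is easy to misread is the eigenvalue sort: argsort gives ascending order, so it is reversed to put the largest eigenvalue first, and (following the function's own reading of idx) the resulting indices are treated as row/image indices. A tiny standalone sketch of that step, with a made-up data matrix in place of image patches:

import numpy as np

data = np.array([[1.0, 2.0, 3.0, 4.0],
                 [1.1, 1.9, 3.2, 3.9],
                 [9.0, 1.0, 0.5, 0.2],
                 [0.1, 0.1, 0.1, 0.1]])

M = data - np.mean(data.T, axis=1)          # subtract the per-column mean
latent, coeff = np.linalg.eig(np.cov(M))    # eigen-decompose the row covariance

idx = np.argsort(latent)[::-1]              # largest eigenvalue first
print(idx)                                  # rows ordered by explained variance
print(latent[idx])                          # eigenvalues, descending
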
def vals_correlation(df, val1, val2, **kwargs):
    print "-----------------------------------------------------"
    print "%s-%s correlation" % (val1, val2)
    df2 = df.drop_duplicates(subset=['neuron name', 'neuron type'])

    xtransform = None
    ytransform = None

    if 'xtransform' in kwargs:
        xtransform = kwargs['xtransform']
    if 'ytransform' in kwargs:
        ytransform = kwargs['ytransform']

    # Apply the log-transform last so it is not clobbered by the defaults above.
    if 'logtransform' in kwargs and kwargs['logtransform']:
        xtransform = pylab.log10
        ytransform = pylab.log10
        df2 = df2[(df2[val1] > 0) & (df2[val2] > 0)]

    v1 = df2[val1]
    v2 = df2[val2]

    if xtransform != None:
        v1 = xtransform(v1)
    if ytransform != None:
        v2 = ytransform(v2)

    print pearsonr(v1, v2)
    print spearmanr(v1, v2)

    regression_df = df2.copy()
    add_regression_cols(regression_df, val1, val2, xtransform=xtransform,\
                        ytransform=ytransform)

    grouping = None
    if 'grouping' in kwargs:
        grouping = kwargs['grouping']
    else:
        grouping = 'neuron type'

    grouping_subset = None
    if 'grouping_subset' in kwargs:
        assert 'grouping' in kwargs
        grouping_subset = kwargs['grouping_subset']
    else:
        #grouping_subset = ['axon', 'truncated axon', 'apical dendrite', 'basal dendrite']
        grouping_subset = df2[grouping].unique()

    sns.set()
    pylab.figure()
    nrows = len(grouping_subset) + 1
    pylab.subplot(nrows, 1, 1)
    pylab.scatter(v1, v2)
    x = v1
    y = regression_df['%s_hat' % val2]
    order = pylab.argsort(x)
    x = x[order]
    y = y[order]
    pylab.plot(x, y, c='g')

    row = 2

    df3 = df2[df2[grouping].isin(grouping_subset)]
    for name, group in df3.groupby(grouping):
        print name
        pylab.subplot(nrows, 1, row)
        row += 1
        v1 = pylab.array(group[val1])
        v2 = pylab.array(group[val2])
        if xtransform != None:
            v1 = xtransform(v1)
        if ytransform != None:
            v2 = ytransform(v2)
        print pearsonr(v1, v2)
        print spearmanr(v1, v2)

        regression_df = group.copy()
        add_regression_cols(regression_df, val1, val2, xtransform=xtransform,\
                            ytransform=ytransform)

        pylab.scatter(v1, v2)
        x = pylab.array(v1)
        y = pylab.array(regression_df['%s_hat' % val2])
        order = pylab.argsort(x)
        x = x[order]
        y = y[order]
        pylab.plot(x, y, c='g')

    pylab.tight_layout()

    outdir = None
    if 'outdir' in kwargs:
        outdir = kwargs['outdir']
    else:
        outdir = FIGS_DIR
    figname = '%s/%s_%s.pdf' % (outdir, val1, val2)
    pylab.savefig('%s/%s_%s.pdf' % (outdir, val1, val2), format='pdf')

    pylab.close()
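
The correlation plots above argsort x before drawing the fitted line; without that, plot() connects the fitted points in their original data order and the regression line zig-zags across the panel. The pattern in isolation, with made-up values:

import numpy as np

x = np.array([3.0, 1.0, 4.0, 2.0])
y_hat = 2.0 * x + 1.0            # stand-in for the fitted column '%s_hat'

order = np.argsort(x)            # indices that put x in increasing order
print(x[order])                  # [ 1.  2.  3.  4.]
print(y_hat[order])              # [ 3.  5.  7.  9.]  -- pairing preserved
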
def scatter_dists(models_df, outdir=FIGS_DIR, subset=False):
    df = models_df[['neuron name', 'neuron type', 'model', 'dist']]
    model_dists = defaultdict(list)
    unique_neurons = 0
    for name, group in df.groupby(['neuron name', 'neuron type']):
        if len(group['model'].unique()) != 4:
            print group
            continue
        unique_neurons += 1
        if subset and unique_neurons % 5 != 0:
            continue
        for model, group2 in group.groupby('model'):
            #group2 = group2.head(n=20)
            model_dists[model].append(pylab.mean(group2['dist']))

    print "-------------"
    print "unique scatter neurons", unique_neurons
    print "-------------"

    order = pylab.argsort(model_dists['neural'])
    #x = pylab.arange(len(model_dists['neural']))
    pylab.figure()
    sns.set()

    model_colors = {
        'neural': 'r',
        'centroid': 'g',
        'random': 'm',
        'barabasi': 'c'
    }
    model_markers = {
        'neural': 'x',
        'centroid': 'o',
        'random': '^',
        'barabasi': 's'
    }
    model_labels = {
        'neural': 'Neural arbor',
        'centroid': 'Centroid',
        'random': 'Random',
        'barabasi': u'Barab\u00E1si-Albert'
    }

    max_dist = float('-inf')

    plot_order = ['random', 'barabasi', 'centroid', 'neural']
    for model in plot_order:
        dists = model_dists[model]
        dists = pylab.array(dists)
        y = dists[order]
        if LOG_DIST:
            y = pylab.log10(y)

        #y = y[::5]
        x = pylab.arange(len(y))

        max_dist = max(max_dist, max(y))

        color = model_colors[model]
        marker = model_markers[model]
        label = model_labels[model]
        pylab.scatter(x, y, label=label, c=color, marker=marker)

    pylab.xlabel('neural arbor index', fontsize=20)
    ylab = 'distance to Pareto front'
    if LOG_DIST:
        ylab = 'log(' + ylab + ')'
    pylab.ylabel(ylab, fontsize=20)
    leg = pylab.legend(ncol=2, frameon=True)
    leg.get_frame().set_linewidth(5)
    leg.get_frame().set_edgecolor('k')
    ax = pylab.gca()
    pylab.setp(ax.get_legend().get_texts(), fontsize=20)  # for legend text
    pylab.ylim(-0.1, max_dist + 0.9)

    pylab.xticks(fontsize=15, rotation=75)
    pylab.yticks(fontsize=15)

    fname = 'pareto_dists'
    if subset:
        fname += '_subset'

    pylab.tight_layout()
    pylab.savefig('%s/%s.pdf' % (outdir, fname), format='pdf')
    # and how much each branch of the fil overlaps each elongated dendro
    nbranches = fils.filaments[ifil].branch_properties['number']
    pixoverlapbranches = pl.zeros([nbranches, n_elong_dendro], dtype=int)

    for iz in range(n_elong_dendro):
        # dendro indices relative to fil. subarray
        drelx = d[zelong[iz]].indices()[2] - off[0][1]
        drely = d[zelong[iz]].indices()[1] - off[0][0]
        zz = pl.where((drelx >= 0) * (drely >= 0) * (drelx < sfarr[1]) *
                      (drely < sfarr[0]))[0]
        if len(zz) > 0:
            pixoverlap[iz] = farr[drely[zz], drelx[zz]].sum()
            farr_overlap[drely[zz], drelx[zz]] = farr[drely[zz], drelx[zz]]

    zorder = pl.argsort(pixoverlap)[::-1]
    # now if any dendros overlap this entire fil,
    if pixoverlap[zorder[0]] > 0:
        zoverlap = pl.where(pixoverlap[zorder] > 0)[0]
        # remember which dendros overlap:
        associated_dendros.append({
            "indices": zelong[zorder[zoverlap]],
            "pixoverlap": pixoverlap[zorder[zoverlap]]
        })
        pl.figure(2)
        pl.text(off[0][1], off[0][0], ifil, color="m")

        # go through overlapping dendros and analyze overlap w/subbranches from _labeled_mask
        thisbranchlist = []

        # TODO add longest path from subbranch at least to a hub also?
Exemple #38
0
def main():
    import optparse
    from numpy import sum

    # Parse command line
    parser = optparse.OptionParser(usage=USAGE)
    parser.add_option("-p",
                      "--plot",
                      action="store_true",
                      help="Generate pdf with IR-spectrum")
    parser.add_option("-i",
                      "--info",
                      action="store_true",
                      help="Set up/ Calculate vibrations & quit")
    parser.add_option("-s",
                      "--suffix",
                      action="store",
                      help="Call suffix for binary e.g. 'mpirun -n 4 '",
                      default='')
    parser.add_option("-r",
                      "--run",
                      action="store",
                      help="path to FHI-aims binary",
                      default='')
    parser.add_option("-x",
                      "--relax",
                      action="store_true",
                      help="Relax initial geometry")
    parser.add_option("-m",
                      "--molden",
                      action="store_true",
                      help="Output in molden format")
    parser.add_option("-w",
                      "--distort",
                      action="store_true",
                      help="Output geometry distorted along imaginary modes")
    parser.add_option("-t",
                      "--submit",
                      action="store",
                      help="""\
Path to submission script, string <jobname>
will be replaced by name + counter, string 
                            <outfile> will be replaced by filename""")
    parser.add_option("-d",
                      "--delta",
                      action="store",
                      type="float",
                      help="Displacement",
                      default=0.0025)

    options, args = parser.parse_args()
    if options.info:
        print __doc__
        sys.exit(0)
    if len(args) != 2:
        parser.error("Need exactly two arguments")

    AIMS_CALL = options.suffix + ' ' + options.run
    hessian_thresh = -1
    name = args[0]
    mode = args[1]
    delta = options.delta

    run_aims = False
    if options.run != '': run_aims = True

    submit_script = options.submit is not None

    if options.plot:
        import matplotlib as mpl
        mpl.use('Agg')
        from pylab import figure

    if options.plot or mode == '1':
        from pylab import savetxt, transpose, eig, argsort, sort,\
            sign, pi, dot, sum, linspace, argmin, r_, convolve

    # Constant from scipy.constants
    bohr = constants.value('Bohr radius') * 1.e10
    hartree = constants.value('Hartree energy in eV')
    at_u = constants.value('atomic mass unit-kilogram relationship')
    eV = constants.value('electron volt-joule relationship')
    c = constants.value('speed of light in vacuum')
    Ang = 1.0e-10
    hbar = constants.value('Planck constant over 2 pi')
    Avo = constants.value('Avogadro constant')
    kb = constants.value('Boltzmann constant in eV/K')
    hessian_factor = eV / (at_u * Ang * Ang)
    grad_dipole_factor = (eV / (1. / (10 * c))) / Ang  #(eV/Ang -> D/Ang)
    ir_factor = 1

    # Assign all filenames
    inputgeometry = 'geometry.in.' + name
    inputcontrol = 'control.in.' + name
    atomicmasses = 'masses.' + name + '.dat'
    xyzfile = name + '.xyz'
    moldenname = name + '.molden'
    hessianname = 'hessian.' + name + '.dat'
    graddipolename = 'grad_dipole.' + name + '.dat'
    irname = 'ir.' + name + '.dat'
    deltas = array([-delta, delta])
    coeff = array([-1, 1])
    c_zero = -1. / (2. * delta)
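    # The deltas/coeff/c_zero triplet encodes a two-point central difference:
    # summing coeff[k] * c_zero * F(x0 + deltas[k]) over the two displacements
    # gives -(F(x0+delta) - F(x0-delta)) / (2*delta) = -dF/dx = d2E/dx2,
    # which is the Hessian row accumulated in the mode '1' branch below
    # (the dipole gradient is assembled from dip_jump in the same way).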

    f = open('control.in', 'r')  # read control.in template
    template_control = f.read()
    f.close()

    if submit_script:
        f = open(options.submit, 'r')  # read submission script template
        template_job = f.read()
        f.close()

    folder = ''  # Dummy
    ########### Central Point ##################################################
    if options.relax and mode == '0':
        # First relax input geometry
        filename = name + '.out'
        folder = name + '_relaxation'
        if not os.path.exists(folder): os.mkdir(folder)  # Create folder
        shutil.copy('geometry.in', folder + '/geometry.in')  # Copy geometry
        new_control = open(folder + '/control.in', 'w')
        new_control.write(template_control +
                          'relax_geometry trm 1E-3\n')  # Relax!
        new_control.close()
        os.chdir(folder)  # Change directory
        print 'Central Point'
        if run_aims:
            os.system(AIMS_CALL + ' > ' +
                      filename)  # Run aims and pipe the output

#  into a file named 'filename'
        if submit_script: replace_submission(template_job, name, 0, filename)
        os.chdir('..')

    ############################################################################
    # Check for relaxed geometry
    if os.path.exists(folder + '/geometry.in.next_step'):
        geometry = open(folder + '/geometry.in.next_step', 'r')
    else:
        geometry = open('geometry.in', 'r')

    # Read input geometry
    n_line = 0
    struc = structure()
    lines = geometry.readlines()

    for line in lines:
        n_line = n_line + 1
        if line.rfind('set_vacuum_level') != -1:  # Vacuum Level
            struc.vacuum_level = float(split_line(line)[-1])
        if line.rfind('lattice_vector') != -1:  # Lattice vectors and periodic
            lat = split_line(line)[1:]
            struc.lattic_vector = append(struc.lattic_vector,
                                         float64(array(lat))[newaxis, :],
                                         axis=0)
            struc.periodic = True
        if line.rfind('atom') != -1:  # Set atoms
            line_vals = split_line(line)
            at = Atom(line_vals[-1], line_vals[1:-1])
            if n_line < len(lines):
                nextline = lines[n_line]
                if nextline.rfind(
                        'constrain_relaxation') != -1:  # constrained?
                    at = Atom(line_vals[-1], line_vals[1:-1], True)
                else:
                    at = Atom(line_vals[-1], line_vals[1:-1])
            struc.join(at)
    geometry.close()
    n_atoms = struc.n()
    n_constrained = n_atoms - sum(struc.constrained)

    # Atomic mass file
    mass_file = open(atomicmasses, 'w')
    mass_vector = zeros([0])
    for at_unconstrained in struc.atoms[struc.constrained == False]:
        mass_vector = append(mass_vector,
                             ones(3) * 1. / sqrt(at_unconstrained.mass()))
        line = '{0:10.5f}'.format(at_unconstrained.mass())
        for i in range(3):
            line = line + '{0:11.4f}'.format(at_unconstrained.coord[i])
        line = line + '{0:}\n'.format(at_unconstrained.kind)
        mass_file.writelines(line)
    mass_file.close()

    # Init
    dip = zeros([n_constrained * 3, 3])
    hessian = zeros([n_constrained * 3, n_constrained * 3])
    index = 0
    counter = 1

    # Set up / Read folders for displaced atoms
    for atom in arange(n_atoms)[struc.constrained == False]:
        for coord in arange(3):
            for delta in deltas:
                filename=name+'.i_atom_'+str(atom)+'.i_coord_'+str(coord)+'.displ_'+\
                 str(delta)+'.out'
                folder=name+'.i_atom_'+str(atom)+'.i_coord_'+str(coord)+'.displ_'+\
                 str(delta)
                if mode == '0':  # Put new geometry and control.in into folder
                    struc_new = copy.deepcopy(struc)
                    struc_new.atoms[atom].coord[coord]=\
                                                struc_new.atoms[atom].coord[coord]+delta
                    geoname='geometry.i_atom_'+str(atom)+'.i_coord_'+str(coord)+\
                            '.displ_'+str(delta)+'.in'
                    if not os.path.exists(folder): os.mkdir(folder)
                    new_geo = open(folder + '/geometry.in', 'w')
                    newline='#\n# temporary structure-file for finite-difference '+\
                     'calculation of forces\n'
                    newline=newline+'# displacement {0:8.4f} of \# atom '.format(delta)+\
                      '{0:5} direction {1:5}\n#\n'.format(atom,coord)
                    new_geo.writelines(newline + struc_new.to_str())
                    new_geo.close()
                    new_control = open(folder + '/control.in', 'w')
                    template_control = template_control.replace(
                        'relax_geometry', '#relax_geometry')
                    new_control.write(template_control+'compute_forces .true. \n'+\
                        'final_forces_cleaned '+\
                        '.true. \noutput dipole \n')
                    new_control.close()
                    os.chdir(folder)  # Change directory
                    print 'Processing atom: '+str(atom+1)+'/'+str(n_atoms)+', coord.: '+\
                      str(coord+1)+'/'+str(3)+', delta: '+str(delta)
                    if run_aims:
                        os.system(AIMS_CALL + ' > ' +
                                  filename)  # Run aims and pipe the output
                #  into a file named 'filename'
                    if submit_script:
                        replace_submission(template_job, name, counter,
                                           filename)
                    # os.system('qsub job.sh') # Mind the environment variables
                    os.chdir('..')

                if mode == '1':  # Read output
                    forces_reached = False
                    atom_count = 0
                    data = open(folder + '/' + filename)
                    for line in data.readlines():
                        if line.rfind(
                                'Dipole correction potential jump') != -1:
                            dip_jump = float(split_line(line)[-2])  # Periodic
                        if line.rfind('| Total dipole moment [eAng]') != -1:
                            dip_jump = float64(
                                split_line(line)[-3:])  # Cluster
                        if forces_reached and atom_count < n_atoms:  # Read Forces
                            struc.atoms[atom_count].force = float64(
                                split_line(line)[2:])
                            atom_count = atom_count + 1
                            if atom_count == n_atoms:
                                forces_reached = False
                        if line.rfind('Total atomic forces') != -1:
                            forces_reached = True
                    data.close()
                    if struc.periodic:
                        dip[index, 2] = dip[
                            index,
                            2] + dip_jump * coeff[deltas == delta] * c_zero
                    else:
                        dip[index, :] = dip[index, :] + dip_jump * coeff[
                            deltas == delta] * c_zero
                    forces = array([])
                    for at_unconstrained in struc.atoms[struc.constrained ==
                                                        False]:
                        forces = append(
                            forces,
                            coeff[deltas == delta] * at_unconstrained.force)
                    hessian[index, :] = hessian[index, :] + forces * c_zero
                counter = counter + 1
            index = index + 1
    if mode == '1':  # Calculate vibrations
        print 'Entering hessian diagonalization'
        print 'Number of atoms                = ' + str(n_atoms)
        print 'Name of Hessian input file     = ' + hessianname
        print 'Name of grad dipole input file = ' + graddipolename
        print 'Name of Masses  input file     = ' + atomicmasses
        print 'Name of XYZ output file        = ' + xyzfile
        print 'Threshold for Matrix elements  = ' + str(hessian_thresh)
        if (hessian_thresh < 0.0):
            print '     All matrix elements are taken'+\
                  ' into account by default\n'
        savetxt(hessianname, hessian)
        savetxt(graddipolename, dip)

        mass_mat = mass_vector[:, newaxis] * mass_vector[newaxis, :]
        hessian[abs(hessian) < hessian_thresh] = 0.0
        hessian = hessian * mass_mat * hessian_factor
        hessian = (hessian + transpose(hessian)) / 2.
        # Diagonalize hessian (scipy)
        print 'Solving eigenvalue system for Hessian Matrix'
        freq, eig_vec = eig(hessian)
        print 'Done ... '
        eig_vec = eig_vec[:, argsort(freq)]
        freq = sort(sign(freq) * sqrt(abs(freq)))
        ZPE = hbar * (freq) / (2.0 * eV)
        freq = (freq) / (200. * pi * c)

        grad_dipole = dip * grad_dipole_factor
        eig_vec = eig_vec * mass_vector[:, newaxis] * ones(
            len(mass_vector))[newaxis, :]
        infrared_intensity = sum(dot(transpose(grad_dipole),eig_vec)**2,axis=0)*\
                             ir_factor
        reduced_mass = sum(eig_vec**2, axis=0)
        norm = sqrt(reduced_mass)
        eig_vec = eig_vec / norm

        # The rest is output, xyz, IR,...
        print 'Results\n'
        print 'List of all frequencies found:'
        print 'Mode number      Frequency [cm^(-1)]   Zero point energy [eV]   '+\
              'IR-intensity [D^2/Ang^2]'
        for i in range(len(freq)):
            print '{0:11}{1:25.8f}{2:25.8f}{3:25.8f}'.format(
                i + 1, freq[i], ZPE[i], infrared_intensity[i])
        print '\n'
        print 'Summary of zero point energy for entire system:'
        print '| Cumulative ZPE               = {0:15.8f} eV'.format(sum(ZPE))
        print '| without first six eigenmodes = {0:15.8f} eV\n'.format(
            sum(ZPE) - sum(ZPE[:6]))
        print 'Stability checking - eigenvalues should all be positive for a '+\
               'stable structure. '
        print 'The six smallest frequencies should be (almost) zero:'
        string = ''
        for zz in ZPE[:6]:
            string = string + '{0:25.8f}'.format(zz)
        print string
        print 'Compare this with the largest eigenvalue, '
        print '{0:25.8f}'.format(freq[-1])

        nums = arange(n_atoms)[struc.constrained == False]
        nums2 = arange(n_atoms)[struc.constrained]
        newline = ''
        newline_ir = '[INT]\n'
        if options.molden:
            newline_molden = '[Molden Format]\n[GEOMETRIES] XYZ\n'
            newline_molden = newline_molden + '{0:6}\n'.format(n_atoms) + '\n'
            for i_atoms in range(n_constrained):
                newline_molden = newline_molden + '{0:6}'.format(
                    struc.atoms[nums[i_atoms]].kind)
                for i_coord in range(3):
                    newline_molden = newline_molden + '{0:10.4f}'.format(
                        struc.atoms[nums[i_atoms]].coord[i_coord])
                newline_molden = newline_molden + '\n'
            newline_molden = newline_molden + '[FREQ]\n'
            for i in range(len(freq)):
                newline_molden = newline_molden + '{0:10.3f}\n'.format(freq[i])
            newline_molden = newline_molden + '[INT]\n'
            for i in range(len(freq)):
                newline_molden = newline_molden + '{0:17.6e}\n'.format(
                    infrared_intensity[i])
            newline_molden = newline_molden + '[FR-COORD]\n'
            newline_molden = newline_molden + '{0:6}\n'.format(n_atoms) + '\n'
            for i_atoms in range(n_constrained):
                newline_molden = newline_molden + '{0:6}'.format(
                    struc.atoms[nums[i_atoms]].kind)
                for i_coord in range(3):
                    newline_molden = newline_molden + '{0:10.4f}'.format(
                        struc.atoms[nums[i_atoms]].coord[i_coord] / bohr)
                newline_molden = newline_molden + '\n'
            newline_molden = newline_molden + '[FR-NORM-COORD]\n'

        for i in range(len(freq)):
            newline = newline + '{0:6}\n'.format(n_atoms)
            if freq[i] > 0:
                newline = newline + 'stable frequency at '
            elif freq[i] < 0:
                newline = newline + 'unstable frequency at '
                if options.distort and freq[i] < -50:
                    struc_new = copy.deepcopy(struc)
                    for i_atoms in range(n_constrained):
                        for i_coord in range(3):
                            struc_new.atoms[i_atoms].coord[i_coord]=\
                            struc_new.atoms[i_atoms].coord[i_coord]+\
                           eig_vec[(i_atoms)*3+i_coord,i]
                    geoname = name + '.distorted.vibration_' + str(
                        i + 1) + '.geometry.in'
                    new_geo = open(geoname, 'w')
                    newline_geo = '#\n# distorted structure-file for based on eigenmodes\n'
                    newline_geo=newline_geo+\
                            '# vibration {0:5} :{1:10.3f} 1/cm\n#\n'.format(i+1,freq[i])
                    new_geo.writelines(newline_geo + struc_new.to_str())
                    new_geo.close()
            elif freq[i] == 0:
                newline = newline + 'translation or rotation '
            newline = newline + '{0:10.3f} 1/cm IR int. is '.format(freq[i])
            newline = newline + '{0:10.4e} D^2/Ang^2; red. mass is '.format(
                infrared_intensity[i])
            newline = newline + '{0:5.3f} a.m.u.; force const. is '.format(
                1.0 / reduced_mass[i])
            newline = newline + '{0:5.3f} mDyne/Ang.\n'.format(
                ((freq[i] *
                  (200 * pi * c))**2) * (1.0 / reduced_mass[i]) * at_u * 1.e-2)
            if options.molden:
                newline_molden = newline_molden + \
                    'vibration {0:6}\n'.format(i+1)
            for i_atoms in range(n_constrained):
                newline = newline + '{0:6}'.format(
                    struc.atoms[nums[i_atoms]].kind)
                for i_coord in range(3):
                    newline = newline + '{0:10.4f}'.format(
                        struc.atoms[nums[i_atoms]].coord[i_coord])
                for i_coord in range(3):
                    newline = newline + '{0:10.4f}'.format(
                        eig_vec[(i_atoms) * 3 + i_coord, i])
                    if options.molden:
                        newline_molden = newline_molden + '{0:10.4f}'.format(
                            eig_vec[(i_atoms) * 3 + i_coord, i] / bohr)
                newline = newline + '\n'
                if options.molden: newline_molden = newline_molden + '\n'
            for i_atoms in range(n_atoms - n_constrained):
                newline = newline + '{0:6}'.format(
                    struc.atoms[nums2[i_atoms]].kind)
                for i_coord in range(3):
                    newline = newline + '{0:10.4f}'.format(
                        struc.atoms[nums2[i_atoms]].coord[i_coord])
                for i_coord in range(3):
                    newline = newline + '{0:10.4f}'.format(0.0)
                newline = newline + '\n'
            newline_ir = newline_ir + '{0:10.4e}\n'.format(
                infrared_intensity[i])
        xyz = open(xyzfile, 'w')
        xyz.writelines(newline)
        xyz.close()
        ir = open(irname, 'w')
        ir.writelines(newline_ir)
        ir.close()
        if options.molden:
            molden = open(moldenname, 'w')
            molden.writelines(newline_molden)
            molden.close()

        if mode == '1' and options.plot:
            x = linspace(freq.min() - 500, freq.max() + 500, 1000)
            z = zeros(len(x))
            for i in range(len(freq)):
                z[argmin(abs(x - freq[i]))] = infrared_intensity[i]
            window_len = 150
            gauss = signal.gaussian(window_len, 10)
            s = r_[z[window_len - 1:0:-1], z, z[-1:-window_len:-1]]
            z_convolve = convolve(gauss / gauss.sum(), s,
                                  mode='same')[window_len - 1:-window_len + 1]
            fig = figure(0)
            ax = fig.add_subplot(111)
            ax.plot(x, z_convolve, 'r', lw=2)
            ax.set_xlim([freq.min() - 500, freq.max() + 500])
            ax.set_ylim([-0.01, ax.get_ylim()[1]])
            ax.set_yticks([])
            ax.set_xlabel('Frequency [1/cm]', size=20)
            ax.set_ylabel('Intensity [a.u.]', size=20)
            fig.savefig(name + '_IR_spectrum.pdf')

        print '\n Done. '
Exemple #39
0
  def refine(self, edge_errors, gamma=1.4):
    """
    This function iterates through the cells in the mesh, then refines
    the mesh based on the relative error and the cell's location in the
    mesh.
    
    :param edge_errors : Dolfin edge function containing edge errors of 
                         of the current mesh.
    :param gamma       : Scaling factor for determining which edges need be 
                         refined.  This is determined by the average error 
                         of the edge_errors variable
    """
    mesh = self.mesh
    
    mesh.init(1,2)
    mesh.init(0,2)
    mesh.init(0,1)
    
    avg_error                 = edge_errors.array().mean()
    error_sorted_edge_indices = p.argsort(edge_errors.array())[::-1]
    refine_edge               = FacetFunction('bool', mesh)
    for e in edges(mesh):
      refine_edge[e] = edge_errors[e] > gamma*avg_error

    coordinates = p.copy(self.mesh.coordinates())      
    current_new_vertex = len(coordinates)
    cells_to_delete = []
    new_cells = []

    for iteration in range(refine_edge.array().sum()):
      for e in facets(self.mesh):
        if refine_edge[e] and (e.index()==error_sorted_edge_indices[0]):
          adjacent_cells = e.entities(2)
          adjacent_vertices = e.entities(0)
          if not any([c in cells_to_delete for c in adjacent_cells]):
            new_x,new_y = e.midpoint().x(),e.midpoint().y()
            coordinates = p.vstack((coordinates,[new_x,new_y]))
            for c in adjacent_cells:
              off_facet_vertex = list(self.mesh.cells()[c])
              [off_facet_vertex.remove(ii) for ii in adjacent_vertices]
              for on_facet_vertex in adjacent_vertices:
                new_cell = p.sort([current_new_vertex,off_facet_vertex[0],on_facet_vertex])
                new_cells.append(new_cell)
              cells_to_delete.append(c)
            current_new_vertex+=1
      error_sorted_edge_indices = error_sorted_edge_indices[1:]

    old_cells = self.mesh.cells()
    keep_cell = p.ones(len(old_cells))
    keep_cell[cells_to_delete] = 0
    old_cells_parsed = old_cells[keep_cell.astype('bool')]
    all_cells = p.vstack((old_cells_parsed,new_cells))
    n_cells = len(all_cells)

    e = MeshEditor()
    refined_mesh = Mesh()
    e.open(refined_mesh,self.mesh.geometry().dim(),self.mesh.topology().dim())
    e.init_vertices(current_new_vertex)
    for index,x in enumerate(coordinates):
      e.add_vertex(index,x[0],x[1])
  
    e.init_cells(n_cells)
    for index,c in enumerate(all_cells):
      e.add_cell(index,c.astype('uintc'))

    e.close()
    refined_mesh.order()
    self.mesh = refined_mesh 
Exemple #40
0
    def sort_atoms(self):
        """Sort the atoms in the cell according to atomic symbol."""
        ind = m.argsort(self.atom_symbols)
        self.atom_symbols = m.array(self.atom_symbols)[ind]
        self.atoms = self.atoms[ind]
        self.selective_flags = m.array(self.selective_flags)[ind]
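
sort_atoms illustrates the standard argsort idiom for co-sorting parallel arrays: compute one index array and apply it to every attribute. A standalone sketch with plain numpy and made-up atoms:

import numpy as np

atom_symbols = np.array(['O', 'H', 'C', 'H'])
positions = np.array([[ 0.0, 0.0, 1.2],
                      [ 0.9, 0.0, 0.0],
                      [ 0.0, 0.0, 0.0],
                      [-0.9, 0.0, 0.0]])

ind = np.argsort(atom_symbols)     # alphabetical order of the symbols
atom_symbols = atom_symbols[ind]
positions = positions[ind]         # apply the same permutation to every array

print(atom_symbols)                # ['C' 'H' 'H' 'O']
print(positions)
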
Exemple #41
0
def entropy(points, logp, N_entropy=10000, N_norm=2500):
    r"""
    Return entropy estimate and uncertainty from a random sample.

    *points* is a set of draws from an underlying distribution, as returned
    by a Markov chain Monte Carlo process for example.

    *logp* is the log-likelihood for each draw.

    *N_norm* is the number of points $k$ to use to estimate the posterior
    density normalization factor $P(D) = \hat N$, converting
    from $\log( P(D|M) P(M) )$ to $\log( P(D|M)P(M)/P(D) )$. The relative
    uncertainty $\Delta\hat S/\hat S$ scales with $\sqrt{k}$, with the
    default *N_norm=2500* corresponding to 2% relative uncertainty.
    Computation cost is $O(nk)$ where $n$ is number of points in the draw.

    *N_entropy* is the number of points used to estimate the entropy
    $\hat S = - \int P(M|D) \log P(M|D)$ from the normalized log likelihood
    values.
    """

    # Use a random subset to estimate density
    if N_norm >= len(logp):
        norm_points = points
    else:
        idx = permutation(len(points))[:N_norm]
        norm_points = points[idx]

    # Use a different subset to estimate the scale factor between density
    # and logp.
    if N_entropy is None:
        N_entropy = 10000
    if N_entropy >= len(logp):
        entropy_points, eval_logp = points, logp
    else:
        idx = permutation(len(points))[:N_entropy]
        entropy_points, eval_logp = points[idx], logp[idx]
    """
    # Try again, just using the points from the high probability regions
    # to determine the scale factor
    N_norm = min(len(logp), 5000)
    N_entropy = int(0.8*N_norm)
    idx = np.argsort(logp)
    norm_points = points[idx[-N_norm:]]
    entropy_points = points[idx[-N_entropy:]]
    eval_logp = logp[idx[-N_entropy:]]
    """

    # Normalize p to a peak probability of 1 so that exp() doesn't underflow.
    #
    # This should be okay since for the normalizing constant C:
    #
    #      u' = e^(ln u + ln C) = e^(ln u)e^(ln C) = C u
    #
    # Using eq. 11 of Kramer with u' substituted for u:
    #
    #      N_est = < u'/p > = < C u/p > = C < u/p >
    #
    #      S_est = - < ln q >
    #            = - < ln (u'/N_est) >
    #            = - < ln C + ln u - ln (C <u/p>) >
    #            = - < ln u + ln C - ln C - ln <u/p> >
    #            = - < ln u - ln <u/p> >
    #            = - < ln u > + ln <u/p>
    #
    # Uncertainty comes from eq. 13:
    #
    #      N_err^2 = 1/(k-1) sum( (u'/p - <u'/p>)^2 )
    #              = 1/(k-1) sum( (C u/p - <C u/p>)^2 )
    #              = C^2 std(u/p)^2
    #      S_err = std(u'/p) / <u'/p> = (C std(u/p))/(C <u/p>) = std(u/p)/<u/p>
    #
    # So even though the constant C shows up in N_est, N_err, it cancels
    # again when S_est, S_err is formed.
    log_scale = np.max(eval_logp)
    # print("max log sample: %g"%log_scale)
    eval_logp -= log_scale

    # Compute entropy and uncertainty in nats
    # Note: if all values are the same in any dimension then we have a dirac
    # functional with infinite probability at every sample point, and the
    # differential entropy estimate will yield H = -inf.
    rho = density(norm_points, entropy_points)
    #print(rho.min(), rho.max(), eval_logp.min(), eval_logp.max())
    frac = exp(eval_logp) / rho
    n_est, n_err = mean(frac), std(frac)
    if n_est == 0.:
        s_est, s_err = -np.inf, 0.
    else:
        s_est = log(n_est) - mean(eval_logp)
        s_err = n_err / n_est
    #print(n_est, n_err, s_est/LN2, s_err/LN2)
    ##print(np.median(frac), log(np.median(frac))/LN2, log(n_est)/LN2)
    if False:
        import pylab
        idx = pylab.argsort(entropy_points[:, 0])
        pylab.figure()
        pylab.subplot(221)
        pylab.hist(points[:, 0], bins=50, normed=True, log=True)
        pylab.plot(entropy_points[idx, 0], rho[idx], label='density')
        pylab.plot(entropy_points[idx, 0],
                   exp(eval_logp + log_scale)[idx],
                   label='p')
        pylab.ylabel("p(x)")
        pylab.legend()
        pylab.subplot(222)
        pylab.hist(points[:, 0], bins=50, normed=True, log=False)
        pylab.plot(entropy_points[idx, 0], rho[idx], label='density')
        pylab.plot(entropy_points[idx, 0],
                   exp(eval_logp + log_scale)[idx],
                   label='p')
        pylab.ylabel("p(x)")
        pylab.legend()
        pylab.subplot(212)
        pylab.plot(entropy_points[idx, 0], frac[idx], '.')
        pylab.xlabel("P[0] value")
        pylab.ylabel("p(x)/kernel density")

    # return entropy and uncertainty in bits
    return s_est / LN2, s_err / LN2
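
For intuition only: when the log-density is already normalized, the entropy reduces to S = -E[log p], which a plain Monte Carlo average estimates directly; the density/normalization machinery in entropy() exists because MCMC log-likelihoods are unnormalized. Below is a self-contained check of that simpler identity against the analytic Gaussian entropy, using synthetic draws (this is not the function above):

import numpy as np

LN2 = np.log(2.0)
rng = np.random.RandomState(0)

sigma = 2.0
x = rng.normal(0.0, sigma, size=100000)                # draws from N(0, sigma^2)
logp = -0.5 * (x / sigma)**2 - np.log(sigma * np.sqrt(2.0 * np.pi))

s_mc = -np.mean(logp)                                  # Monte Carlo -E[log p], in nats
s_exact = 0.5 * np.log(2.0 * np.pi * np.e * sigma**2)  # analytic Gaussian entropy

print('%.4f %.4f' % (s_mc / LN2, s_exact / LN2))       # both in bits, nearly equal
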
def cluster_imgs_pca_kmeans(imgpaths, bb_map=None, k=2, N=3, do_align=True):
    """ Using PCA and K-means, cluster the imgpaths into 'k' clusters,
    using the first 'N' principal components.
    Algorithm details:
        Input: Set of image patches A, of size NxM
        0.) Discretize the image patch into K N'xM' equally-sized slices.
        1.) Using the discretized image patches A', run PCA to extract
            the slices S that maximize the variance
        2.) Run k-means (k=2) on the slices S.
    Input:
        list imgpaths: (imgpath_i, ...)
        dict bb_map: If you want to only cluster based on a sub-region
                     of each image, pass in 'bb_map', which is:
                         {str imgpath: (y1,y2,x1,x2)}
        int k: number of clusters
        int N: Number of principle components to use. (NOT USED)
    Output:
        dict clusters, maps {str clusterID: [imgpath_i, ...]}
    """
    data = imgpaths_to_mat(imgpaths, bb_map=bb_map, do_align=do_align)
    '''
    if bb_map == None:
        bb_map = {}
        h_big, w_big = get_largest_img_dims(imgpaths)
    else:
        bb_big = get_largest_bb(bb_map.values())
        h_big = int(abs(bb_big[0] - bb_big[1]))
        w_big = int(abs(bb_big[2] - bb_big[3]))
    #TEMP: Cut off first half, last quarter
    #w_big = (w_big / 2) - (w_big / 4)
    # 0.) First, convert images into MxN array, where M is the number
    #     of images, and N is the number of pixels of each image.
    data = np.zeros((len(imgpaths), h_big*w_big))
    for row, imgpath in enumerate(imgpaths):
        img = scipy.misc.imread(imgpath, flatten=True)
        # TEMP: Cut off first half
        #img = img[:, img.shape[1]/2:(3*img.shape[1])/4]
        #img = util_gui.autothreshold_numpy(img, method="otsu")

        bb = bb_map.get(imgpath, None)
        if bb == None:
            patch = resize_mat(img, (h_big, w_big))
        else:
            # Must make sure that all patches are the same shape.
            patch = resize_mat(img[bb[0]:bb[1], bb[2]:bb[3]], (h_big, w_big))
        # Reshape 'patch' to be a single row of pixels, instead of rows
        # of pixels.
        patch = patch.reshape(1, patch.shape[0]*patch.shape[1])
        data[row,:] = patch
    '''
    # Inspiration for PCA-related code comes from:
    #     http://glowingpython.blogspot.it/2011/07/pca-and-image-compression-with-numpy.html

    # 1.) Call PCA on the data matrix, extract first N principle comps
    M = (data - np.mean(data.T, axis=1)) # subtract mean, along cols
    (latent, coeff) = np.linalg.eig(np.cov(M))
    p = np.size(coeff, axis=1)

    idx = pylab.argsort(latent)  # sort eigenvalues
    idx = idx[::-1]        # descending order (most 'relevant' first)
    # idx is a sorted list of indices into imgpaths, i.e. if there
    # are 5 images, and idx is:
    #   idx := [4, 1, 3, 2, 0]
    # then this means that imgpaths[4] most explains the variance,
    # followed by imgpaths[1], etc.
    idx = idx[:k]
    cluster_centers = data[idx, :]
    clustering = {} # maps {int clusterID: [imgpath_i, ...]}
    
    # 2.) Nearest-Neighbors to cluster_centers
    for i, imgarray in enumerate(data):
        best_dist, best_j = None, None
        for j, clustercenter in enumerate(cluster_centers):
            dist = np.linalg.norm(imgarray - clustercenter)
            if best_dist == None or dist < best_dist:
                best_dist = dist
                best_j = j
        clustering.setdefault(best_j, []).append(imgpaths[i])
    return clustering
Exemple #43
0
    tmp1 = str.split(lines[0], ',')
    tmp2 = []
    for val in tmp1:
        tmp2.append(float(val))
    N_ch = len(tmp2)

    freq = zeros(N_ch)
    for k in range(0, N_ch):
        freq[k] = tmp2[k]

    ch = zeros(N_ch)
    for k in range(0, N_ch):
        ch[k] = freq[k] / 0.01

    freq_idx = argsort(freq)

    if args.phases:
        chan_sel = 2 + 2 * array(freq_idx)
    else:
        chan_sel = 1 + 2 * array(freq_idx)

    for lineIndex in range(1, len(lines), 8):  # get lines 1, 9, 17, ...

        tmp = str.split(lines[lineIndex + 0], ',')
        PX_lsq.append([])
        PX_lsq[-1] = zeros(N_ch * 2 + 1)
        for k in range(0, len(tmp)):
            PX_lsq[-1][k] = float(tmp[k])
        tmp = str.split(lines[lineIndex + 1], ',')
        PX_fit.append([])
def vals_correlation(df, val1, val2, outdir=FIGS_DIR, xtransform=None,\
                     ytransform=None, logtransform=False):
    print "-----------------------------------------------------"
    print "%s-%s correlation" % (val1, val2)
    df2 = df.drop_duplicates(subset=['neuron name', 'neuron type'])
    
    if logtransform:
        xtransform = pylab.log10
        ytransform = pylab.log10
        df2 = df2[(df2[val1] > 0) & (df2[val2] > 0)]
    
    v1 = df2[val1]
    v2 = df2[val2]
    
    if xtransform != None:
        v1 = xtransform(v1)
    if ytransform != None:
        v2 = ytransform(v2)

    print pearsonr(v1, v2)
    print spearmanr(v1, v2)

    regression_df = df2.copy()
    add_regression_cols(regression_df, val1, val2, xtransform=xtransform,\
                        ytransform=ytransform)

    sns.set()
    pylab.figure()
    nrows = len(df2['neuron type'].unique()) + 1
    pylab.subplot(nrows, 1, 1)
    pylab.scatter(v1, v2)
    x = pylab.array(v1)
    y = pylab.array(regression_df['%s_hat' % val2])
    order = pylab.argsort(x)  # sort by x so the fitted line plots left to right
    x = x[order]
    y = y[order]
    pylab.plot(x, y, c='g')

    row = 2
    for neuron_type, group in df2.groupby('neuron type'):
        print neuron_type
        pylab.subplot(nrows, 1, row)
        row += 1
        v1 = pylab.array(group[val1])
        v2 = pylab.array(group[val2])
        if xtransform != None:
            v1 = xtransform(v1)
        if ytransform != None:
            v2 = ytransform(v2)
        print pearsonr(v1, v2)
        print spearmanr(v1, v2)
    
        regression_df = group.copy()
        add_regression_cols(regression_df, val1, val2, xtransform=xtransform,\
                            ytransform=ytransform)
        
        pylab.scatter(v1, v2)
        x = pylab.array(v1)
        y = pylab.array(regression_df['%s_hat' % val2])
        order = pylab.argsort(x)
        x = x[order]
        y = y[order]
        pylab.plot(x, y, c='g')
    
    pylab.tight_layout()
    figname = '%s/%s_%s.pdf' % (outdir, val1, val2)
    pylab.savefig(figname, format='pdf')

    pylab.close()
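At its core the routine above computes two correlation statistics and a fitted line that is sorted by x before plotting. A self-contained sketch of that pattern on hypothetical arrays, assuming numpy and scipy:

import numpy as np
from scipy.stats import pearsonr, spearmanr

x = np.random.rand(100)                    # hypothetical predictor
y = 2.0 * x + 0.1 * np.random.randn(100)   # noisy linear response

print(pearsonr(x, y))    # (r, p-value): linear association
print(spearmanr(x, y))   # (rho, p-value): rank association

# Fit a line and sort by x so the fitted curve is drawn left to right.
slope, intercept = np.polyfit(x, y, 1)
order = np.argsort(x)
x_sorted = x[order]
y_hat = slope * x_sorted + intercept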
Exemple #45
0
figure = pylab.Figure()
for simulator in simulators:
    for num_nodes in nodes:
        col = 1
        subplot = figure.add_axes([x,y0+2.9*dy,w,h])
        subplot.set_title("%s (np%d)" % (simulator[:6].upper(),num_nodes), fontsize='x-large')
        subplot.set_ylabel("Membrane potential (mV)")
        
        # Get info about dataset from header of .v file
        exec(get_header("Results/VAbenchmark_%s_exc_%s_np%d.v" % (benchmark, simulator, num_nodes)))
        
        # Plot membrane potential trace
        allvdata = numpy.loadtxt("Results/VAbenchmark_%s_exc_%s_np%d.v" % (benchmark, simulator, num_nodes), comments='#')
        cell_ids = allvdata[:,1].astype(int)
        allvdata = allvdata[:,0]
        sortmap = pylab.argsort(cell_ids, kind='mergesort')
        cell_ids = pylab.take(cell_ids,sortmap)
        allvdata = pylab.take(allvdata,sortmap)
        for i in 0,1:
            tdata = pylab.arange(0,(n+1)*dt,dt)
            vdata = allvdata.compress(cell_ids==i)
            vdata = pylab.where(vdata>=v_thresh-0.05,0.0,vdata) # add fake APs for plotting
            if len(tdata) > len(vdata):
                print "Warning. Shortening tdata from %d to %d elements (%s)" % (len(tdata),len(vdata),simulator)
                tdata = tdata[0:len(vdata)]
            assert len(tdata)==len(vdata), "%d != %d (%s)" % (len(tdata),len(vdata),simulator)
            subplot.plot(tdata,vdata)
        
        # Plot spike rasters
        subplot = figure.add_axes([x,y0+2*dy,w,h])
        exc_spikedata = signals.load_spikelist("Results/VAbenchmark_%s_exc_%s_np%d.ras" % (benchmark, simulator, num_nodes))
Exemple #46
0
    def refine(self, edge_errors, gamma=1.4):
        """
    This function iterates through the cells in the mesh, then refines
    the mesh based on the relative error and the cell's location in the
    mesh.
    
    :param edge_errors : Dolfin edge function containing the edge errors of
                         the current mesh.
    :param gamma       : Scaling factor for determining which edges need to be
                         refined, applied to the average error of the
                         edge_errors variable.
    """
        mesh = self.mesh

        mesh.init(1, 2)
        mesh.init(0, 2)
        mesh.init(0, 1)

        avg_error = edge_errors.array().mean()
        error_sorted_edge_indices = pl.argsort(edge_errors.array())[::-1]
        refine_edge = FacetFunction('bool', mesh)
        for e in edges(mesh):
            refine_edge[e] = edge_errors[e] > gamma * avg_error

        coordinates = pl.copy(self.mesh.coordinates())
        current_new_vertex = len(coordinates)
        cells_to_delete = []
        new_cells = []

        for iteration in range(refine_edge.array().sum()):
            for e in facets(self.mesh):
                if refine_edge[e] and (e.index()
                                       == error_sorted_edge_indices[0]):
                    adjacent_cells = e.entities(2)
                    adjacent_vertices = e.entities(0)
                    if not any([c in cells_to_delete for c in adjacent_cells]):
                        new_x, new_y = e.midpoint().x(), e.midpoint().y()
                        coordinates = pl.vstack((coordinates, [new_x, new_y]))
                        for c in adjacent_cells:
                            off_facet_vertex = list(self.mesh.cells()[c])
                            [
                                off_facet_vertex.remove(ii)
                                for ii in adjacent_vertices
                            ]
                            for on_facet_vertex in adjacent_vertices:
                                new_cell = pl.sort([
                                    current_new_vertex, off_facet_vertex[0],
                                    on_facet_vertex
                                ])
                                new_cells.append(new_cell)
                            cells_to_delete.append(c)
                        current_new_vertex += 1
            error_sorted_edge_indices = error_sorted_edge_indices[1:]

        old_cells = self.mesh.cells()
        keep_cell = pl.ones(len(old_cells))
        keep_cell[cells_to_delete] = 0
        old_cells_parsed = old_cells[keep_cell.astype('bool')]
        all_cells = pl.vstack((old_cells_parsed, new_cells))
        n_cells = len(all_cells)

        e = MeshEditor()
        refined_mesh = Mesh()
        e.open(refined_mesh,
               self.mesh.geometry().dim(),
               self.mesh.topology().dim())
        e.init_vertices(current_new_vertex)
        for index, x in enumerate(coordinates):
            e.add_vertex(index, x[0], x[1])

        e.init_cells(n_cells)
        for index, c in enumerate(all_cells):
            e.add_cell(index, c.astype('uintc'))

        e.close()
        refined_mesh.order()
        self.mesh = refined_mesh
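The refinement criterion itself is simple: an edge is flagged when its error exceeds gamma times the mean edge error, and flagged edges are processed from largest to smallest error. Stripped of the Dolfin mesh machinery, the test looks roughly like the numpy-only sketch below (edges_to_refine is a hypothetical helper, not part of the class above):

import numpy as np

def edges_to_refine(edge_errors, gamma=1.4):
    """Indices of edges whose error exceeds gamma * mean error,
    ordered from largest to smallest error (a plain-numpy sketch)."""
    edge_errors = np.asarray(edge_errors, dtype=float)
    threshold = gamma * edge_errors.mean()
    flagged = np.flatnonzero(edge_errors > threshold)
    return flagged[np.argsort(edge_errors[flagged])[::-1]]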
Exemple #47
0
    for key in keys[1:]:
        mus['test'].append(f['test_set/accu/' + key][...][[0, -1]])
    f.close()

    n_layer = len(mus['train'])

    # test the distance thing
    print('model before training', MODEL)
    for layer in range(n_layer - 1):
        distribution = list()
        for batch in range(mus['test'][layer].shape[1]):
            for n in range(64):
                tester1 = mus['test'][layer][0, batch, n]
                d1 = (tester1 - dataset['images/test_set'])**2
                index = pl.where(
                    pl.argsort(d1.sum((1, 2, 3))) == ((64 * 5 * batch) + n))[0]
                distribution.append(index)
        print(pl.mean(distribution), end=', ')
    print(';;', len(distribution))

    print('model after training', MODEL)
    for layer in range(n_layer - 1):
        distribution = list()
        for batch in range(mus['test'][layer].shape[1]):
            for n in range(64):
                tester1 = mus['test'][layer][1, batch, n]
                d1 = (tester1 - dataset['images/test_set'])**2
                index = pl.where(
                    pl.argsort(d1.sum((1, 2, 3))) == ((64 * 5 * batch) + n))[0]
                distribution.append(index)
        print(layer, pl.mean(distribution), end=', ')
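Each inner step above asks one question: under summed squared differences, what rank does the true test image get when a reconstruction is compared against the whole test set (rank 0 meaning the reconstruction is closest to its own source image)? A minimal sketch of that rank computation on generic arrays, with hypothetical names and shapes:

import numpy as np

def reconstruction_rank(reconstruction, test_images, true_index):
    """Rank of the true image among all test images, by squared L2 distance."""
    diffs = (reconstruction - test_images) ** 2            # broadcast over images
    dists = diffs.reshape(len(test_images), -1).sum(axis=1)
    order = np.argsort(dists)                              # closest image first
    return int(np.where(order == true_index)[0][0])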
Exemple #48
0
    def tf_idf(self):
        """ Normalize the frequency of stock name in the articles"""
        print "Initiating the word frequency test..."


        list_of_prepositions = [u'above', u'about', u'across', u'against',u'The', u'is',u'an',
                                u'along', u'among', u'around', u'at', u'before',
                                u'behind', u'below', u'beneath', u'beside', u'between',
                                u'beyond', u'by', u'during', u'except',
                                u'for', u'from',u'in', u'inside', u'into',u'like',u'near',u'of',
                                u'off', u'on',u'since',u'to', u'toward', u'through',u'under', u'until', u'up', u'upon',
                                u'with', u'within', u'the', u'a', u'and', u'for']


        try:
            #Zipf Distribution to assess threshold
            paragraphs = " ".join(self.targeted_paragraphs)
            words = paragraphs.split()
            frequency = Counter(x for x in words if x not in list_of_prepositions)
            counts = array(frequency.values())
            tokens = frequency.keys()

            ranks = arange(1, len(counts)+1)
            indices = argsort(-counts)
            frequencies = counts[indices]
            loglog(ranks, frequencies, marker=".")
            title("Zipf plot for Combined Article Paragraphs")
            xlabel("Frequency Rank of Token")
            ylabel("Absolute Frequency of Token")
            grid(True)
            for n in list(logspace(-0.5, log10(len(counts)-1), 20).astype(int)):
                dummy = text(ranks[n], frequencies[n], " " + tokens[indices[n]],
                verticalalignment="bottom",
                horizontalalignment="left")
            #plt.plot(np.unique(ranks), np.poly1d(np.polyfit(ranks, frequencies, 10))(np.unique(ranks)))
            slope=np.polyfit(ranks, frequencies, 0)
            for value in slope:
                print value
                zipf_threshold = value
            ## NEED TO ADD: fitted line. Due to an error in the threshold value it is currently not used during paragraph extraction. ##
        except Exception as e:
            print "Error occurred during Zipf Distribution: " + str(e)



        #Inverse-document Frequency
        document_size = self.result_scan_web["article_count"]
        document_success = len(self.article_url)
        try:
            idf = math.log(float(document_size)/float(document_success))
        except Exception as e:
            print "No Successful Articles in this webpage: " + str(e)

        #Term Frequency
        list_total_count = []
        for paragraph in self.targeted_paragraphs:
            words = paragraph.split()
            word_frequency = Counter(x for x in words if x not in list_of_prepositions)
            stock_frequency = word_frequency[self.stock_name]
            list_total_count.append(stock_frequency)

        total_count = sum(list_total_count)
        print "TOTAL WORD COUNT:" , total_count

        print "COUNT BEFORE:"
        print "urls = " , len(self.article_url)

        list_fail_values = []
        for paragraph in list(self.targeted_paragraphs):  # iterate over a copy; items are removed below
            words = paragraph.split()
            word_frequency = Counter(x for x in words if x not in list_of_prepositions)
            stock_frequency = word_frequency[self.stock_name]
            ticker_symbol_frequency = word_frequency[self.stock_id]
            print "TERM FREQUENCY: ", stock_frequency
            print "DIVISION: " , (float(stock_frequency)/float(total_count))
            try:
                tf_idf_weight = (float(stock_frequency)/float(total_count)) * float(idf)
            except Exception as e:
                print "Error ocurred during division. total_count is zero, meaning no successful articles: " + str(e)
            print "TF-IDF-WEIGHT = " , tf_idf_weight
            if tf_idf_weight <= 0.30: # Articles at or below the 0.30 weight threshold are treated as irrelevant.
                index = self.targeted_paragraphs.index(paragraph)
                self.targeted_paragraphs.remove(paragraph)

                url_value = self.article_url[index]
                title_value = self.article_title[index]
                fail_dict = {'url': url_value , 'title' : title_value}
                list_fail_values.append(fail_dict)

                print "IRRELEVANT ARTICLE DETECTED..."
                print "DELETING " , url_value
                del self.article_url[index]
                del self.article_title[index]

            else:
                pass

        print "COUNT AFTER: "
        print "urls = " , len(self.article_url)


        print "FAILED ARTICLES: "
        print "# "  , len(list_fail_values)
        print "articles:" , list_fail_values

        #titles_in_fail = {d['title'] for d in list_fail_values if 'title' in d}
        #self.json_results[:] = [d for d in self.json_results if d.get('title') not in titles_in_fail]

        self.json_results[:] = [i for i in self.json_results if i not in list_fail_values]
        self.results_tone_analyzer["successful_articles"] = self.json_results

        print " ----------------------------"
        print "Number of Successful Articles = " , len(self.article_url)
        print "SUCCESSFUL URLs: " ,
        for article in self.article_url:
            print article
        print " ----------------------------"
def category_dists(df, categories, outdir=FIGS_DIR, fig_suffix=None,\
                   category_subset=None, log_plot=True, **kwargs):
    for category in categories:
        df2 = df.drop_duplicates(
            subset=list(set(['neuron name', 'neuron type', category])))

        if category_subset != None:
            df2 = df2[df2[category].isin(category_subset)]
        else:
            df2 = remove_small_counts(df2, category,\
                                      min_count=CATEGORY_MIN_COUNTS[category])

        if log_plot:
            df2['dist'] = pylab.log10(df2['dist'])

        cat_vals = []
        cat_means = []

        print "-----------------------------------------------------"
        for cat_val, group in df2.groupby(category):
            dist = pylab.array(group['dist']).copy()

            cat_vals.append(cat_val)
            cat_mean = pylab.mean(dist)
            cat_means.append(cat_mean)

            if log_plot:
                dist = 10**dist
            print cat_val, pylab.mean(dist), "+/-", pylab.std(dist, ddof=1)

        order = pylab.argsort(cat_means)
        cat_vals = pylab.array(cat_vals)
        cat_means = pylab.array(cat_means)
        sorted_vals = cat_vals[order]
        sorted_means = cat_means[order]

        pylab.figure()
        sns.set()

        dist_plot = sns.barplot(x=category,
                                y='dist',
                                data=df2,
                                order=sorted_vals)

        pylab.xticks(rotation=75, size=20)
        pylab.yticks(size=20)

        #pylab.xlabel(category, size=20)
        dist_plot.xaxis.label.set_visible(False)
        ylab = 'Distance to Pareto front'
        if log_plot:
            ylab = 'log(' + ylab + ')'
        pylab.ylabel(ylab, size=20)

        if 'ymin' in kwargs:
            pylab.ylim(ymin=kwargs['ymin'])
        if 'ymax' in kwargs:
            pylab.ylim(ymax=kwargs['ymax'])

        pylab.tight_layout()

        fname = 'pareto_dists_%s' % category.replace(' ', '_')
        if fig_suffix != None:
            fname += '_%s' % fig_suffix
        pylab.savefig('%s/%s.pdf' % (outdir, fname), bbox_inches='tight')
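Ordering the bars by each category's mean is just a sort over per-group means. A compact pandas sketch of that ordering, with hypothetical column names and values; the resulting list can be passed as the order argument to sns.barplot as above:

import pandas as pd

# Hypothetical frame with one row per observation.
df = pd.DataFrame({'category': ['a', 'b', 'a', 'c', 'b', 'c'],
                   'dist':     [1.0,  3.0, 2.0, 0.5, 4.0, 0.7]})
means = df.groupby('category')['dist'].mean()
sorted_vals = means.sort_values().index.tolist()   # ['c', 'a', 'b']
# sns.barplot(x='category', y='dist', data=df, order=sorted_vals)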
def raster_tuning(ax):

    fullbehaviorDir = behaviorDir + subject + '/'
    behavName = subject + '_tuning_curve_' + tuningBehavior + '.h5'
    tuningBehavFileName = os.path.join(fullbehaviorDir, behavName)

    tuning_bdata = loadbehavior.BehaviorData(tuningBehavFileName,
                                             readmode='full')
    freqEachTrial = tuning_bdata['currentFreq']
    possibleFreq = np.unique(freqEachTrial)
    numberOfTrials = len(freqEachTrial)

    # -- The old way of sorting (useful for plotting sorted raster) --
    sortedTrials = []
    numTrialsEachFreq = [
    ]  #Used to plot lines after each group of sorted trials
    for indf, oneFreq in enumerate(
            possibleFreq
    ):  #indf is index of this freq and oneFreq is the frequency
        indsThisFreq = np.flatnonzero(
            freqEachTrial == oneFreq)  #this gives indices of this frequency
        sortedTrials = np.concatenate(
            (sortedTrials,
             indsThisFreq))  #adds all indices to a list called sortedTrials
        numTrialsEachFreq.append(
            len(indsThisFreq))  #finds number of trials each frequency has
    sortingInds = argsort(
        sortedTrials)  #gives array of indices that would sort the sortedTrials

    # -- Load event data and convert event timestamps to ms --
    tuning_ephysDir = os.path.join(settings.EPHYS_PATH, subject, tuningEphys)
    tuning_eventFilename = os.path.join(tuning_ephysDir, 'all_channels.events')
    tuning_ev = loadopenephys.Events(
        tuning_eventFilename)  #load ephys data (like bdata structure)
    tuning_eventTimes = np.array(
        tuning_ev.timestamps
    ) / SAMPLING_RATE  #get array of timestamps for each event and convert to seconds by dividing by sampling rate (Hz); matches up with eventID
    tuning_evID = np.array(
        tuning_ev.eventID
    )  #loads the onset times of events (matches up with eventID to say if event 1 went on (1) or off (0))
    tuning_eventOnsetTimes = tuning_eventTimes[
        tuning_evID ==
        1]  #array that is a time stamp for when the chosen event happens.
    #ev.eventChannel would load an array of events like trial start and sound start/finish times (sound event is 0 and trial start is 1, for example). There is only one event here, though, and it is sound start.
    while (numberOfTrials < len(tuning_eventOnsetTimes)):
        tuning_eventOnsetTimes = tuning_eventOnsetTimes[:-1]

    #######################################################################################################
    ###################THIS IS SUCH A HACK TO GET SPKDATA FROM EPHYSCORE###################################
    #######################################################################################################

    thisCell = celldatabase.CellInfo(
        animalName=subject,  ############################################
        ephysSession=tuningEphys,
        tuningSession='DO NOT NEED THIS',
        tetrode=tetrode,
        cluster=cluster,
        quality=1,
        depth=0,
        tuningBehavior='DO NOT NEED THIS',
        behavSession=tuningBehavior)

    tuning_spkData = ephyscore.CellData(thisCell)
    tuning_spkTimeStamps = tuning_spkData.spikes.timestamps

    (tuning_spikeTimesFromEventOnset, tuning_trialIndexForEachSpike,
     tuning_indexLimitsEachTrial) = spikesanalysis.eventlocked_spiketimes(
         tuning_spkTimeStamps, tuning_eventOnsetTimes, tuning_timeRange)

    #print 'numTrials ',max(tuning_trialIndexForEachSpike)#####################################
    '''
        Create a vector with the spike timestamps w.r.t. events onset.

        (spikeTimesFromEventOnset,trialIndexForEachSpike,indexLimitsEachTrial) = 
            eventlocked_spiketimes(timeStamps,eventOnsetTimes,timeRange)

        timeStamps: (np.array) the time of each spike.
        eventOnsetTimes: (np.array) the time of each instance of the event to lock to.
        timeRange: (list or np.array) two-element array specifying time-range to extract around event.

        spikeTimesFromEventOnset: 1D array with time of spikes locked to event.
        trialIndexForEachSpike: 1D array with the trial corresponding to each spike.
           The first spike index is 0.
        indexLimitsEachTrial: [2,nTrials] range of spikes for each trial. Note that
           the range is from firstSpike to lastSpike+1 (like in python slices)
        spikeIndices
    '''

    tuning_sortedIndexForEachSpike = sortingInds[
        tuning_trialIndexForEachSpike]  #Takes values of trialIndexForEachSpike and finds value of sortingInds at that index and makes array. This array gives an array with the sorted index of each trial for each spike

    # -- Calculate tuning --
    #nSpikes = spikesanalysis.spiketimes_to_spikecounts(spikeTimesFromEventOnset,indexLimitsEachTrial,responseRange) #array of the number of spikes in range for each trial
    '''Count number of spikes on each trial in a given time range.

           spikeTimesFromEventOnset: vector of spikes timestamps with respect
             to the onset of the event.
           indexLimitsEachTrial: each column contains [firstInd,lastInd+1] of the spikes on a trial.
           timeRange: time range to evaluate. Spike times exactly at the limits are not counted.

           returns nSpikes
    '''
    '''
    meanSpikesEachFrequency = np.empty(len(possibleFreq)) #make empty array of same size as possibleFreq

    # -- This part will be replace by something like behavioranalysis.find_trials_each_type --
    trialsEachFreq = []
    for indf,oneFreq in enumerate(possibleFreq):
        trialsEachFreq.append(np.flatnonzero(freqEachTrial==oneFreq)) #finds indices of each frequency. Appends them to get an array of indices of trials sorted by freq

    # -- Calculate average firing for each freq --
    for indf,oneFreq in enumerate(possibleFreq):
        meanSpikesEachFrequency[indf] = np.mean(nSpikes[trialsEachFreq[indf]])
    '''
    #clf()
    #if (len(tuning_spkTimeStamps)>0):
    #ax1 = plt.subplot2grid((4,4), (3, 0), colspan=1)
    #spikesorting.plot_isi_loghist(spkData.spikes.timestamps)
    #ax3 = plt.subplot2grid((4,4), (3, 3), colspan=1)
    #spikesorting.plot_events_in_time(tuning_spkTimeStamps)
    #samples = tuning_spkData.spikes.samples.astype(float)-2**15
    #samples = (1000.0/tuning_spkData.spikes.gain[0,0]) *samples
    #ax2 = plt.subplot2grid((4,4), (3, 1), colspan=2)
    #spikesorting.plot_waveforms(samples)
    #ax4 = plt.subplot2grid((4,4), (0, 0), colspan=3,rowspan = 3)
    plot(tuning_spikeTimesFromEventOnset,
         tuning_sortedIndexForEachSpike,
         '.',
         ms=3)
    #axvline(x=0, ymin=0, ymax=1, color='r')

    #The cumulative sum of the list of specific frequency presentations,
    #used below for plotting the lines across the figure.
    numTrials = cumsum(numTrialsEachFreq)

    #Plot the lines across the figure in between each group of sorted trials
    for indf, num in enumerate(numTrials):
        ax.axhline(y=num, xmin=0, xmax=1, color='0.90', zorder=0)

    tickPositions = numTrials - mean(numTrialsEachFreq) / 2
    tickLabels = [
        "%0.2f" % (possibleFreq[indf] / 1000)
        for indf in range(len(possibleFreq))
    ]
    ax.set_yticks(tickPositions)
    ax.set_yticklabels(tickLabels)
    ax.set_ylim([-1, numberOfTrials])
    ylabel('Frequency Presented (kHz), {} total trials'.format(numTrials[-1]))
    #title(ephysSession+' T{}c{}'.format(tetrodeID,clusterID))
    xlabel('Time (sec)')
    '''

    ax5 = plt.subplot2grid((4,4), (0, 3), colspan=1,rowspan=3)
    ax5.set_xscale('log')
    plot(possibleFreq,meanSpikesEachFrequency,'o-')
    ylabel('Avg spikes in window {0}-{1} sec'.format(*responseRange))
    xlabel('Frequency')
    '''
    #show()
Exemple #51
0
def plot_alot(signal_at_elec, cell, t_range, Mea, out_name):
    pl.close('all')
    n_elecs = Mea.n_elecs
    electrode_separation = Mea.electrode_separation
    t_array = cell.tvec
    fig = pl.figure(figsize=[10, 10])
    ax = fig.add_axes([0.25, 0.05, 0.7, .9], frameon=False)
    ax.axis([-0.3,0.6,-0.2,1.1])
    #ax.axis('equal')
    n_compartments = len(cell.diam)#neuron.total_n_compartments
    stim_points = [615, 671,623, 628] 
    comp_list = np.r_[0, 615, 646, 671, 623, 628, 657]
    for i in xrange(n_compartments):
        if i == 0:
            xcoords = pl.array([cell.xmid[i]/1000])
            ycoords = pl.array([cell.ymid[i]/1000])
            zcoords = pl.array([cell.zmid[i]/1000])
            diams = pl.array([cell.diam[i]/1000])
        else:
            if cell.zmid[i] < 100 and cell.zmid[i] > -100 and \
                    cell.xmid[i] < 100 and cell.xmid[i] > -100:
                xcoords = pl.r_[xcoords, pl.linspace(cell.xstart[i], \
                            cell.xend[i], cell.length[i]*3)/1000]
                ycoords = pl.r_[ycoords, pl.linspace(cell.ystart[i], \
                            cell.yend[i], cell.length[i]*3)/1000]   
                zcoords = pl.r_[zcoords, pl.linspace(cell.zstart[i], \
                            cell.zend[i], cell.length[i]*3)/1000]   
                diams = pl.r_[diams, pl.linspace(cell.diam[i], \
                            cell.diam[i],cell.length[i]*3)]
    argsort = pl.argsort(-xcoords)
    ax.scatter(zcoords[argsort], ycoords[argsort], s=3*diams[argsort]**2,\
               edgecolors='none', c='gray')
    ax.scatter(cell.zmid[0]/1000, cell.ymid[0]/1000, \
               s=3*cell.diam[0]**2, edgecolors='none', c='gray')    
    ax.scatter(Mea.elec_z, Mea.elec_y, color='m', s= 50)
    for comp in stim_points:
        ax.plot(cell.zmid[comp]/1000, cell.ymid[comp]/1000, '*', color='y')


    t_start = t_range[0]
    t_stop = t_range[1]
    t_start_index = np.abs(t_array[:] - t_start).argmin()
    t_stop_index = np.abs(t_array[:] - t_stop).argmin()
    t_array = t_array[t_start_index:t_stop_index] - t_array[t_start_index]
    signal_at_elec = signal_at_elec[:,t_start_index:t_stop_index]
    
    #chosen_imem = chosen_imem[t_start_index:t_stop_index]
    time_factor = 0.7*1./(t_stop - t_start)*electrode_separation/2
    signal_range = 15
    pot_factor  = 0.5*electrode_separation/signal_range
    
    for elec in xrange(n_elecs):
        t = t_array*time_factor + Mea.elec_z[elec]
        trace = (signal_at_elec[elec])*pot_factor + Mea.elec_y[elec]  
        ax.plot(t, trace, color='r', lw = 3)

    #pl.figure()
    #pl.plot(t_array, chosen_imem, color='r', lw = 3)
    #pl.plot(t_array[130:-95], chosen_imem[130:-95], color='k', lw = 3)
        
    #ax.axis([-0.1, 0.1, -0.1, 0.1])           
    #ax.axis([1.1*np.min(Mea.elec_z),1.1*np.max(Mea.elec_z) , 1.1*np.min(Mea.elec_y),1.1*np.max(Mea.elec_y) ])
    
    ax.plot([0.2, 0.2 + time_factor *(t_stop - t_start)], \
            [-0.13, -0.13], color='k', lw = 4)
    ax.text(0.205, -0.155, '%g ms' % int(t_stop-t_start))
    
    #ax.plot([0.305, 0.355], [0.01, 0.01], color='k', lw = 4)
    #ax.text(0.32, 0.06, '50 $\mu$m')
    
    ax.plot([0.2, 0.2], [-0.13, -0.13 +pot_factor * signal_range],\
            color='r', lw=4)
    ax.text(0.205, -.21 +electrode_separation/2 , '%g $\mu$V' % signal_range)

    #pl.xlabel('z [$\mu$m]')
    #pl.ylabel('y [$\mu$m]')
    ax.set_xticks([])
    ax.set_yticks([])

    ax2 = fig.add_axes([0.1, 0.1, 0.25, 0.2])
    ax2.plot([20],[8], '*', color='k')
    ax2.plot([40],[8], '*', color='k')
    ax2.plot([60],[8], '*', color='k')
    #ax2.text(pos[20,5], pos[1,5]-0.15, "Arrow", ha="center",
    #    family=font, size=14)

    
    #p = ax2.axvspan(20, 21, facecolor='0.5', alpha=0.2)
    #p = ax2.axvspan(40, 41, facecolor='0.5', alpha=0.2)
    #p = ax2.axvspan(60, 61, facecolor='0.5', alpha=0.2)    
    ax2.axis([10,100, -100,20])
    ax2.set_title('Membrane voltage')
    ax2.set_xlabel('Time [ms]')
    ax2.set_ylabel('mV')
    #ax2.plot(cell.tvec[t_start_index:t_stop_index], \
    #         cell.vmem[0,t_start_index:t_stop_index])
    #import random
    #comp = random.choice(cell.get_idx_section('apic[63]'))
    comp_list = np.r_[0, 615, 646, 671, 623, 628, 657]
    for comp in comp_list:
        ax2.plot(cell.tvec[t_start_index:t_stop_index], \
             cell.vmem[comp,t_start_index:t_stop_index])
        
    ax3 = fig.add_axes([0.1, 0.4, 0.25, 0.2])
    ax3.set_title('Membrane currents')
    ax3.set_xlabel('Time [ms]')
    ax3.axis([10,100, -1,1])
    ax3.plot([20],[.8], '*', color='k')
    ax3.plot([40],[.8], '*', color='k')
    ax3.plot([60],[.8], '*', color='k')
    
    ax3.set_ylabel('nA')
    #ax2.plot(cell.tvec[t_start_index:t_stop_index], \
    #         cell.vmem[0,t_start_index:t_stop_index])
    #import random
    #comp = random.choice(cell.get_idx_section('apic[63]'))
    
    for comp in xrange(len(cell.imem[:,0])):#comp_list:
        ax3.plot(cell.tvec[t_start_index:t_stop_index], \
             cell.imem[comp,t_start_index:t_stop_index])

    ax4 = fig.add_axes([0.1, 0.7, 0.25, 0.2])
    ax4.plot([20],[8], '*', color='k')
    ax4.plot([40],[8], '*', color='k')
    ax4.plot([60],[8], '*', color='k')

    ax4.set_title('Signal at electrodes')
    ax4.set_xlabel('Time [ms]')
    ax4.set_ylabel('$\mu$V')
    ax4.axis([10,100, -15,10])
    for elec in xrange(Mea.n_elecs):
        ax4.plot(cell.tvec[t_start_index:t_stop_index], \
             signal_at_elec[elec,:])
    pl.savefig(out_name) 
Exemple #52
0
def entropy(points, logp, N_entropy=10000, N_norm=2500):
    r"""
    Return entropy estimate and uncertainty from a random sample.

    *points* is a set of draws from an underlying distribution, as returned
    by a Markov chain Monte Carlo process for example.

    *logp* is the log-likelihood for each draw.

    *N_norm* is the number of points $k$ to use to estimate the posterior
    density normalization factor $P(D) = \hat N$, converting
    from $\log( P(D|M) P(M) )$ to $\log( P(D|M)P(M)/P(D) )$. The relative
    uncertainty $\Delta\hat S/\hat S$ scales with $\sqrt{k}$, with the
    default *N_norm=2500* corresponding to 2% relative uncertainty.
    Computation cost is $O(nk)$ where $n$ is number of points in the draw.

    *N_entropy* is the number of points used to estimate the entropy
    $\hat S = - \int P(M|D) \log P(M|D)$ from the normalized log likelihood
    values.
    """

    # Use a random subset to estimate density
    if N_norm >= len(logp):
        norm_points = points
    else:
        idx = permutation(len(points))[:N_norm]
        norm_points = points[idx]

    # Use a different subset to estimate the scale factor between density
    # and logp.
    if N_entropy is None:
        N_entropy = 10000
    if N_entropy >= len(logp):
        entropy_points, eval_logp = points, logp
    else:
        idx = permutation(len(points))[:N_entropy]
        entropy_points, eval_logp = points[idx], logp[idx]

    """
    # Try again, just using the points from the high probability regions
    # to determine the scale factor
    N_norm = min(len(logp), 5000)
    N_entropy = int(0.8*N_norm)
    idx = np.argsort(logp)
    norm_points = points[idx[-N_norm:]]
    entropy_points = points[idx[-N_entropy:]]
    eval_logp = logp[idx[-N_entropy:]]
    """

    # Normalize p to a peak probability of 1 so that exp() doesn't underflow.
    #
    # This should be okay since for the normalizing constant C:
    #
    #      u' = e^(ln u + ln C) = e^(ln u)e^(ln C) = C u
    #
    # Using eq. 11 of Kramer with u' substituted for u:
    #
    #      N_est = < u'/p > = < C u/p > = C < u/p >
    #
    #      S_est = - < ln q >
    #            = - < ln (u'/N_est) >
    #            = - < ln C + ln u - ln (C <u/p>) >
    #            = - < ln u + ln C - ln C - ln <u/p> >
    #            = - < ln u - ln <u/p> >
    #            = - < ln u > + ln <u/p>
    #
    # Uncertainty comes from eq. 13:
    #
    #      N_err^2 = 1/(k-1) sum( (u'/p - <u'/p>)^2 )
    #              = 1/(k-1) sum( (C u/p - <C u/p>)^2 )
    #              = C^2 std(u/p)^2
    #      S_err = std(u'/p) / <u'/p> = (C std(u/p))/(C <u/p>) = std(u/p)/<u/p>
    #
    # So even though the constant C shows up in N_est, N_err, it cancels
    # again when S_est, S_err is formed.
    log_scale = np.max(eval_logp)
    # print("max log sample: %g"%log_scale)
    eval_logp -= log_scale

    # Compute entropy and uncertainty in nats
    # Note: if all values are the same in any dimension then we have a dirac
    # functional with infinite probability at every sample point, and the
    # differential entropy estimate will yield H = -inf.
    rho = density(norm_points, entropy_points)
    #print(rho.min(), rho.max(), eval_logp.min(), eval_logp.max())
    frac = exp(eval_logp)/rho
    n_est, n_err = mean(frac), std(frac)
    if n_est == 0.:
        s_est, s_err = -np.inf, 0.
    else:
        s_est = log(n_est) - mean(eval_logp)
        s_err = n_err/n_est
    #print(n_est, n_err, s_est/LN2, s_err/LN2)
    ##print(np.median(frac), log(np.median(frac))/LN2, log(n_est)/LN2)
    if False:
        import pylab
        idx = pylab.argsort(entropy_points[:, 0])
        pylab.figure()
        pylab.subplot(221)
        pylab.hist(points[:, 0], bins=50, normed=True, log=True)
        pylab.plot(entropy_points[idx, 0], rho[idx], label='density')
        pylab.plot(entropy_points[idx, 0], exp(eval_logp+log_scale)[idx], label='p')
        pylab.ylabel("p(x)")
        pylab.legend()
        pylab.subplot(222)
        pylab.hist(points[:, 0], bins=50, normed=True, log=False)
        pylab.plot(entropy_points[idx, 0], rho[idx], label='density')
        pylab.plot(entropy_points[idx, 0], exp(eval_logp+log_scale)[idx], label='p')
        pylab.ylabel("p(x)")
        pylab.legend()
        pylab.subplot(212)
        pylab.plot(entropy_points[idx, 0], frac[idx], '.')
        pylab.xlabel("P[0] value")
        pylab.ylabel("p(x)/kernel density")

    # return entropy and uncertainty in bits
    return s_est/LN2, s_err/LN2
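As a sanity check of the estimator's shape (not of this exact implementation), the same log-mean-ratio formula can be exercised on a 1-D standard normal, whose differential entropy is 0.5*ln(2*pi*e), about 1.419 nats. A minimal sketch assuming scipy's gaussian_kde as the density estimate and an exact log-density standing in for the sampler's logp:

import numpy as np
from scipy.stats import gaussian_kde, norm

rng = np.random.default_rng(0)
points = rng.standard_normal(5000)
logp = norm.logpdf(points)                  # stands in for the sampler's logp

rho = gaussian_kde(points)(points)          # kernel density at each draw
log_scale = logp.max()                      # rescale so exp() does not underflow
frac = np.exp(logp - log_scale) / rho
s_est_nats = np.log(frac.mean()) - np.mean(logp - log_scale)
print(s_est_nats, 0.5 * np.log(2 * np.pi * np.e))   # both should be near 1.419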
def val_distribution(df, val, categories, plot_func, plot_descriptor,\
                     outdir=FIGS_DIR, fig_suffix=None, category_subset=None,\
                     **kwargs):
    for category in categories:
        subset_cols = ['neuron name', 'neuron type', 'alpha']
        if category != 'neuron type':
            subset_cols.append(category)
        df2 = df.drop_duplicates(subset=subset_cols)

        if category_subset != None:
            df2 = df2[df2[category].isin(category_subset)]
        else:
            df2 = remove_small_counts(df2, category,\
                                      min_count=CATEGORY_MIN_COUNTS[category])

        log_transform = False
        if ('log_transform' in kwargs) and kwargs['log_transform']:
            log_transform = True

        if log_transform:
            df2[val] = pylab.log10(df2[val])

        cat_vals = []
        order_vals = []
        order_val = kwargs['order_val']
        print "-----------------------------------------------------"
        for name, group in df2.groupby(category):
            cat_vals.append(name)
            if order_val == None:
                order_vals.append(pylab.median(group[val]))
            else:
                order_vals.append(pylab.mean(group[order_val]))
            print name, val, pylab.mean(group[val]), "+/-", pylab.std(
                group[val], ddof=1)

        cat_vals = pylab.array(cat_vals)
        #mean = pylab.array(medians)
        order = pylab.argsort(order_vals)
        order = cat_vals[order]

        pylab.figure()
        sns.set()
        dist_plot = plot_func(x=category, y=val, data=df2, order=order)
        dist_plot.tick_params(axis='x', labelsize=20, rotation=75)
        dist_plot.tick_params(axis='y', labelsize=20)
        #pylab.xlabel(category, fontsize=20)
        dist_plot.xaxis.label.set_visible(False)

        ylab = None
        if 'ylab' in kwargs:
            ylab = kwargs['ylab']
        else:
            ylab = val
        if log_transform:
            ylab = 'log(' + ylab + ')'
        pylab.ylabel(ylab, fontsize=20)

        pylab.tight_layout()

        fname = '%s_%ss_%s' % (category.replace(' ', '_'),\
                              val.replace(' ', '_'), plot_descriptor)
        if fig_suffix != None:
            fname += '_%s' % fig_suffix
        pylab.savefig('%s/%s.pdf' % (outdir, fname), format='pdf')
        pylab.close()