def log2_oulierfilter(df_by_cell, plot=False):
    log2_df = np.log2(df_by_cell+1)
    top_log2 = find_top_common_genes(log2_df)
    if top_log2.empty:
        print("no common genes found")
        return log2_df, log2_df.transpose()
    log2_df2 = log2_df.apply(pd.to_numeric, errors='coerce')
    log_mean = top_log2.mean(axis=0).sort_values(ascending=False)
    log2_sorted = top_log2.reindex(top_log2.mean(axis=0).sort_values(ascending=False).index, axis=1)
    xticks = []
    keep_col= []
    log2_cutoff = np.average(log2_sorted)-np.std(log2_sorted)
    avg_cutoff = np.average(log2_cutoff)
    for col, m in zip(log2_sorted.columns.tolist(),log2_sorted.mean()):
        if m > avg_cutoff:
            keep_col.append(col)
            xticks.append(col+' '+str("%.2f" % m))
    filtered_df_by_cell = df_by_cell[keep_col]
    filtered_df_by_gene = filtered_df_by_cell.transpose()
    filtered_log2 = np.log2(filtered_df_by_cell[filtered_df_by_cell>0])
    if plot:
        ax = sns.boxplot(data=filtered_log2, whis= .75, notch=True)
        ax = sns.stripplot(x=filtered_log2.columns.values, y=filtered_log2.mean(axis=0), size=4, jitter=True, edgecolor="gray")
        xtickNames = plt.setp(ax, xticklabels=xticks)
        plt.setp(xtickNames, rotation=90, fontsize=9)
        plt.show()
        plt.clf()
        sns.distplot(filtered_log2.mean())
        plt.show()
    log2_expdf_cell = np.log2(filtered_df_by_cell+1)
    log2_expdf_gene = log2_expdf_cell.transpose()
    return log2_expdf_cell, log2_expdf_gene
 def __add__(self, other):
     """
     Addition in real space; an optimization of Manning & Schuetze,
     p. 337 (eq. 9.21)
     
     >>> a_real = .5
     >>> b_real = .25
     >>> a_bw = BitWeight(a_real)
     >>> b_bw = BitWeight(b_real)
     >>> BitWeight.close_enough((a_bw + b_bw).to_real, a_real + b_real)
     True
     >>> (BitWeight(.25) + BitWeight(.25)).to_real
     0.5
     """
     other_bw = other if hasattr(other, "bw") else BitWeight(other)
     if other_bw.bw - self.bw > self.BIG:
         to_return = self.bw
     elif self.bw - other_bw.bw > self.BIG:
         to_return = other_bw.bw
     else:
         if other_bw.bw > self.bw:
             to_return = other_bw.bw - log2(1.0 + exp2(other_bw.bw - self.bw))
         elif other_bw.bw < self.bw:
             to_return = self.bw - log2(exp2(self.bw - other_bw.bw) + 1.0)
         else:
             to_return = other_bw.bw - 1.0
             # not 1 + x_bw.bw as you might think, as BWs are
             # NEGATIVE log-weights
     return BitWeight(to_return, True)
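Since a BitWeight stores a probability as its negative base-2 log (bw == -log2(p)), the branches above rely on the identity -log2(2**-a + 2**-b) == b - log2(1 + 2**(b - a)) for b >= a. A quick standalone check of that identity (not part of the original class):

from math import log2

a, b = 1.0, 2.0                      # bw values for p = 0.5 and p = 0.25
lhs = -log2(2**-a + 2**-b)           # -log2(0.75)
rhs = b - log2(1 + 2**(b - a))
assert abs(lhs - rhs) < 1e-12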
Example #3
def SB_MotifTwo(y,binarizeHow='diff'):
    """
    Looks at local motifs in a binary symbolization of the time series, which is performed by a
    given binarization method
    
    Arguments
    ---------

    y: a nitime time-series object, or numpy vector

    binarizeHow: the binarization method; only 'diff' (sign of successive
                 differences) is currently implemented

    """
    
    # Make the input a row vector of numbers:
    y = makeRowVector(vectorize(y))

    # Make binarization on incremental differences:
    if binarizeHow == 'diff':
        yBin = ((np.sign(np.diff(y)))+1.)/2.
    else:
        raise ValueError(binarizeHow)
        
    # Initialize output dictionary
    out = {}
    
    # Binary indicators for where the binarized series is 0 (down) or 1 (up)
    r0 = yBin==0
    r1 = yBin==1
    

    out['u'] = np.mean(r1)
    out['d'] = np.mean(r0)
    out['h'] = -(out['u']*np.log2(out['u']) + out['d']*np.log2(out['d']))
    
    return out
Example #4
def Classification(theta, tag_dict, feature_dict, features):
    """
    Predict the tag with the value of theta for naive bayes model

    theta value is used to predict the tag
    """
    total = sum(tag_dict.values())
    prob = []
    for tag in range(2):
        tag = str(tag)
        probability = np.log2(tag_dict[tag] / total)
        searchFlag = 1
        for featureID in range(len(features)):
            feature = features[featureID]
            item = (tag, feature, featureID)
            if item not in feature_dict:
                searchFlag = 0
                break
            tmpProb = np.log2(feature_dict[item] / tag_dict[tag])
            probability = probability + tmpProb
        prob.append(probability)
        if searchFlag == 0:
            break
    if searchFlag == 0:
        if tag == '1':
            predict_tag = '0'
        else:
            predict_tag = '1'
    else:
        ratio = 2.0 ** (prob[1] - prob[0])
        if ratio >= theta:
            predict_tag = '1'
        else:
            predict_tag = '0'
    return predict_tag
Example #5
def __cqt_filter_fft(sr, fmin, n_bins, bins_per_octave, tuning,
                     filter_scale, norm, sparsity, hop_length=None,
                     window='hann'):
    '''Generate the frequency domain constant-Q filter basis.'''

    basis, lengths = filters.constant_q(sr,
                                        fmin=fmin,
                                        n_bins=n_bins,
                                        bins_per_octave=bins_per_octave,
                                        tuning=tuning,
                                        filter_scale=filter_scale,
                                        norm=norm,
                                        pad_fft=True,
                                        window=window)

    # Filters are padded up to the nearest integral power of 2
    n_fft = basis.shape[1]

    if (hop_length is not None and
            n_fft < 2.0**(1 + np.ceil(np.log2(hop_length)))):

        n_fft = int(2.0 ** (1 + np.ceil(np.log2(hop_length))))

    # re-normalize bases with respect to the FFT window length
    basis *= lengths[:, np.newaxis] / float(n_fft)

    # FFT and retain only the non-negative frequencies
    fft_basis = fft.fft(basis, n=n_fft, axis=1)[:, :(n_fft // 2)+1]

    # sparsify the basis
    fft_basis = util.sparsify_rows(fft_basis, quantile=sparsity)

    return fft_basis, n_fft, lengths
Example #6
File: cqt.py Project: dongying/dear
 def walk(self, Q, freq_base=A0, freq_max=C8, hop=0.02, start=0, end=None,
         join_channels=True, win_shape=numpy.hamming):
     ''''''
     #
     Q = int(Q)
     assert Q > 1
     #
     samplerate = self.audio.samplerate
     if not freq_max: freq_max = samplerate/2.0
     assert 1 <= freq_base <= freq_max <= samplerate/2.0
     #
     step = int(samplerate * hop)
     win = int(round(Q * float(samplerate) / freq_base))
     assert 0 < step <= win 
     #
     k_max = int(numpy.log2(float(freq_max)/freq_base) \
             / numpy.log2(float(Q+1)/Q))
     #
     var = self.pre_calculate(Q, k_max, win, win_shape)
     print(len(var.WL), var.WL)
     fqs = []
     for wl in var.WL:
         fqs.append("%.2f" % (float(samplerate) / wl * Q))
     print(fqs)
     transform = self.transform
     #
     for samples in self.audio.walk(win, step, start, end, join_channels):
         if join_channels:
             yield transform(samples, Q, k_max, pre_var=var)
         else:
             yield [transform(ch,Q,k_max,pre_var=var) \
                     for ch in samples]
def mutual_info(X, Y):
    """ Calculate the mutual information between X and Y.

        Note: The input X, Y may look like this:

            X = [event1, event2, event3, ..., eventn]
            Y = [event1, event2, event3, ..., eventn]

        Every event_i represents the outcome of that random variable.

        Args:
            X: An array-like random variable.
            Y: An array-like random variable.

        Returns:
            The mutual information score between X and Y.
    """
    X_np = np.array(X)
    Y_np = np.array(Y)

    P_t1 = np.average(X_np)
    P_t0 = 1 - P_t1
    P_c1 = np.average(Y_np)
    P_c0 = 1 - P_c1
    P_t1c1 = np.average(X_np & Y_np)
    P_t1c0 = np.average(X_np & ~Y_np)
    P_t0c1 = np.average(~X_np & Y_np)
    P_t0c0 = np.average(~X_np & ~Y_np)

    mi = P_t1c1 * np.log2(P_t1c1 / (P_t1 * P_c1)) +\
        P_t1c0 * np.log2(P_t1c0 / (P_t1 * P_c0)) +\
        P_t0c1 * np.log2(P_t0c1 / (P_t0 * P_c1)) +\
        P_t0c0 * np.log2(P_t0c0 / (P_t0 * P_c0))

    return mi if not np.isnan(mi) else 0
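A small usage sketch; the inputs are cast to boolean arrays here because the implementation relies on the bitwise operators & and ~ behaving as logical AND/NOT, which only holds for NumPy bool dtypes:

import numpy as np

X = np.array([1, 1, 0, 0, 1, 0, 1, 0], dtype=bool)
Y = np.array([1, 1, 0, 0, 1, 0, 0, 1], dtype=bool)
print(mutual_info(X, Y))   # mutual information in bits; 0 would indicate independence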
Example #8
    def __entropy__(self, attribute, sample, free=None):
        """
        Calculates the entropy of a given attribute.
        :type attribute: str
        :param attribute: The attribute name.

        :type free: str
        :param free: optional -- If the attribute is dependent on other attribute. In this case,
            it shall be provided the name of the free attribute.

        :rtype: tuple
        :return: A tuple containing the name of the attribute alongside its entropy.
        """
        if not free:
            # np.sum over a bare map object misbehaves in Python 3, so build a list
            return attribute, -1. * np.sum([
                (float(x) / len(sample)) * np.log2(float(x) / len(sample))
                for x in Counter(y.nodes[attribute].color for y in sample).values()
            ])
        else:
            conditionals = Counter(map(lambda x: (x.nodes[attribute].color, x.nodes[free].color), sample))

            entropy = 0.
            for value in set(
                    map(lambda x: x[0], conditionals.keys())):  # iterates over the values of the conditioned attribute
                marginal = self.__marginalize__(conditionals, value)
                entropy += marginal * np.log2(marginal)

            return (attribute, free), -1. * entropy
Example #9
def hurstexp_welchper(data, samp=1.05, f_max=0, consider_fBm=False):
    """
    These functions compute the Hurst exponent of a signal using the
    Welch periodogram
    data : your signal
    samp : sampling rate in Hz 1 for an fMRI series
    f_max: the higher frequency you want to take into account
    """
    # data could be two-dimensional (but no more...); in that case time should
    # be on the second axis
    time_series = TimeSeries(data=data, sampling_rate=samp)
    spectral_analysis = SpectralAnalyzer(time_series)
    frq, pwr = spectral_analysis.psd
    #We need to take only the small frequency, but the exact choice is a
    #bit arbitrary we need to have alpha between 0 and 1
    if f_max==0:
        masker = frq > 0
    else:
        masker = np.all([(frq > 0), (frq < f_max)], axis=0)
    log2frq = np.log2(frq[masker])
    log2pwr = np.log2(pwr.T[masker])
    tmp = np.polyfit(log2frq, log2pwr, deg=1)
    if consider_fBm:
        return (1 - tmp[0]) / 4, {'aest': tmp[1], 'log2frq': log2frq, 'log2pwr': log2pwr}
    return (1 - tmp[0]) / 2, {'aest': tmp[1], 'log2frq': log2frq, 'log2pwr': log2pwr}
	def __lazy_cost_function__(H, Y):
		result = 0.0
		for i in range(0, Y.shape[0]):
			a = np.nan_to_num(np.log2(H[i]) * Y[i])
			b = np.nan_to_num((1. - Y[i]) * np.log2((1. - H[i])))
			result += a + b
		return result
Example #11
 def getBits(self,cell):
         zero=[-self.markerArea[i]/2. for i in [0,1]]
         bitx=[int(i) for i in bin(int(cell[0]))[::-1][:-2]]
         bity=[int(i) for i in bin(int(cell[1]))[::-1][:-2]]
         s0=int(np.log2(self.cellsPerBlock[0]*self.noBlocks[0]))
         s1=int(np.log2(self.cellsPerBlock[1]*self.noBlocks[1]))
         for i in range(s0-len(bitx)):
             bitx.append(0)
         for i in range(s1-len(bity)):
             bity.append(0)
         tx=np.zeros(s0,dtype=bool)
         ty=np.zeros(s1,dtype=bool)
         px=np.empty((s0,2))
         py=np.empty((s1,2))
         for i,b in enumerate(bitx):
             x=zero[0]+mod(i+1,self.noBitsX)*self.bitDistance
             y=zero[1]+((i+1)/self.noBitsY)*self.bitDistance
             px[i]=(x,y)
             tx[i]=b
         for i,b in enumerate(bity):
             x=zero[0]+(self.noBitsX-mod(i+1,self.noBitsX)-1)*self.bitDistance
             y=zero[1]+(self.noBitsY-(i+1)/self.noBitsY-1)*self.bitDistance
             py[i]=(x,y)
             ty[i]=b
         return px,py,tx,ty
    def __init__(self, data,  noDataValue):


        #dataValues = [x[0] for x in dataTable if x[0] <> noDataValue]
        #data = sorted(dataValues)
        d = data[data["DataValue"]!= noDataValue].describe(percentiles = [.10,.25,.5,.75,.90])
        count = self.NumberofObservations = d["DataValue"]["count"]
        self.NumberofCensoredObservations = data[data["CensorCode"]!= "nc"].count()
        self.ArithemticMean = round(d["DataValue"]["mean"], 5)

        sumval = 0
        sign = 1
        for dv in data["DataValue"]:
            if dv == 0:
                sumval = sumval + numpy.log2(1)
            else:
                if dv < 0:
                    sign = sign * -1
                sumval = sumval + numpy.log2(numpy.absolute(dv))

        if count > 0:
            self.GeometricMean = round(sign * (2 ** float(sumval / float(count))), 5)
            self.Maximum = round(d["DataValue"]["max"], 5)
            self.Minimum = round(d["DataValue"]["min"], 5)
            self.StandardDeviation = round(d["DataValue"]["std"], 5)
            self.CoefficientofVariation = round(d["DataValue"]["std"] / d["DataValue"]["mean"], 5)

            ##Percentiles
            self.Percentile10 = round(d["DataValue"]["10%"], 5)
            self.Percentile25 = round(d["DataValue"]["25%"], 5)
            self.Percentile50 = round(d["DataValue"]["50%"], 5)
            self.Percentile75 = round(d["DataValue"]["75%"], 5)
            self.Percentile90 = round(d["DataValue"]["90%"], 5)
Example #13
def rms_total(x, window_size=256):
	n_windows = int(pow(2,numpy.log2(len(x))-numpy.log2(window_size)))
	rms_tot = numpy.zeros(n_windows)
	for i in range(n_windows):
		w = x[i*window_size:(i+1)*window_size]
		rms_tot[i] = rms(w,window_size)
	return rms_tot
Example #14
 def stop(k,k_l,k_r):
   gain =  E-T[T_min]
   def count(lst): return len(Counter(lst).keys())
   delta = np.log2(float(3**count(k)-2)) - (
       count(k)*measure(k)-count(k_l)*measure(k_l)-count(k_r)*measure(k_r))
   # print(gain, (np.log2(N-1)+delta)/N)
   return gain<(np.log2(N-1)+delta)/N or T_min==0
Example #15
def prepare_logged(x, y):
    """
    Transform `x` and `y` to a log scale while dealing with zeros.

    This function scales `x` and `y` such that the points that are zero in one
    array are set to the min of the other array.

    When plotting expression data, frequently one sample will have reads in
    a particular feature but the other sample will not.  Expression data also
    tends to look better on a log scale, but log(0) is undefined and therefore
    cannot be shown on a plot.  This function allows these points to be shown,
    piled up along one side of the plot.

    :param x,y: NumPy arrays
    """
    xi = np.log2(x)
    yi = np.log2(y)

    xv = np.isfinite(xi)
    yv = np.isfinite(yi)

    global_min = min(xi[xv].min(), yi[yv].min())
    global_max = max(xi[xv].max(), yi[yv].max())

    xi[~xv] = global_min
    yi[~yv] = global_min

    return xi, yi
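A quick usage sketch (plain NumPy float arrays assumed); zeros in either input end up pinned at the global minimum of the finite log2 values:

import numpy as np

x = np.array([0., 4., 16., 64.])
y = np.array([2., 0., 16., 32.])
xi, yi = prepare_logged(x, y)
print(xi)   # [1. 2. 4. 6.]  -- the zero in x is set to log2(2) = 1, the global minimum
print(yi)   # [1. 1. 4. 5.]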
Example #16
def hz_to_midi(frequencies):
    """Get MIDI note number(s) for given frequencies

    Examples
    --------
    >>> librosa.hz_to_midi(60)
    34.506
    >>> librosa.hz_to_midi([110, 220, 440])
    array([ 45.,  57.,  69.])

    Parameters
    ----------
    frequencies   : float or np.ndarray [shape=(n,), dtype=float]
        frequencies to convert

    Returns
    -------
    note_nums     : number or np.ndarray [shape=(n,), dtype=float]
        MIDI notes to `frequencies`

    See Also
    --------
    midi_to_hz
    note_to_midi
    hz_to_note
    """

    return 12 * (np.log2(np.asanyarray(frequencies)) - np.log2(440.0)) + 69
Example #17
def means2idxarrays(g1, g2, i_bins, c_bins, difference):
    '''take two arrays of values and return the initial values
    and differences as numpy digitised arrays'''

    if difference == "relative":
        # calculate difference between mean values for group1 and group2
        # g1 and g2 always the same length
        change = [g2[x] - g1[x] for x in range(0, len(g1))]
        initial = g1

    elif difference == "logfold":
        change = [np.log2((g2[x] + 1.0) / (g1[x] + 1.0))
                  for x in range(0, len(g1))]
        initial = [np.log2(g1[x] + 1.0) for x in range(0, len(g1))]

    elif difference == "abs_logfold":
        change = [abs(np.log2((g2[x] + 1.0) / (g1[x] + 1.0)))
                  for x in range(0, len(g1))]
        initial = [max(np.log2(g1[x] + 1.0), np.log2(g2[x] + 1.0))
                   for x in range(0, len(g1))]

    # return arrays of len(change) with the index position in c_bins
    # corresponding to the bin in which the value of change falls
    change_idx = np.digitize(change, c_bins, right=True)
    initial_idx = np.digitize(initial, i_bins, right=True)

    return(change_idx, initial_idx)
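A small illustration with made-up group means and bin edges (all of the values below are placeholders):

import numpy as np

g1, g2 = [1.0, 4.0, 10.0], [2.0, 4.0, 40.0]
i_bins = np.linspace(0, 8, 9)    # bins for the initial log2 expression values
c_bins = np.linspace(-4, 4, 9)   # bins for the log2 fold changes
change_idx, initial_idx = means2idxarrays(g1, g2, i_bins, c_bins, "logfold")
print(change_idx, initial_idx)   # per-gene bin indices for change and initial expression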
Example #18
File: oracle.py Project: himito/vmo
    def _ir_cum(self, alpha=1.0):
        code, _ = self.encode()

        N = self.n_states

        cw0 = np.zeros(N - 1)  # cw0 counts the appearance of new states only
        cw1 = np.zeros(N - 1)  # cw1 counts the appearance of all compror states
        BL = np.zeros(N - 1)  # BL is the block length of compror codewords

        j = 0
        for i in range(len(code)):
            if self.code[i][0] == 0:
                cw0[j] = 1
                cw1[j] = 1
                BL[j] = 1
                j += 1
            else:
                L = code[i][0]
                cw1[j] = 1
                BL[j:j + L] = L  # range(1,L+1)
                j = j + L

        h0 = np.log2(np.cumsum(cw0))
        h1 = np.log2(np.cumsum(cw1))
        h1 = h1 / BL
        ir = alpha * h0 - h1
        ir[ir < 0] = 0

        return ir, h0, h1
Example #19
File: oracle.py Project: himito/vmo
    def _ir_cum2(self, alpha=1.0):
        code, _ = self.encode()

        N = self.n_states
        BL = np.zeros(N - 1)  # BL is the block length of compror codewords

        h0 = np.log2(np.cumsum(
            [1.0 if sfx == 0 else 0.0 for sfx in self.sfx[1:]])
        )
        """
        h1 = np.array([h if m == 0 else h+np.log2(m) 
                       for h,m in zip(h0,self.lrs[1:])])
        h1 = np.array([h if m == 0 else h+np.log2(m) 
                       for h,m in zip(h0,self.max_lrs[1:])])
        h1 = np.array([h if m == 0 else h+np.log2(m) 
                       for h,m in zip(h0,self.avg_lrs[1:])])
        """
        h1 = np.array([np.log2(i + 1) if m == 0 else np.log2(i + 1) + np.log2(m)
                       for i, m in enumerate(self.max_lrs[1:])])

        j = 0
        for i in range(len(code)):
            if self.code[i][0] == 0:
                BL[j] = 1
                j += 1
            else:
                L = code[i][0]
                BL[j:j + L] = L  # range(1,L+1)
                j = j + L

        h1 = h1 / BL
        ir = alpha * h0 - h1
        ir[ir < 0] = 0  # Really a HACK here!!!!!
        return ir, h0, h1
Example #20
	def test_quasigraph(self, plot=False):
		sol = self.solver
		errz = []
		errl = []
		ks = np.arange(1,5)
		for k in ks:
			self.scheme.h = pow(2,-k)
			sol.initialize(u0=self.u0,time=1, name='{0}_{1}'.format(type(self).__name__, k))
			sol.run()
			zexact = sol.system.exact(sol.final_time(),self.u0)[0]
			lexact = sol.system.exact(sol.final_time(),self.u0)[2]
			df = sol.final()[0] - zexact
			logerrz = np.log2(np.abs(df))
			logerrl = np.log2(np.abs(sol.final()[2] - lexact))
			errz.append(logerrz)
			errl.append(logerrl)
		plt.clf()
		plt.subplot(1,2,1)
		plt.title('z')
		regz = order.linear_regression(ks,errz,do_plot=True)
		plt.plot(ks,errz,'o-')
		plt.legend()
		plt.subplot(1,2,2)
		plt.title(u'λ')
		regl = order.linear_regression(ks,errl,do_plot=True)
		plt.plot(ks,errl,'o-')
		plt.legend()
		oz = -regz[0]
		ol = -regl[0]
		nt.assert_greater(ol, self.expected_orders[0] - self.tol)
		nt.assert_greater(oz, self.expected_orders[1] - self.tol)
		return sol
Example #21
def calc_MI_cate_feat_target(column, target, num_bins):

    vals, tmp_indexer = pd.factorize(column, na_sentinel=-1)

    p_neg = 0.238801
    p_pos = 0.761199

    max_cate = np.max(vals)
    densitys, bin_edges = np.histogram(vals, density=True)
    #print densitys


    #print 'start'
    final_mi = 0
    for level in range(-1, max_cate+1):
        p_cate_pos = np.sum((vals == level) & (target == 1)) / float(column.shape[0])
        p_cate_neg = np.sum((vals == level) & (target == 0)) / float(column.shape[0])
        p_cate = np.sum((vals == level)) / float(column.shape[0])
        if p_cate_pos == 0 or p_cate_neg == 0:
            continue
        final_mi += p_cate_pos * np.log2(p_cate_pos / (p_cate * p_pos))
        final_mi += p_cate_neg * np.log2(p_cate_neg / (p_cate * p_neg))
        #print '%d, %f' %(level, final_mi)

    return final_mi
def TtoJ(T,Q=1,B=None,phibwratio=None):
    """
    Compute the maximal wavelet scale J such that for a filter bank
    the largest wavelet is of bandwidth approximately T.

    Parameters:
    -----------
    T: int
       Time bandwidth for window
    Q: int
       Number of wavelets per octave
    B: int
       The reciprocal per-octave bandwidth of the wavelets
    phibwratio: float
       ratio between the lowpass filter phi and the lowest-frequency wavelet. Default is 2 if Q is 1 and otherwise 1.

    Returns
    --------
    J: int
       Number of logarithmically spaced wavelets

    """
    if B is None: B = Q
    if phibwratio is None:
        if type(Q) == np.ndarray:
            phibwratio=1.+(Q==1).astype(int)
        else:
            phibwratio=1+int(Q==1)
    
    if type(Q) == np.ndarray:
        return 1+ (np.log2(T/(4*B/phibwratio))*Q+.5).astype(int)
    else:
        return 1+ int(np.log2(T/(4*B/phibwratio))*Q+.5)
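For a scalar Q the whole computation is a one-liner; for example, with T = 1024, Q = 8 and the defaults B = Q, phibwratio = 1, we get log2(1024/32)*8 + .5 = 40.5, so:

print(TtoJ(1024, Q=8))   # 41 logarithmically spaced wavelets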
Example #23
def DFA(indata,scale,q,m):
    y = np.cumsum(indata-indata.mean())             #Equation 1 in paper
    RMSt = []                                       #Temporary RMS variable: contain F(s,v) value
    F = []                                          #F: Fluctuation function
    N = len(indata)
    print('len indata: ', N)
    for i in range(len(scale)):
        ns = int(np.floor(len(y)/scale[i]))         #number of segments: Ns = int(N/s)
        for v in range(2*ns):
            if v < ns:
                index_start = v*scale[i]
                index_end = (v+1)*scale[i]
            else:
                index_start = N - (v-ns)*scale[i]-scale[i]
                index_end = N - (v-ns)*scale[i]
            index = range(index_start,index_end)    #calculate index for each segment
            yv = y[index_start:index_end]           #Extract values of time series for each segments
            c = np.polyfit(index,yv,m)
            fit = np.polyval(c,index)
            RMSt.append(math.sqrt(np.mean((yv-fit)**2))) #Equation 2. But calculating only F(v,s) not F(v,s)**2
        RMS = np.asarray(RMSt)                      #Convert RMSt to array
        qRMS = RMS**q
        F.append(np.mean(qRMS)**(1.0/q))              #Equation 4
        del RMSt[:]                                 #Reset RMSt[:]
    C = np.polyfit(np.log2(scale),np.log2(F),1)
    H = C[0]                                        #Hurst parameter
    return (H,scale,F)
	def CostFunction(self, X, y, regularization_parameter, count_sigmas = False):
		X = np.matrix(X)
		y = np.matrix(y)
		outs = self.FeedForward(X)

		J = 0

		for thetas in self.Thetas:
			J = J + np.sum(np.square(thetas[:, 1]))
		J = J * regularization_parameter
		#linear
		#J = J + np.sum(np.square(outs - y))
		#logistic
		J = J + np.sum(- (np.multiply(y, np.log2(outs)) + np.multiply((1 - y), np.log2(1 - outs))))
		J = J / len(X)
		
		if count_sigmas == False:
			return J

		#linear
		#self.Sigmas[-1] = np.multiply(np.multiply((outs - y), outs), 1 - outs) * 2
		#logistic
		self.Sigmas[-1] = (outs - y) / math.log1p(1)

		for i in reversed(range(0, self.n_hidden_layers)):
			self.Sigmas[i] = np.multiply(self.activations[i + 1][:, 1:], 1 - self.activations[i + 1][:, 1:])
			self.Sigmas[i] = np.multiply(self.Sigmas[i], self.Sigmas[i + 1] * self.Thetas[i + 1][:, 1:]) 

		return J
Example #25
def center_zoom(Lngs, Lats):
	# Find the bounding box
	minLon, minLat, maxLon, maxLat = min(Lngs), min(Lats), max(Lngs), max(Lats)
	deltaLon, deltaLat = (maxLon - minLon),  (maxLat - minLat)

	centerLon = minLon + .5*deltaLon
	centerLat = minLat + .5*deltaLat

	zoomxfac = 3600.
	zoomyfac = 2925.
	if deltaLon != 0:
		pixXperdeg =  (512.0/deltaLon)
	else:
		pixXperdeg = 1.
	if deltaLat != 0:
		pixYperdeg =  (512.0/deltaLat)
	else:
		pixYperdeg = 1.

	# conversion to zoom
	dx = pixXperdeg/zoomxfac
	dy = pixYperdeg/zoomyfac
	zx = np.floor(12+np.log2(dx))
	zy = np.floor(12+np.log2(dy))
	zoom = min(zx, zy)
	if zoom < 10:
		zoom = 10
	if zoom > 19:
		zoom = 19
	return centerLon, centerLat, zoom
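A usage sketch with a made-up bounding box (the coordinates are placeholders, and numpy must already be imported as np for the function itself):

lngs = [-122.52, -122.35, -122.40]
lats = [37.70, 37.83, 37.75]
lon, lat, zoom = center_zoom(lngs, lats)
print(lon, lat, zoom)   # centre of the box and a zoom level clamped to the range [10, 19]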
Example #26
def smartirs_wlocal(tf, local_scheme):
    """Calculate local term weight for a term using the weighting scheme specified in `local_scheme`.

    Parameters
    ----------
    tf : int
        Term frequency.
    local_scheme : {'n', 'l', 'a', 'b', 'L'}
        Local transformation scheme.

    Returns
    -------
    float
        Calculated local weight.

    """
    if local_scheme == "n":
        return tf
    elif local_scheme == "l":
        return 1 + np.log2(tf)
    elif local_scheme == "a":
        return 0.5 + (0.5 * tf / tf.max(axis=0))
    elif local_scheme == "b":
        return tf.astype('bool').astype('int')
    elif local_scheme == "L":
        return (1 + np.log2(tf)) / (1 + np.log2(tf.mean(axis=0)))
Example #27
def smartirs_wglobal(docfreq, totaldocs, global_scheme):
    """Calculate global document weight based on the weighting scheme specified in `global_scheme`.

    Parameters
    ----------
    docfreq : int
        Document frequency.
    totaldocs : int
        Total number of documents.
    global_scheme : {'n', 't', 'p'}
        Global transformation scheme.

    Returns
    -------
    float
        Calculated global weight.

    """

    if global_scheme == "n":
        return 1.
    elif global_scheme == "t":
        return np.log2(1.0 * totaldocs / docfreq)
    elif global_scheme == "p":
        return max(0, np.log2((1.0 * totaldocs - docfreq) / docfreq))
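A brief sketch exercising both SMART helpers above (the term frequencies and document counts are placeholders):

import numpy as np

tf = np.array([1, 3, 9])
print(smartirs_wlocal(tf, "l"))        # 1 + log2(tf)
print(smartirs_wlocal(tf, "L"))        # log-average normalised term frequency
print(smartirs_wglobal(5, 100, "t"))   # log2(100 / 5) ~= 4.32
print(smartirs_wglobal(5, 100, "p"))   # probabilistic idf, log2(95 / 5)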
Example #28
def _hist_bin_doane(x):
    """
    Doane's histogram bin estimator.

    Improved version of Sturges' formula which works better for
    non-normal data. See
    stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    if x.size > 2:
        sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3)))
        sigma = np.std(x)
        if sigma > 0.0:
            # These three operations add up to
            # g1 = np.mean(((x - np.mean(x)) / sigma)**3)
            # but use only one temp array instead of three
            temp = x - np.mean(x)
            np.true_divide(temp, sigma, temp)
            np.power(temp, 3, temp)
            g1 = np.mean(temp)
            # ndarray.ptp() was removed in NumPy 2.0; use the function form instead
            return np.ptp(x) / (1.0 + np.log2(x.size) +
                                np.log2(1.0 + np.absolute(g1) / sg1))
    return 0.0
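A hedged usage sketch on synthetic skewed data, turning the returned bin width into a bin count:

import numpy as np

rng = np.random.default_rng(0)
x = rng.lognormal(size=1000)     # skewed sample, where Doane's correction matters
width = _hist_bin_doane(x)
n_bins = int(np.ceil((x.max() - x.min()) / width))
print(width, n_bins)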
Example #29
def plot_profiles(prots, eluts, sp='Hs', plot_sums=True, shape=None,
        min_count=1):
    """
    shape: (m,n) = m rows, n columns
    eluts: [el.NormElut(f, sp, norm_rows=False, norm_cols=False) for f in
    fs]
    """
    import plotting as pl
    gt = seqs.GTrans()
    use_eluts = elutions_containing_prots(eluts, sp, seqs.names2ids(prots),
            min_count)
    shape = shape if shape else ut.sqrt_shape(len(use_eluts)+1)
    fig = pl.figure()
    for i,e in enumerate(use_eluts):
        sp_target = ut.shortname(e.filename)[:2]
        pl.subplot(shape[0],shape[1],i+1)
        pl.title(ut.shortname(e.filename))
        pids = [gt.name2id[p] for p in prots]
        protsmax = max([np.max(e.normarr[r]) for p in pids if p in e.baseid2inds for
            r in e.baseid2inds[p]])
        plot_prots(e, pids, e.baseid2inds, protsmax)
        if plot_sums:
            # plot total spectral counts normalized to match biggest peak
            sums = np.sum(e.normarr,axis=0)
            fmax = np.max(sums)
            pl.plot(range(sums.shape[1]),
                    np.log2(sums[0,:]).T*np.log2(protsmax)*len(pids)/np.log2(fmax), 
                    color='k', linestyle='-', linewidth=.5)
    # make legend with all prots in the spare subplot slot (subplot indices are 1-based)
    pl.subplot(shape[0],shape[1],len(use_eluts)+1)
    for p in prots: pl.plot(0,label=p)
    pl.legend()
Example #30
    def reconstructWPT(self,new_wp,wavelet,listleaves):
        """ Create a new wavelet packet tree by copying in the data for the leaves and then performing
        the idwt up the tree to the root.
        Assumes that listleaves is top-to-bottom, so just reverses it.
        """
        # Sort the list of leaves into order bottom-to-top, left-to-right
        working = listleaves.copy()
        working = working[-1::-1]

        level = int(np.floor(np.log2(working[0] + 1)))
        while level > 0:
            first = 2 ** level - 1
            while working[0] >= first:
                # Note that it assumes that the whole list is backwards
                parent = (working[0] - 1) // 2
                p = self.ConvertWaveletNodeName(parent)

                new_wp[p].data = pywt.idwt(new_wp[self.ConvertWaveletNodeName(working[1])].data,new_wp[self.ConvertWaveletNodeName(working[0])].data, wavelet)[:len(new_wp[p].data)]

                # Delete these two nodes from working
                working = np.delete(working, 1)
                working = np.delete(working, 0)
                # Insert parent into list of nodes at the next level
                ins = np.where(working > parent)
                if len(ins[0]) > 0:
                    ins = ins[0][-1] + 1
                else:
                    ins = 0
                working = np.insert(working, ins, parent)
            level = int(np.floor(np.log2(working[0] + 1)))
        return new_wp
Example #31
cost = cost.reshape(target_sequences.shape)
cost = cost * target_mask
# Don't use these lines; could end up with NaN
# Specially at the end of audio files where mask is
# all zero for some of the shorter files in mini-batch.
#cost = cost.sum(axis=1) / target_mask.sum(axis=1)
#cost = cost.mean(axis=0)

# Use this one instead.
cost = cost.sum()
cost = cost / target_mask.sum()

# By default we report cross-entropy cost in bits.
# Switch to nats by commenting out this line:
# log_2(e) = 1.44269504089
cost = cost * lib.floatX(numpy.log2(numpy.e))

ip_cost = lib.floatX(numpy.log2(numpy.e)) * T.nnet.categorical_crossentropy(
    T.nnet.softmax(big_frame_independent_preds.reshape(
        (-1, Q_LEVELS))), target_sequences.flatten())
ip_cost = ip_cost.reshape(target_sequences.shape)
ip_cost = ip_cost * target_mask
ip_cost = ip_cost.sum()
ip_cost = ip_cost / target_mask.sum()

### Getting the params, grads, updates, and Theano functions ###
#params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param==True)
#ip_params = lib.get_params(ip_cost, lambda x: hasattr(x, 'param') and x.param==True\
#    and 'BigFrameLevel' in x.name)
#other_params = [p for p in params if p not in ip_params]
#params = ip_params + other_params
Example #32
def entropy_histogram(image_array) -> float:
    num_elements = np.size(image_array)
    histogram, _ = np.histogram(image_array, 65535)
    probabilities = histogram / num_elements
    return float(-np.sum(np.multiply(probabilities,
                                     np.log2(probabilities, out=np.zeros_like(probabilities),
                                             where=probabilities > 0))))
Example #33
def calculate_entropy(y):
    entropy = 0
    for value in set(y):
        entropy += -(sum(y == value) /
                     len(y)) * np.log2(sum(y == value) / len(y) + 1.0e-5)
    return entropy
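A minimal check: a balanced binary label vector carries close to 1 bit of entropy (the 1.0e-5 offset pushes the result slightly below 1):

import numpy as np

y = np.array([0, 0, 1, 1])
print(calculate_entropy(y))   # ~0.99997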
Example #34
def kdensityfft(X, kernel="gau", bw="normal_reference", weights=None, gridsize=None,
                adjust=1, clip=(-np.inf, np.inf), cut=3, retgrid=True):
    """
    Rosenblatt-Parzen univariate kernel density estimator

    Parameters
    ----------
    X : array-like
        The variable for which the density estimate is desired.
    kernel : str
        ONLY GAUSSIAN IS CURRENTLY IMPLEMENTED.
        "bi" for biweight
        "cos" for cosine
        "epa" for Epanechnikov, default
        "epa2" for alternative Epanechnikov
        "gau" for Gaussian.
        "par" for Parzen
        "rect" for rectangular
        "tri" for triangular
    bw : str, float
        "scott" - 1.059 * A * nobs ** (-1/5.), where A is min(std(X),IQR/1.34)
        "silverman" - .9 * A * nobs ** (-1/5.), where A is min(std(X),IQR/1.34)
        If a float is given, it is the bandwidth.
    weights : array or None
        WEIGHTS ARE NOT CURRENTLY IMPLEMENTED.
        Optional  weights. If the X value is clipped, then this weight is
        also dropped.
    gridsize : int
        If gridsize is None, max(len(X), 512) is used. Note that the provided
        number is rounded up to the next highest power of 2.
    adjust : float
        An adjustment factor for the bw. Bandwidth becomes bw * adjust.
    clip : tuple
        Observations in X that are outside of the range given by clip are
        dropped. The number of observations in X is then shortened.
    cut : float
        Defines the length of the grid past the lowest and highest values of X
        so that the kernel goes to zero. The end points are
        -/+ cut*bw*{X.min() or X.max()}
    retgrid : bool
        Whether or not to return the grid over which the density is estimated.

    Returns
    -------
    density : array
        The densities estimated at the grid points.
    grid : array, optional
        The grid points at which the density is estimated.

    Notes
    -----
    Only the default kernel is implemented. Weights aren't implemented yet.
    This follows Silverman (1982) with changes suggested by Jones and Lotwick
    (1984). However, the discretization step is replaced by linear binning
    of Fan and Marron (1994). This should be extended to accept the parts
    that are dependent only on the data to speed things up for
    cross-validation.

    References
    ---------- ::

    Fan, J. and J.S. Marron. (1994) `Fast implementations of nonparametric
        curve estimators`. Journal of Computational and Graphical Statistics.
        3.1, 35-56.
    Jones, M.C. and H.W. Lotwick. (1984) `Remark AS R50: A Remark on Algorithm
        AS 176. Kernel Density Estimation Using the Fast Fourier Transform`.
        Journal of the Royal Statistical Society. Series C. 33.1, 120-2.
    Silverman, B.W. (1982) `Algorithm AS 176. Kernel density estimation using
        the Fast Fourier Transform`. Journal of the Royal Statistical Society.
        Series C. 31.2, 93-9.
    """
    X = np.asarray(X)
    X = X[np.logical_and(X > clip[0], X < clip[1])] # won't work for two columns.
                                                # will affect underlying data?

    # Get kernel object corresponding to selection
    kern = kernel_switch[kernel]()

    try:
        bw = float(bw)
    except:
        bw = bandwidths.select_bandwidth(X, bw, kern) # will cross-val fit this pattern?
    bw *= adjust

    nobs = len(X) # after trim

    # 1 Make grid and discretize the data
    if gridsize == None:
        gridsize = np.max((nobs, 512.))
    gridsize = 2**np.ceil(np.log2(gridsize)) # round to next power of 2

    a = np.min(X) - cut * bw
    b = np.max(X) + cut * bw
    grid,delta = np.linspace(a, b, int(gridsize), retstep=True)
    RANGE = b - a

#TODO: Fix this?
# This is the Silverman binning function, but I believe it's buggy (SS)
# weighting according to Silverman
#    count = counts(X,grid)
#    binned = np.zeros_like(grid)    #xi_{k} in Silverman
#    j = 0
#    for k in range(int(gridsize-1)):
#        if count[k]>0: # there are points of X in the grid here
#            Xingrid = X[j:j+count[k]] # get all these points
#            # get weights at grid[k],grid[k+1]
#            binned[k] += np.sum(grid[k+1]-Xingrid)
#            binned[k+1] += np.sum(Xingrid-grid[k])
#            j += count[k]
#    binned /= (nobs)*delta**2 # normalize binned to sum to 1/delta

#NOTE: THE ABOVE IS WRONG, JUST TRY WITH LINEAR BINNING
    binned = fast_linbin(X, a, b, gridsize) / (delta * nobs)

    # step 2 compute FFT of the weights, using Munro (1976) FFT convention
    y = forrt(binned)

    # step 3 and 4 for optimal bw compute zstar and the density estimate f
    # don't have to redo the above if just changing bw, ie., for cross val

#NOTE: silverman_transform is the closed form solution of the FFT of the
#gaussian kernel. Not yet sure how to generalize it.
    zstar = silverman_transform(bw, gridsize, RANGE)*y # 3.49 in Silverman
                                                    # 3.50 w Gaussian kernel
    f = revrt(zstar)
    if retgrid:
        return f, grid, bw
    else:
        return f, bw
Example #35
def entropy(y):
    y = np.array(y)
    p = [len(y[y == k]) / len(y) for k in np.unique(y)]
    return -np.dot(p, np.log2(p))
Example #36
def main(data_path, dataset, seed, _run):
  ex.commands['print_config']()
  np.random.seed(seed)
  reader, (train_data, valid_data, test_data, _) = get_data(data_path, dataset)

  config = get_config()
  val_config = deepcopy(config)
  test_config = deepcopy(config)
  val_config.drop_x = test_config.drop_x = 0.0
  val_config.drop_i = test_config.drop_i = 0.0
  val_config.drop_h = test_config.drop_h = 0.0
  val_config.drop_o = test_config.drop_o = 0.0
  test_config.batch_size = test_config.num_steps = 1

  with tf.Graph().as_default(), tf.Session() as session:
    tf.set_random_seed(seed)
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    with tf.variable_scope("model", reuse=None, initializer=initializer):
      mtrain = Model(is_training=True, config=config)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
      mvalid = Model(is_training=False, config=val_config)
      mtest = Model(is_training=False, config=test_config)

    tf.global_variables_initializer().run()

    saver = tf.train.Saver()
    trains, vals, tests, best_val = [np.inf], [np.inf], [np.inf], np.inf

    for i in range(config.max_max_epoch):
      lr_decay = config.lr_decay ** max(i - config.max_epoch + 1, 0.0)
      mtrain.assign_lr(session, config.learning_rate / lr_decay)

      print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(mtrain.lr)))
      train_perplexity = run_epoch(session, mtrain, train_data, mtrain.train_op, config=config, verbose=True, is_training=True)
      print("Epoch: %d Train Perplexity: %.3f, Bits: %.3f" % (i + 1, train_perplexity, np.log2(train_perplexity)))

      valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op(), config=val_config)
      print("Epoch: %d Valid Perplexity (batched): %.3f, Bits: %.3f" % (i + 1, valid_perplexity, np.log2(valid_perplexity)))

      test_perplexity = run_epoch(session, mvalid, test_data, tf.no_op(), config=val_config)
      print("Epoch: %d Test Perplexity (batched): %.3f, Bits: %.3f" % (i + 1, test_perplexity, np.log2(test_perplexity)))

      trains.append(train_perplexity)
      vals.append(valid_perplexity)
      tests.append(test_perplexity)

      if valid_perplexity < best_val:
        best_val = valid_perplexity
        print("Best Batched Valid Perplexity improved to %.03f" % best_val)
        save_path = saver.save(session, './' + dataset + "_" + str(seed) + "_best_model.ckpt")
        print("Saved to:", save_path)

      _run.info['epoch_nr'] = i + 1
      _run.info['nr_parameters'] = mtrain.nvars.item()
      _run.info['logs'] = {'train_perplexity': trains, 'valid_perplexity': vals, 'test_perplexity': tests}


    print("Training is over.")
    best_val_epoch = np.argmin(vals)
    print("Best Batched Validation Perplexity %.03f (Bits: %.3f) was at Epoch %d" %
          (vals[best_val_epoch], np.log2(vals[best_val_epoch]), best_val_epoch))
    print("Training Perplexity at this Epoch was %.03f, Bits: %.3f" %
          (trains[best_val_epoch], np.log2(trains[best_val_epoch])))
    print("Batched Test Perplexity at this Epoch was %.03f, Bits: %.3f" %
          (tests[best_val_epoch], np.log2(tests[best_val_epoch])))

    _run.info['best_val_epoch'] = best_val_epoch
    _run.info['best_valid_perplexity'] = vals[best_val_epoch]

    with tf.Session() as sess:
      saver.restore(sess, './'  + dataset + "_" + str(seed) + "_best_model.ckpt")

      print("Testing on non-batched Valid ...")
      valid_perplexity = run_epoch(sess, mtest, valid_data, tf.no_op(), config=test_config, verbose=True)
      print("Full Valid Perplexity: %.3f, Bits: %.3f" % (valid_perplexity, np.log2(valid_perplexity)))

      print("Testing on non-batched Test ...")
      test_perplexity = run_epoch(sess, mtest, test_data, tf.no_op(), config=test_config, verbose=True)
      print("Full Test Perplexity: %.3f, Bits: %.3f" % (test_perplexity, np.log2(test_perplexity)))

      _run.info['full_best_valid_perplexity'] = valid_perplexity
      _run.info['full_test_perplexity'] = test_perplexity

  return vals[best_val_epoch]
Example #37
        dataset = paraman.get_dataset()

        dataset["x_train"] = dataset["x_train"].astype(np.float)
        if "x_test" in dataset:
            dataset["x_test"] = dataset["x_test"].astype(np.float)
            dataset["y_test"] = dataset["y_test"].astype(np.float)
            dataset["y_train"] = dataset["y_train"].astype(np.float)

        U_init = paraman.get_initialization_centroids(dataset["x_train"])

        if paraman["kmeans"]:
            U_final, indicator_vector_final = main_kmeans(
                dataset["x_train"], U_init)
            if paraman["palm"]:
                if paraman["--nb-factors"] is None:
                    paraman["--nb-factors"] = int(np.log2(min(U_init.shape)))
                paraman["--residual-on-right"] = True if U_init.shape[
                    1] >= U_init.shape[0] else False

                U_final = process_palm_on_top_of_kmeans(U_final)
                distances = get_distances(dataset["x_train"], U_final)
                indicator_vector_final = np.argmin(distances, axis=1)

        elif paraman["qmeans"]:
            # paraman_q = ParameterManagerQmeans(arguments)
            # paraman.update(paraman_q)
            if paraman["--nb-factors"] is None:
                paraman["--nb-factors"] = int(np.log2(min(U_init.shape)))
            paraman["--residual-on-right"] = True if U_init.shape[
                1] >= U_init.shape[0] else False
            U_final, indicator_vector_final = main_qmeans(
Example #38
def _cllr(lr0, lr1):
    with np.errstate(divide='ignore'):
        cllr0 = np.mean(np.log2(1 + lr0))
        cllr1 = np.mean(np.log2(1 + 1/lr1))
        return .5 * (cllr0 + cllr1)
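A usage sketch, assuming lr0 holds likelihood ratios from comparisons where the first hypothesis is true (ideally small) and lr1 from comparisons where the second hypothesis is true (ideally large):

import numpy as np

lr0 = np.array([0.05, 0.2, 0.5])
lr1 = np.array([4.0, 20.0, 100.0])
print(_cllr(lr0, lr1))   # well below 1.0 for an informative system; ~1.0 is uninformative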
Example #39
def refExpScatter(exp_matrix,
                  ref_samples: list,
                  ref_mode='mean',
                  ref_name='ref',
                  id2symbol=None,
                  corr_method='pearson',
                  log_base=2,
                  log_additive=1,
                  group_gene_files: list = None,
                  scatter_ncols=2,
                  mad_ncols=4,
                  prefix='refExpScatter',
                  mad_ymax: float = None):
    """
    Given an expression matrix and a set of reference samples, compute the correlation of every
    other sample against the reference and present the results as scatter and MAD plots.
    :param exp_matrix: expression matrix
    :param ref_samples: list of reference samples
    :param ref_mode: e.g. 'mean' uses the mean of the reference samples as the reference expression
    :param ref_name: name of the reference sample set
    :param id2symbol: file whose first column is the gene id and second column is the symbol
    :param corr_method: correlation method
    :param log_base: log base, default 2
    :param log_additive: pseudo count added before log
    :param group_gene_files: each file lists one group of gene ids, used to group genes and colour them differently
    :param scatter_ncols: number of columns in the scatter-plot grid
    :param mad_ncols: number of columns in the MAD-plot grid
    :param prefix: output file prefix
    :param mad_ymax: maximum y value for the MAD plots
    :return:
    """
    exp_df = pd.read_csv(exp_matrix,
                         index_col=0,
                         header=0,
                         sep=None,
                         engine='python')
    if log_base == 2:
        exp_df = np.log2(exp_df + log_additive)
    elif log_base == 10:
        exp_df = np.log10(exp_df + log_additive)
    elif log_base == 1:
        pass
    else:
        raise Exception(f'{log_base} not supported! use 1(mean no log), 2, 10')
    id2symbol = dict(x.strip().split()[:2]
                     for x in open(id2symbol)) if id2symbol else dict()
    if ref_mode == 'mean':
        ref_exp_list = exp_df.loc[:, ref_samples].mean(axis=1)
    else:
        ref_exp_list = exp_df.loc[:, ref_samples].median(axis=1)
    grouped_gene_list = list()
    names = ['gene']
    group_gene_files = [] if group_gene_files is None else group_gene_files
    for group_file in group_gene_files:
        names.append(os.path.basename(group_file))
        grouped_gene_list.append(
            [x.strip().split()[0] for x in open(group_file)])
    if grouped_gene_list:
        not_grouped_genes = set(exp_df.index) - set(x
                                                    for y in grouped_gene_list
                                                    for x in y)
        grouped_gene_list.insert(0, not_grouped_genes)
    else:
        grouped_gene_list = [list(exp_df.index)]
    group_and_color = list(
        zip(grouped_gene_list,
            colorpool.get_color_pool(len(grouped_gene_list)), names))
    # plot scatter
    test_samples = [x for x in exp_df.columns if x not in ref_samples]
    exp_df['symbols'] = [
        id2symbol[x] if x in id2symbol else x for x in exp_df.index
    ]
    plots = list()
    for each in test_samples:
        plot_data = exp_df.loc[:, [each]]
        plot_data[ref_name] = ref_exp_list
        plot_data = plot_data[plot_data[ref_name] * plot_data[each] > 0]
        plot_data = plot_data.round(4)
        plot_data['symbols'] = exp_df['symbols']
        corr, pval = correlation_function(corr_method)(plot_data[each],
                                                       plot_data[ref_name])
        pval = format(pval, '.2e') if pval < 0.001 else round(pval, 4)
        corr = round(corr, 3)
        p = figure(
            title="{} vs {}({}) {}_Corr={} Pval={}".format(
                each, ref_mode, ref_name, corr_method, corr, pval),
            # tools="wheel_zoom,reset,hover",
            tooltips=[
                ('x', '@{}'.format(ref_name)),
                ('y', '@{}'.format(each)),
                ('gene', '@symbols' if id2symbol else '@index'),
            ])
        for group, color, name in group_and_color:
            target_index = list(set(group) & set(plot_data.index))
            source_data = plot_data.loc[target_index, :]
            if source_data.shape[0] == 0:
                continue
            source = ColumnDataSource(source_data)
            p.scatter(
                x=ref_name,
                y=each,
                # line_color=mapper,
                color=color,
                fill_alpha=0.2,
                size=5,
                legend=name,
                source=source)
        p.xaxis.axis_label = 'log{}(expr+{}) of {}'.format(
            log_base, log_additive, ref_name)
        p.yaxis.axis_label = 'log{}(expr+{}) of {}'.format(
            log_base, log_additive, each)
        plots.append(p)

    fig = gridplot(plots, ncols=scatter_ncols)
    output_file(prefix + '.scatter.html')
    save(fig)
    # export_png(fig, prefix+'.scatter.png')

    # plot Variation in
    # gene expression as a function of gene expression level across sample replicates
    upper_list = []
    plots = list()
    for each in test_samples:
        plot_data = exp_df.loc[:, [each]]
        plot_data[ref_name] = ref_exp_list
        plot_data = plot_data[plot_data[ref_name] * plot_data[each] > 0]
        plot_data[each] = (plot_data[each] -
                           plot_data[ref_name]).abs() / plot_data[ref_name]
        plot_data = plot_data.round(4)
        if mad_ymax is None:
            describe = plot_data[each].describe()
            upper = describe['75%'] + 2 * (describe['75%'] - describe['25%'])
        else:
            upper = mad_ymax
        upper_list.append(upper)
        plot_data[each][plot_data[each] > upper] = upper
        plot_data['symbols'] = exp_df['symbols']
        p = figure(
            title="{} vs {}({})".format(each, ref_mode, ref_name),
            # tools="wheel_zoom,reset,hover",
            tooltips=[
                ('x', '@{}'.format(ref_name)),
                ('y', '@{}'.format(each)),
                ('gene', '@symbols' if id2symbol else '@index'),
            ])
        for group, color, name in group_and_color:
            source_data = plot_data.loc[set(group) & set(plot_data.index)]
            if source_data.shape[0] == 0:
                continue
            source = ColumnDataSource(source_data)
            p.scatter(
                x=ref_name,
                y=each,
                # line_color=mapper,
                color=color,
                fill_alpha=0.2,
                size=5,
                legend=name,
                source=source)
        p.xaxis.axis_label = 'log{}(expr+{}) of {}'.format(
            log_base, log_additive, ref_name)
        p.yaxis.axis_label = '|expr - {}_expr| / {}_expr'.format(
            ref_mode, ref_mode)
        plots.append(p)
        if mad_ymax is not None:
            p.y_range = Range1d(0, mad_ymax)
    else:
        fig = gridplot(plots, ncols=mad_ncols, sizing_mode='stretch_width')
        output_file(prefix + '.MDA.html')
        save(fig)
import superimport

import numpy as np
import matplotlib.pyplot as plt
import pyprobml_utils as pml


x = np.linspace(0,1,10000)

H = lambda x: -(x*np.log2(x) + (1-x) * np.log2(1-x))

plt.plot(x, H(x), '-b', lw=3)
plt.xlim((-0.01, 1.01))
plt.ylim((0, 1.01))

plt.xlabel("p(X = 1)")
plt.ylabel("H(X)")

ticks = [0, 0.5, 1]
plt.xticks(ticks)
plt.yticks(ticks)

pml.savefig("bernoulliEntropy.pdf")
plt.show()
                        inter_signal_value = 0
                        interference = 0

                        for signal_dict in signal_list:
                            if signal_dict["node_type"] == node_type:
                                if signal_dict["node_id"] == fixed_node_id:
                                    inter_signal_value = signal_dict["signal"]
                                else:
                                    interference += signal_dict["signal"]
                            else:
                                interference += signal_dict["signal"]

                        SINR = inter_signal_value / (interference +
                                                     white_gaussian_noise)
                        channel_data_size = task_time * sub_channel_bandwidth * np.log2(
                            1 + SINR)
                        task_data_size += channel_data_size
                task_transmission_data = {
                    "task_id": task_id,
                    "task_data_size": task_data_size
                }
                task_transmission_data_list.append(task_transmission_data)

            task_transmission_data_dict_of_all_nodes[str(
                fixed_node_id)] = task_transmission_data_list

        node_type = settings.NODE_TYPE_MOBILE
        for mobile_node_id in range(fixed_node_num,
                                    node_num_list[experiment_median_no]):

            task_id_list = task_id_under_each_node_list[mobile_node_id]
Example #42
def training_schedule(
    cur_nimg,
    training_set,
    num_gpus,
    lod_initial_resolution=4,  # Image resolution used at the beginning.
    lod_training_kimg=600,  # Thousands of real images to show before doubling the resolution.
    lod_transition_kimg=600,  # Thousands of real images to show when fading in new layers.
    minibatch_base=16,  # Maximum minibatch size, divided evenly among GPUs.
    minibatch_dict={},  # Resolution-specific overrides.
    max_minibatch_per_gpu={},  # Resolution-specific maximum minibatch size per GPU.
    G_lrate_base=0.001,  # Learning rate for the generator.
    G_lrate_dict={},  # Resolution-specific overrides.
    D_lrate_base=0.001,  # Learning rate for the discriminator.
    D_lrate_dict={},  # Resolution-specific overrides.
    lrate_rampup_kimg=0,  # Duration of learning rate ramp-up.
    tick_kimg_base=160,  # Default interval of progress snapshots.
    tick_kimg_dict={
        4: 160,
        8: 140,
        16: 120,
        32: 100,
        64: 80,
        128: 60,
        256: 40,
        512: 30,
        1024: 20
    }):  # Resolution-specific overrides.

    # Initialize result dict.
    s = dnnlib.EasyDict()
    s.kimg = cur_nimg / 1000.0

    # Training phase.
    phase_dur = lod_training_kimg + lod_transition_kimg
    phase_idx = int(np.floor(s.kimg / phase_dur)) if phase_dur > 0 else 0
    phase_kimg = s.kimg - phase_idx * phase_dur

    # Level-of-detail and resolution.
    s.lod = training_set.resolution_log2
    s.lod -= np.floor(np.log2(lod_initial_resolution))
    s.lod -= phase_idx
    if lod_transition_kimg > 0:
        s.lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg
    s.lod = max(s.lod, 0.0)
    s.resolution = 2**(training_set.resolution_log2 - int(np.floor(s.lod)))

    # Minibatch size.
    s.minibatch = minibatch_dict.get(s.resolution, minibatch_base)
    s.minibatch -= s.minibatch % num_gpus
    if s.resolution in max_minibatch_per_gpu:
        s.minibatch = min(s.minibatch,
                          max_minibatch_per_gpu[s.resolution] * num_gpus)

    # Learning rate.
    s.G_lrate = G_lrate_dict.get(s.resolution, G_lrate_base)
    s.D_lrate = D_lrate_dict.get(s.resolution, D_lrate_base)
    if lrate_rampup_kimg > 0:
        rampup = min(s.kimg / lrate_rampup_kimg, 1.0)
        s.G_lrate *= rampup
        s.D_lrate *= rampup

    # Other parameters.
    s.tick_kimg = tick_kimg_dict.get(s.resolution, tick_kimg_base)
    return s
Example #43
def d_to_phi(d):
    return -np.log2(d / 0.001)
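This looks like the Krumbein phi scale; a quick check assuming d is a grain diameter in metres, so 0.001 is the 1 mm reference size:

print(d_to_phi(0.0005))     # 1.0  (0.5 mm, coarse sand)
print(d_to_phi(0.000125))   # 3.0  (0.125 mm, fine sand)
print(d_to_phi(0.004))      # -2.0 (4 mm, pebble)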
Example #44
    def compose(self, nr_of_compositions):
        if not self.caching_directory:
            self.logger.critical("This method requires caching. Abort")
            return None

        state_filepath_base = os.path.join(self.caching_directory, "compositions-")
        get_state_filepath = lambda needed_exp: state_filepath_base + str(int(2**needed_exp))
        target_state_filepath = state_filepath_base + str(nr_of_compositions)

        if os.path.isdir(target_state_filepath):
            self.logger.info("Target state is cached. Loading it")
            return self.load_state(target_state_filepath)

        # which compositions do we need
        target_exp = int( np.log2(nr_of_compositions) )
        needed_compositions = [ x if (nr_of_compositions & (2**x) != 0) else -1 for x in range(target_exp + 1)]
        needed_compositions = list(filter(lambda a: a != -1, needed_compositions))

        self.logger.info("Needed compositions: " + ", ".join(map(str, needed_compositions)))

        # start with a copy of the current state
        previous_state = self.copy()
        avoided_self_composition = False

        # which compositions already exist? Generate?
        for needed_exp in range(target_exp + 1):
            state_filepath = get_state_filepath(needed_exp)
            if not os.path.isdir(state_filepath):
                self.logger.info("[*] State 2**" + str(needed_exp) + " does not exist. Creating it.")
                if not needed_exp == 0:
                    if avoided_self_composition:  # only load from disk when it differs from current state
                        previous_state_filepath = get_state_filepath(needed_exp - 1)
                        previous_state = self.load_state(previous_state_filepath)
                        avoided_self_composition = False
                    previous_state.compose_with(previous_state)
                    previous_state.print_state()
                    # previous_state.print_state()
                previous_state.save_state(state_filepath)
                gc.collect()
            else:
                avoided_self_composition = True
        self.logger.info("[*] All intermediate states up to 2**" + str(target_exp) + " exist now")

        previous_state = self.load_state( get_state_filepath(needed_compositions[0]) )
        self.logger.info("[*] Loaded state 2**" + str(needed_compositions[0]))

        # compose to the desired state
        for i in needed_compositions[1:]:
            self.logger.info("[*] Compose with state 2**{}".format(i))
            current_state = self.load_state(get_state_filepath(i))
            # while the factor of previous state and current state is not same
            while(current_state.factor != previous_state.factor):
                self.logger.info("factors are unequal ( {} != {} ), squaring".format(current_state.factor, previous_state.factor))
                if current_state.factor > previous_state.factor:
                    previous_state.squaring()
                else:
                    current_state.squaring()
            # now the factor should be the same
            previous_state.compose_with(current_state)

        previous_state.print_state()
        previous_state.save_state(target_state_filepath)  # caching..
        return previous_state
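# The method above composes a state with itself nr_of_compositions times by
# building the power-of-two states and combining those picked out by the binary
# expansion (exponentiation by squaring). A minimal sketch of that idea with a
# generic associative operation (the helper name is hypothetical):
import numpy as np

def compose_times(combine, state, nr_of_compositions):
    target_exp = int(np.log2(nr_of_compositions))
    powers = [state]                                   # states for 2**0 .. 2**target_exp
    for _ in range(target_exp):
        powers.append(combine(powers[-1], powers[-1]))
    result = None
    for exp in range(target_exp + 1):
        if nr_of_compositions & (1 << exp):            # this power of two is needed
            result = powers[exp] if result is None else combine(result, powers[exp])
    return result

print(compose_times(lambda a, b: a + b, 1, 13))        # 13: integer addition as a stand-in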
Example #45
    for ii in range(len(seqs)):
        m[temp.index(seqs[ii][i])][i] += 1

m1 = m / np.sum(m,axis=0)
m1 = m1.T
m2 = m1 + 0.000000001

print('the most probable word:')
for i in m1:
    print(temp[np.argmax(i)], end=' ')


# Calculating information content
if args.use_back_freq:
    inf = np.sum(
        m2 * np.log2(m2/np.array(freq)), axis=-1).reshape((-1,1))
else:
    inf = np.log2(len(temp)) + np.sum(m2 * np.log2(m2), axis=-1).reshape((-1,1))
infm = inf * m1



# Making input to the function
inp = []
for i in range(len(seqs[0])):
    inbetween = []
    hk = []
    sorting = []
    sortlet = []
    for ii in range(len(temp)):
        inbetween.append((temp[ii], infm[i][ii]))
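# The truncated loop above collects per-position (letter, information) tuples.
# A compact, self-contained illustration of the information-content formula used
# above, log2(alphabet size) + sum(p * log2 p), on a toy DNA count matrix:
import numpy as np

letters = ['A', 'C', 'G', 'T']
counts = np.array([[8, 1, 0],      # A at positions 0..2
                   [0, 1, 8],      # C
                   [1, 7, 1],      # G
                   [1, 1, 1]])     # T
probs = (counts / counts.sum(axis=0)).T + 1e-9          # position x letter, avoid log2(0)
info = np.log2(len(letters)) + np.sum(probs * np.log2(probs), axis=-1)
print(info)                                             # bits of information per position
print([letters[np.argmax(row)] for row in probs])       # most probable letters: ['A', 'G', 'C']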
Example #46
def transfer_state(state, gate_matrix, bits):
    """
    Transfer to the next state
    :param state:
    :param gate_matrix:
    :param bits:
    :return:
    """

    assert (type(gate_matrix) is np.ndarray) or \
           (type(gate_matrix) is paddle.Tensor and gate_matrix.dtype.name == "COMPLEX128")

    assert type(
        state) is paddle.Tensor and state.dtype.name == "COMPLEX128" and len(
            state.shape) == 1
    # calc source_pos target_pos
    n = int(np.log2(state.shape[0]))
    source_pos = copy.deepcopy(
        bits)  # copy bits, it should NOT change the order of bits
    # source_pos = [n - 1 - idex for idex in source_pos]  # qubit index
    # source_pos = list(reversed(source_pos))  # reverse qubit index
    target_pos = list(range(len(bits)))

    # ### check
    # state_check = transfer_state(paddle.reshape(state, [2] * n), gate_matrix, bits)
    # state_check = paddle.reshape(state_check, [2 ** n])

    # compressed moveaxis
    # compress the continuous dim before moveaxis
    # e.g. single operand: before moveaxis 2*2*[2]*2*2 -compress-> 4*[2]*4, after moveaxis [2]*2*2*2*2 -compress-> [2]*4*4
    #      double operands: before moveaxis 2*2*[2]*2*2*[2]*2*2 -compress-> 4*[2]*4*[2]*4, after moveaxis [2]*[2]*2*2*2*2*2*2 -compress-> [2]*[2]*4*4*4
    # the peak rank is 5 when the number of operands is 2
    assert len(source_pos) == 1 or len(source_pos) == 2
    compressed_shape_before_moveaxis = [1]
    compressed_source_pos = [-1] * len(source_pos)
    for i in range(n):
        if i in source_pos:
            compressed_source_pos[source_pos.index(i)] = len(
                compressed_shape_before_moveaxis)
            compressed_shape_before_moveaxis.append(2)
            compressed_shape_before_moveaxis.append(1)
        else:
            compressed_shape_before_moveaxis[
                -1] = compressed_shape_before_moveaxis[-1] * 2
    # print([2] * n)
    # print(source_pos)
    # print('->')
    # print(compressed_shape)
    # print(compressed_source_pos)  # always [1], [1, 3], or [3, 1]
    state = paddle.reshape(state, compressed_shape_before_moveaxis)
    state = complex_moveaxis(state, compressed_source_pos, target_pos)
    compressed_shape_after_moveaxis = state.shape

    # reshape
    state_new_shape = [2**len(bits), 2**(n - len(bits))]
    state = paddle.reshape(state, state_new_shape)

    # gate_matrix
    if type(gate_matrix) is np.ndarray:
        gate_new_shape = [
            2**(len(gate_matrix.shape) - len(bits)), 2**len(bits)
        ]
        gate_matrix = gate_matrix.reshape(gate_new_shape)
        gate_matrix = paddle.to_tensor(gate_matrix)
    elif type(gate_matrix
              ) is paddle.Tensor and gate_matrix.dtype.name == "COMPLEX128":
        pass
    else:
        assert False

    # matmul
    state = paddle.matmul(gate_matrix, state)

    # restore compressed moveaxis reshape
    state = paddle.reshape(state, compressed_shape_after_moveaxis)
    state = complex_moveaxis(state, target_pos, compressed_source_pos)
    state = paddle.reshape(state, [2**n])

    # ### check
    # assert (np.all(state.numpy() == state_check.numpy()))

    return state
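# For orientation, a hedged NumPy-only sketch of the same idea without the shape
# compression: move the target axes to the front, reshape to a matrix, multiply
# by the gate, and undo the permutation. Axis 0 is taken as the most significant
# qubit; the helper name is hypothetical.
import numpy as np

def apply_gate_numpy(state, gate, targets):
    """state: flat vector of length 2**n; gate: (2**k, 2**k); targets: k qubit indices."""
    n = int(np.log2(state.size))
    k = len(targets)
    psi = state.reshape([2] * n)
    psi = np.moveaxis(psi, targets, list(range(k)))   # bring target axes to the front
    psi = gate @ psi.reshape(2 ** k, -1)              # apply the gate to those axes
    psi = np.moveaxis(psi.reshape([2] * n), list(range(k)), targets)  # undo the permutation
    return psi.reshape(-1)

# X on qubit 0 of |00> gives |10> under this axis convention:
x_gate = np.array([[0, 1], [1, 0]], dtype=complex)
print(apply_gate_numpy(np.array([1, 0, 0, 0], dtype=complex), x_gate, [0]))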
Example #47
    def update_desvars_oc(self):
        """
        Update the design variables by means of OC-like or equivalently SAO
        method, using the filtered sensitivities; return the updated design
        variables.

        EXAMPLES:
            >>> t.update_desvars_oc()

        See also: sens_analysis, filter_sens_sigmund

        """
        if not self.topydict:
            raise ToPyError('You must first load a TPD file!')
        # 'p' stays constant for a specified number of iterations from start.
        # 'p' is incremented, but not more than the maximum allowable value.
        # If continuation parameters are not specified in the input file, 'p'
        # will stay constant:
        if self.pcount >= self._phold:
            if (self.p + self._pincr) < self._pmax + self._pincr:
                if (self.pcount - self._phold) % self._pcon == 0:
                    self.p += self._pincr

        if self.qcount >= self._qhold:
            if (self.q + self._qincr) < self._qmax:
                if (self.qcount - self._qhold) % self._qcon == 0:
                    self.q += self._qincr

        self.pcount += 1
        self.qcount += 1

        # Exponential approximation of eta (damping factor):
        if self.itercount > 1:
            if self.topydict['ETA'] == 'exp':  #  Check TPD specified value
                mask = equal(self.desvarsold / self.desvars, 1)
                self.a = 1 + log2(abs(self.dfold / self.df)) / \
                log2(self.desvarsold / self.desvars + mask) + \
                mask * (self.a - 1)
                self.a = clip(self.a, A_LOW, A_UPP)
                self.eta = 1 / (1 - self.a)

        self.dfold = self.df.copy()
        self.desvarsold = self.desvars.copy()

        # Change move limit for compliant mechanism synthesis:
        if self.probtype == 'mech':
            move = 0.1
        else:
            move = 0.2
        lam1, lam2 = 0, 100e3
        dims = self.desvars.shape
        while (lam2 - lam1) / (lam2 + lam1) > 1e-8 and lam2 > 1e-40:
            lammid = 0.5 * (lam1 + lam2)
            if self.probtype == 'mech':
                if self.approx == 'dquad':
                    curv = -1 / (self.eta * self.desvars) * self.df
                    beta = maximum(self.desvars - (self.df + lammid) / curv,
                                   self.void)
                    move_upper = minimum(move, self.desvars / 3)
                    desvars = maximum(self.void, maximum((self.desvars - move),\
                    minimum(self.solid,  minimum((self.desvars + move), \
                    (self.desvars * maximum(1e-10, \
                    (-self.df / lammid))**self.eta)**self.q))))
                else:  # reciprocal or exponential
                    desvars = maximum(self.void, maximum((self.desvars - move),\
                    minimum(self.solid,  minimum((self.desvars + move), \
                    (self.desvars * maximum(1e-10, \
                    (-self.df / lammid))**self.eta)**self.q))))
            else:  # compliance or heat
                if self.approx == 'dquad':
                    curv = -1 / (self.eta * self.desvars) * self.df
                    beta = maximum(self.desvars - (self.df + lammid) / curv,
                                   self.void)
                    move_upper = minimum(move, self.desvars / 3)
                    desvars = maximum(self.void, maximum((self.desvars - move),\
                    minimum(self.solid,  minimum((self.desvars + move_upper), \
                    beta**self.q))))
                else:  # reciprocal or exponential
                    desvars = maximum(self.void, maximum((self.desvars - move),\
                    minimum(self.solid,  minimum((self.desvars + move), \
                    (self.desvars * (-self.df / lammid)**self.eta)**self.q))))

            # Check for passive and active elements, modify updated x:
            if self.pasv.any() or self.actv.any():
                flatx = desvars.flatten()
                idx = []
                if self.nelz == 0:
                    y, x = dims
                    for j in range(x):
                        for k in range(y):
                            idx.append(k * x + j)
                else:
                    z, y, x = dims
                    for i in range(z):
                        for j in range(x):
                            for k in range(y):
                                idx.append(k * x + j + i * x * y)
                if self.pasv.any():
                    pasv = take(idx, self.pasv)  #  new indices
                    put(flatx, pasv, self.void)  #  = zero density
                if self.actv.any():
                    actv = take(idx, self.actv)  #  new indices
                    put(flatx, actv, self.solid)  #  = self.solid
                desvars = flatx.reshape(dims)

            if self.nelz == 0:
                if desvars.sum() - self.nelx * self.nely * self.volfrac > 0:
                    lam1 = lammid
                else:
                    lam2 = lammid
            else:
                if desvars.sum() - self.nelx * self.nely * self.nelz *\
                self.volfrac > 0:
                    lam1 = lammid
                else:
                    lam2 = lammid
        self.lam = lammid

        self.desvars = desvars

        # Change in design variables:
        self.change = (abs(self.desvars - self.desvarsold)).max()

        # Solid-self.void fraction:
        nr_s = self.desvars.flatten().tolist().count(self.solid)
        nr_v = self.desvars.flatten().tolist().count(self.void)
        self.svtfrac = (nr_s + nr_v) / self.desvars.size
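# For orientation, a hedged, stripped-down sketch of the inner loop above:
# bisection on the Lagrange multiplier of the volume constraint, with move
# limits and the eta damping exponent. Names and defaults here are generic,
# not ToPy's.
import numpy as np

def oc_update(x, dc, volfrac, move=0.2, eta=0.5, xmin=0.001, xmax=1.0):
    lam1, lam2 = 0.0, 100e3
    while (lam2 - lam1) / (lam2 + lam1) > 1e-8:
        lammid = 0.5 * (lam1 + lam2)
        x_new = np.clip(x * np.maximum(1e-10, -dc / lammid) ** eta,
                        np.maximum(xmin, x - move),     # lower move limit
                        np.minimum(xmax, x + move))     # upper move limit
        if x_new.sum() > volfrac * x.size:              # too much material -> raise lambda
            lam1 = lammid
        else:
            lam2 = lammid
    return x_new

x = np.full((10, 10), 0.4)
dc = -np.ones_like(x)                                   # dummy compliance sensitivities
print(oc_update(x, dc, volfrac=0.4).mean())             # stays close to the volume fraction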
Example #48
def make_scales(N,
                min_scale=None,
                max_scale=None,
                nv=32,
                scaletype='log',
                wavelet=None,
                downsample=None):
    """Recommended to first work out `min_scale` & `max_scale` with
    `cwt_scalebounds`.

    # Arguments:
        N: int
            `len(x)` or `len(x_padded)`.

        min_scale, max_scale: float, float
            Set scale range. Obtained e.g. from `utils.cwt_scalebounds`.

        nv: int
            Number of voices (wavelets) per octave.

        scaletype: str['log', 'log-piecewise', 'linear']
            Scaling kind to make.
            `'log-piecewise'` uses `utils.find_downsampling_scale`.

        wavelet: wavelets.Wavelet
            Used only for `scaletype='log-piecewise'`.

        downsample: int
            Downsampling factor. Used only for `scaletype='log-piecewise'`.

    # Returns:
        scales: np.ndarray
    """
    if scaletype == 'log-piecewise' and wavelet is None:
        raise ValueError(
            "must pass `wavelet` for `scaletype == 'log-piecewise'`")
    if min_scale is None and max_scale is None and wavelet is not None:
        min_scale, max_scale = cwt_scalebounds(wavelet, N, use_padded_N=True)
    else:
        min_scale = min_scale or 1
        max_scale = max_scale or N
    downsample = int(
        gdefaults('utils.cwt_utils.make_scales', downsample=downsample))

    # number of 2^-distributed scales spanning min to max
    na = int(np.ceil(nv * np.log2(max_scale / min_scale)))
    # floor to keep freq-domain peak at or to right of Nyquist
    # min must be more precise, if need integer rounding do on max
    mn_pow = int(np.floor(nv * np.log2(min_scale)))
    mx_pow = mn_pow + na

    if scaletype == 'log':
        scales = 2**(np.arange(mn_pow, mx_pow) / nv)

    elif scaletype == 'log-piecewise':
        scales = 2**(np.arange(mn_pow, mx_pow) / nv)
        idx = find_downsampling_scale(wavelet, scales)
        if idx is not None:
            # `+downsample - 1` starts `scales2` as continuing from `scales1`
            # at `scales2`'s sampling rate; rest of ops are based on this design,
            # such as `/nv` in ssq, which divides `scales2[0]` by `nv`, but if
            # `scales2[0]` is one sample away from `scales1[-1]`, seems incorrect
            scales1 = scales[:idx]
            scales2 = scales[idx + downsample - 1::downsample]
            scales = np.hstack([scales1, scales2])

    elif scaletype == 'linear':
        # TODO poor scheme (but there may not be any good one)
        min_scale, max_scale = 2**(mn_pow / nv), 2**(mx_pow / nv)
        na = int(np.ceil(max_scale / min_scale))
        scales = np.linspace(min_scale, max_scale, na)

    else:
        raise ValueError("`scaletype` must be 'log' or 'linear'; "
                         "got: %s" % scaletype)
    scales = scales.reshape(-1, 1)  # ensure 2D for broadcast ops later
    return scales
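# A small standalone illustration of the 'log' branch above: `nv` scales per
# octave between min_scale and max_scale (the values here are arbitrary).
import numpy as np

nv, min_scale, max_scale = 8, 1.0, 64.0
na = int(np.ceil(nv * np.log2(max_scale / min_scale)))   # scales spanning min..max
mn_pow = int(np.floor(nv * np.log2(min_scale)))
scales = 2 ** (np.arange(mn_pow, mn_pow + na) / nv)

print(len(scales))                 # 48 = 8 voices * 6 octaves
print(scales[nv] / scales[0])      # 2.0 -> one octave apart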
def process_tfbs(TF_name, args, log2fc_params): 	#per tf
	""" Processes single TFBS to split into bound/unbound and write out overview file """

	#begin_time = datetime.now()
	logger = TobiasLogger("", args.verbosity, args.log_q) 	#sending all logger calls to log_q

	#Pre-scanned sites to read
	bed_outdir = os.path.join(args.outdir, TF_name, "beds")
	filename = os.path.join(bed_outdir, TF_name + ".tmp")
	tmp_files = [filename]
	no_cond = len(args.cond_names)
	comparisons = args.comparisons

	#Set distribution function 
	diff_dist = scipy.stats.norm
	
	#Subset analysis to args.output_peaks if these were given
	if args.output_peaks is not None:
		output_peaks_bt = BedTool(args.output_peaks)
		sites_bt = BedTool(filename)
		intersection = sites_bt.intersect(output_peaks_bt, u=True)
		filename = intersection.fn	#Overwrite filename with the path to the bedtools object
		tmp_files.append(intersection.fn)

	#Read file to list of dicts
	stime = datetime.now()
	header = ["TFBS_chr", "TFBS_start", "TFBS_end", "TFBS_name", "TFBS_score", "TFBS_strand"] + args.peak_header_list + ["{0}_score".format(condition) for condition in args.cond_names]
	with open(filename) as f:
		bedlines = [dict(zip(header, line.rstrip().split("\t"))) for line in f.readlines()]
	n_rows = len(bedlines)
	etime = datetime.now()
	logger.spam("{0} - Reading took:\t{1}".format(TF_name, etime - stime))
	
	if n_rows == 0:
		logger.warning("No TFBS found for TF {0} - output .bed/.txt files will be empty and excel output will be skipped.".format(TF_name))


	############################## Local effects ###############################
	
	stime = datetime.now()

	#Sort, scale and calculate log2fc
	bedlines = sorted(bedlines, key=lambda line: (line["TFBS_chr"], int(line["TFBS_start"]), int(line["TFBS_end"])))
	for line in bedlines:
	
		#Condition specific
		for condition in args.cond_names:
			threshold = args.thresholds[condition]
			line[condition + "_score"] = float(line[condition + "_score"])
			original = line[condition + "_score"]

			line[condition + "_score"] = args.norm_objects[condition].normalize(original)  #normalize score
			line[condition + "_score"] = line[condition + "_score"] if line[condition + "_score"]  > 0 else 0 # any scores below 0 -> 0
			line[condition + "_score"] = round(line[condition + "_score"], 5)

			if line[condition + "_score"] < 0:
				logger.error("negative values: {0}. Original: {1}".format(line[condition + "_score"], original))

			line[condition + "_bound"] = 1 if line[condition + "_score"] > threshold else 0

		#Comparison specific
		for i, (cond1, cond2) in enumerate(comparisons):
			base = "{0}_{1}".format(cond1, cond2)
			line[base + "_log2fc"] = round(np.log2((line[cond1 + "_score"] + args.pseudo) / (line[cond2 + "_score"] + args.pseudo)), 5)

	#### Write _all file ####
	outfile = os.path.join(bed_outdir, TF_name + "_all.bed")
	dict_to_tab(bedlines, outfile, header)

	#### Write _bound/_unbound files ####
	for condition in args.cond_names:
		chosen_columns = header[:-no_cond] + [condition + "_score"]	#header[:-no_cond] removes the no_cond last columns containing scores

		#Subset bedlines per state
		for state in ["bound", "unbound"]:
			outfile = os.path.join(bed_outdir, "{0}_{1}_{2}.bed".format(TF_name, condition, state))
			chosen_bool = 1 if state == "bound" else 0
			bedlines_subset = [bedline for bedline in bedlines if bedline[condition + "_bound"] == chosen_bool]
			#bedlines_subset = sorted(bedlines_subset, key= lambda line: line[condition + "_score"], reverse=True)
			dict_to_tab(bedlines_subset, outfile, chosen_columns)

	##### Write overview with scores, bound and log2fcs ####
	overview_columns = header + [condition + "_bound" for condition in args.cond_names] + ["{0}_{1}_log2fc".format(cond1, cond2) for (cond1, cond2) in comparisons]
	overview_txt = os.path.join(args.outdir, TF_name, TF_name + "_overview.txt")
	dict_to_tab(bedlines, overview_txt, overview_columns, header=True)	#Write dictionary to table
	
	#Write xlsx overview
	bed_table = pd.DataFrame(bedlines, columns=overview_columns)
	nrow, ncol = bed_table.shape 
	logger.spam("Read table of shape {0} for TF {1}".format((nrow, ncol), TF_name))

	stime_excel = datetime.now()
	if args.skip_excel == False and n_rows > 0:
		try:
			overview_excel = os.path.join(args.outdir, TF_name, TF_name + "_overview.xlsx")
			writer = pd.ExcelWriter(overview_excel, engine='xlsxwriter') #, options=dict(constant_memory=True))
			bed_table.to_excel(writer, index=False, columns=overview_columns)

			#autofilter not possible with constant_memory
			worksheet = writer.sheets['Sheet1']
			no_rows, no_cols = bed_table.shape
			worksheet.autofilter(0,0,no_rows, no_cols)
			writer.save()

		except Exception as e:
			logger.warning("Could not write excelfile for TF {0}. Exception was: {1}".format(TF_name, e))

	etime_excel = datetime.now()
	etime = datetime.now()
	logger.spam("{0} - Local effects took:\t{1} (excel: {2})".format(TF_name, etime - stime, etime_excel - stime_excel))

	############################## Global effects ##############################

	stime = datetime.now()

	#Get info table ready
	info_columns = ["total_tfbs"]
	info_columns.extend(["{0}_{1}".format(cond, metric) for (cond, metric) in itertools.product(args.cond_names, ["mean_score", "bound"])])
	info_columns.extend(["{0}_{1}_{2}".format(comparison[0], comparison[1], metric) for (comparison, metric) in itertools.product(comparisons, ["change", "pvalue"])])
	rows, cols = 1, len(info_columns)
	info_table = pd.DataFrame(np.nan, columns=info_columns, index=[TF_name])

	#Fill in info table
	info_table.at[TF_name, "total_tfbs"] = n_rows

	for condition in args.cond_names:
		info_table.at[TF_name, condition + "_mean_score"] = round(np.mean(bed_table[condition + "_score"]), 5) if n_rows > 0 else np.nan
		info_table.at[TF_name, condition + "_bound"] = np.sum(bed_table[condition + "_bound"].values) #_bound contains bool 0/1
		
	#### Calculate statistical test for binding in comparison to background ####
	fig_out = os.path.abspath(os.path.join(args.outdir, TF_name, "plots", TF_name + "_log2fcs.pdf"))
	log2fc_pdf = PdfPages(fig_out, keep_empty=False) #do not write if there is only 1 condition or if there are no sites

	if n_rows > 0:	#log2fc only possible when more than one binding site was found
		for i, (cond1, cond2) in enumerate(comparisons):
			base = "{0}_{1}".format(cond1, cond2)

			# Compare log2fcs to background log2fcs
			included = np.logical_or(bed_table[cond1 + "_score"].values > 0, bed_table[cond2 + "_score"].values > 0)
			subset = bed_table[included].copy() 		#included subset 
			subset.loc[:,"peak_id"] = ["_".join([chrom, str(start), str(end)]) for (chrom, start, end) in zip(subset["peak_chr"].values, subset["peak_start"].values, subset["peak_end"].values)]	
			
			observed_log2fcs = subset.groupby('peak_id')[base + '_log2fc'].mean().reset_index()[base + "_log2fc"].values		#if more than one TFBS per peak -> take mean value

			#Estimate mean/std
			bg_params = log2fc_params[(cond1, cond2)]
			obs_params = diff_dist.fit(observed_log2fcs)

			obs_mean, obs_std = obs_params	#only for scipy.stats.norm
			bg_mean, bg_std = bg_params
			obs_no = np.min([len(observed_log2fcs), 50000])		#Set cap on obs_no to prevent super small p-values
			n_obs = len(observed_log2fcs)

			#If there was any change found at all (0 can happen if two bigwigs are the same)
			if obs_mean != bg_mean: 
				info_table.at[TF_name, base + "_change"] = (obs_mean - bg_mean) / np.mean([obs_std, bg_std])  #effect size
				info_table.at[TF_name, base + "_change"] = np.round(info_table.at[TF_name, base + "_change"], 5)
			
			#Else not possible to compare groups
			else:
				info_table.at[TF_name, base + "_change"] = 0
				info_table.at[TF_name, base + "_pvalue"] = 1

			#Sample from background distribution
			np.random.seed(n_obs)
			sample_changes = []
			for i in range(100):
				sample = diff_dist.rvs(*log2fc_params[(cond1, cond2)], size=obs_no)	#capped sample size (obs_no) to prevent tiny p-values
				sample_mean, sample_std = np.mean(sample), np.std(sample)
				sample_change = (sample_mean - bg_mean) / np.mean([sample_std, bg_std])
				sample_changes.append(sample_change)

			#Write out differential scores
			if args.debug:
				f = open(os.path.join(args.outdir, TF_name, "sampled_differential_scores.txt"), "w")
				f.write("\n".join([str(val) for val in sample_changes]))
				f.close()

			#Estimate p-value by comparing sampling to observed mean
			ttest = scipy.stats.ttest_1samp(sample_changes, info_table.at[TF_name, base + "_change"])
			info_table.at[TF_name, base + "_pvalue"] = ttest[1]

			#### Plot comparison ###
			fig, ax = plt.subplots(1,1)
			ax.hist(observed_log2fcs, bins='auto', label="Observed log2fcs", density=True)
			xvals = np.linspace(plt.xlim()[0], plt.xlim()[1], 100)
			
			#Observed distribution
			pdf = diff_dist.pdf(xvals, *obs_params)
			ax.plot(xvals, pdf, label="Observed distribution (fit)", color="red", linestyle="--")
			ax.axvline(obs_mean, color="red", label="Observed mean")
			
			#Background distribution
			pdf = diff_dist.pdf(xvals, *bg_params)
			ax.plot(xvals, pdf, label="Background distribution (fit)", color="Black", linestyle="--")
			ax.axvline(bg_mean, color="black", label="Background mean")

			#Set size
			x0,x1 = ax.get_xlim()
			y0,y1 = ax.get_ylim()
			ax.set_aspect(((x1-x0)/(y1-y0)) / 1.5)

			#Decorate
			ax.legend()
			plt.xlabel("Log2 fold change", fontsize=8)
			plt.ylabel("Density", fontsize=8)
			plt.title("Differential binding for TF \"{0}\"\nbetween ({1} / {2})".format(TF_name, cond1, cond2), fontsize=10)
			ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
			
			plt.tight_layout()
			log2fc_pdf.savefig(fig, bbox_inches='tight')
			plt.close(fig)

			#etime_plot = datetime.now()
			#logger.debug("{0} - Plotting took:\t{1}".format(TF_name, etime_plot - stime_plot))

	log2fc_pdf.close()	
	
	etime = datetime.now()
	logger.spam("{0} - Global effects took:\t{1}".format(TF_name, etime - stime))

	#################### Remove temporary file ######################

	for filename in tmp_files:
		try:
			os.remove(filename)
		except:
			logger.error("Could not remove temporary file {0} - this does not effect the results of BINDetect.".format(filename) )

	return(info_table)
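# A simplified, hedged sketch of the global-effects step above: effect size of
# the observed log2fcs against a fitted normal background, with the sampling-
# based one-sample t-test. The helper name is hypothetical; only the structure
# mirrors the code above.
import numpy as np
import scipy.stats

def differential_change(observed_log2fcs, bg_mean, bg_std, n_samples=100, seed=0):
    obs_mean, obs_std = scipy.stats.norm.fit(observed_log2fcs)
    change = (obs_mean - bg_mean) / np.mean([obs_std, bg_std])     # effect size

    rng = np.random.default_rng(seed)
    sampled_changes = []
    for _ in range(n_samples):                                     # background sampling
        sample = rng.normal(bg_mean, bg_std, size=len(observed_log2fcs))
        sampled_changes.append((sample.mean() - bg_mean) / np.mean([sample.std(), bg_std]))
    pvalue = scipy.stats.ttest_1samp(sampled_changes, change)[1]
    return change, pvalue

print(differential_change(np.random.default_rng(1).normal(0.5, 1.0, 500), 0.0, 1.0))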
Example #50
print('Starting Active Learning')

for i in range(acquisition_iterations):
    print('POOLING ITERATION', i)

    score_All = np.zeros(shape=(X_Pool.shape[0], nb_classes))
    for d in range(dropout_iterations):
        print('Dropout Iteration', d)
        dropout_score = model.predict(X_Pool, batch_size=batch_size, verbose=1)
        np.save(
            '/Users/Riashat/Documents/Cambridge_THESIS/Code/Experiments/keras/active_learning/Acquisition_Functions/Bayesian_Active_Learning/GPU/Max_Entropy/Dropout_Scores/'
            + 'Dropout_Score_' + str(d) + '.npy', dropout_score)
        score_All = score_All + dropout_score

    Avg_Pi = np.divide(score_All, dropout_iterations)
    Log_Avg_Pi = np.log2(Avg_Pi)
    Entropy_Avg_Pi = -np.multiply(Avg_Pi, Log_Avg_Pi)
    Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1)

    U_X = Entropy_Average_Pi

    # Select the `Queries` pool points with the highest predictive entropy,
    # ordered from most to least uncertain (argsort is ascending, hence the
    # slice from the end and the reversal below).
    # a_1d = U_X.flatten()
    # x_pool_index = a_1d.argsort()[-Queries:]

    a_1d = U_X.flatten()
    x_pool_index = U_X.argsort()[-Queries:][::-1]

    x_pool_All = np.append(x_pool_All, x_pool_index)

    #saving pooled images
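# For reference, the acquisition step above condensed into a small standalone
# helper (the name is hypothetical): average the MC-dropout softmax outputs and
# rank pool points by predictive entropy, most uncertain first.
import numpy as np

def max_entropy_acquisition(probs, n_queries):
    """probs: (dropout_iterations, pool_size, n_classes) softmax outputs."""
    avg_pi = probs.mean(axis=0)                                # average over dropout passes
    entropy = -np.sum(avg_pi * np.log2(avg_pi + 1e-12), axis=1)
    return entropy.argsort()[-n_queries:][::-1]                # highest entropy first

probs = np.array([[[0.5, 0.5], [0.9, 0.1]]])                   # one dropout pass, two pool points
print(max_entropy_acquisition(probs, 1))                       # [0]: the uniform row is most uncertain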
Example #51
# Exponential and logarithmic functions

# Exponentials
print('Exponentials')
a1 = np.random.randint(1, 10, size = 5)
print(a1)
print(np.exp(a1))
print(np.exp2(a1))
print(np.power(a1, 2))
print()

# Logarithms
print('Logarithms')
print(np.log(a1))
print(np.log2(a1))
print(np.log10(a1))
print()

# Trigonometric functions
print('Trigonometric functions')
t = np.linspace(0, np.pi, 3)
print(t)
print(np.sin(t))
print(np.cos(t))
print(np.tan(t))
x = [-1, 0, 1]
print(x)
print(np.arcsin(x))
print(np.arccos(x))
print(np.arctan(x))
def hz_to_midi(hz):
    return 12 * (torch.log2(hz) - np.log2(440.0)) + 69
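# Worked values for the conversion above (assumes the function and torch are
# available): 440 Hz is concert A = MIDI 69, and each octave adds 12 semitones.
import torch
print(hz_to_midi(torch.tensor([220.0, 440.0, 880.0])))   # tensor([57., 69., 81.])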
Example #53
def bitcoin_core_log_var():
    yield from log_var_for_file("BitcoinCore-Mainnet.bin")


def column_for_levels(levels):
    return pd.DataFrame(levels)[0]  #.cummax()[0]


if __name__ == "__main__":
    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np

    cols = [
        column_for_levels(lvls)
        for lvls in [bitcoin_cash_log_var(),
                     bitcoin_core_log_var()]
    ]
    max_col = sorted(cols, key=lambda col: -col.index.size)[0]
    log_col = pd.Series(np.log2(1 + max_col.index.values))

    df = pd.concat([*cols, log_col],
                   axis=1,
                   keys=["Bitcoin Cash", "Bitcoin",
                         r"$log_2(x)$"]).astype(float)

    ylabel = r"$|interlink| + log_2(\frac{\sf variableTarget}{\sf genesisTarget})$"
    df.plot(logx=True).set(ylabel=ylabel, xlabel="block height")
    plt.show()
def G_synthesis_sb_modular(
        dlatents_withl_in,  # Input: Disentangled latents (W) [minibatch, label_size+dlatent_size].
        dlatent_size=7,  # Disentangled latent (W) dimensionality. Including discrete info, rotation, scaling, xy shearing, and xy translation.
        label_size=0,  # Label dimensionality, 0 if no labels.
        module_list=None,  # A list containing module names, which represent semantic latents (exclude labels).
        num_channels=1,  # Number of output color channels.
        resolution=64,  # Output resolution.
        nonlinearity='lrelu',  # Activation function: 'relu', 'lrelu', etc.
        dtype='float32',  # Data type to use for activations and outputs.
        resample_kernel=[
            1, 3, 3, 1
        ],  # Low-pass filter to apply when resampling activations. None = no filtering.
        fused_modconv=True,  # Implement modulated_conv2d_layer() as a single fused op?
        use_noise=False,  # If noise is used in this dataset.
        randomize_noise=True,  # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
        single_const=True,
        **_kwargs):  # Ignore unrecognized keyword args.
    '''
    Modularized spatial-biased network.
    '''
    resolution_log2 = int(np.log2(resolution))  # == 6 for resolution 64
    assert resolution == 2**resolution_log2 and resolution >= 4
    num_layers = resolution_log2 * 2 - 2  # == 10 for resolution 64

    act = nonlinearity
    images_out = None

    # Note that module_list may include modules not containing latents,
    # e.g. Conv layers (size in this case means number of conv layers).
    key_ls, size_ls, count_dlatent_size, n_content = split_module_names(
        module_list)
    if label_size > 0:
        key_ls.insert(0, 'Label')
        size_ls.insert(0, label_size)
        n_content += label_size
    # module_dict = collections.OrderedDict(zip(key_ls, size_ls))

    # Primary inputs.
    assert dlatent_size == count_dlatent_size
    dlatents_withl_in.set_shape([None, label_size + count_dlatent_size])
    dlatents_withl_in = tf.cast(dlatents_withl_in, dtype)

    # Early layers consists of 4x4 constant layer.
    y = None
    if single_const:
        with tf.variable_scope('4x4'):
            with tf.variable_scope('Const'):
                x = tf.get_variable(
                    'const',
                    shape=[1, 128, 4, 4],
                    initializer=tf.initializers.random_normal())
                x = tf.tile(tf.cast(x, dtype),
                            [tf.shape(dlatents_withl_in)[0], 1, 1, 1])
    else:
        with tf.variable_scope('4x4'):
            with tf.variable_scope('Const'):
                x = tf.get_variable(
                    'const',
                    shape=[n_content, 128, 4, 4],
                    initializer=tf.initializers.random_normal())

    subkwargs = EasyDict()
    subkwargs.update(dlatents_withl_in=dlatents_withl_in,
                     n_content=n_content,
                     act=act,
                     dtype=dtype,
                     resample_kernel=resample_kernel,
                     fused_modconv=fused_modconv,
                     use_noise=use_noise,
                     randomize_noise=randomize_noise)

    # Build modules by module_dict.
    start_idx = 0
    # print('module_dict:', module_dict)
    # for scope_idx, k in enumerate(module_dict):
    for scope_idx, k in enumerate(key_ls):
        if (k.startswith('Label')) or (k.startswith('D_global')):
            # e.g. {'Label': 3}, {'D_global': 3}
            x = build_D_layers(x,
                               name=k,
                               n_latents=size_ls[scope_idx],
                               start_idx=start_idx,
                               scope_idx=scope_idx,
                               single_const=single_const,
                               **subkwargs)
            start_idx += size_ls[scope_idx]
        elif k.startswith('C_global'):
            # e.g. {'C_global': 2}
            x = build_C_global_layers(x,
                                      name=k,
                                      n_latents=size_ls[scope_idx],
                                      start_idx=start_idx,
                                      scope_idx=scope_idx,
                                      **subkwargs)
            start_idx += size_ls[scope_idx]
        elif k.startswith('SB'):
            # e.g. {'SB-rotation: 1}, {'SB-shearing': 2}
            x = build_SB_layers(x,
                                name=k,
                                n_latents=size_ls[scope_idx],
                                start_idx=start_idx,
                                scope_idx=scope_idx,
                                **subkwargs)
            start_idx += size_ls[scope_idx]
        elif k.startswith('C_local_heat'):
            # e.g. {'C_local_heat': 4}
            x = build_local_heat_layers(x,
                                        name=k,
                                        n_latents=size_ls[scope_idx],
                                        start_idx=start_idx,
                                        scope_idx=scope_idx,
                                        **subkwargs)
            start_idx += size_ls[scope_idx]
        elif k.startswith('C_local_hfeat'):
            # e.g. {'C_local_hfeat_size': 4}
            x = build_local_hfeat_layers(x,
                                         name=k,
                                         n_latents=size_ls[scope_idx],
                                         start_idx=start_idx,
                                         scope_idx=scope_idx,
                                         **subkwargs)
            start_idx += size_ls[scope_idx]
        elif k.startswith('Noise'):
            # e.g. {'Noise': 1}
            x = build_noise_layer(x,
                                  name=k,
                                  n_layers=size_ls[scope_idx],
                                  scope_idx=scope_idx,
                                  **subkwargs)
        elif k.startswith('Conv'):
            # e.g. {'Conv-up': 2}, {'Conv-id': 1}
            x = build_conv_layer(x,
                                 name=k,
                                 n_layers=size_ls[scope_idx],
                                 scope_idx=scope_idx,
                                 **subkwargs)
        else:
            raise ValueError('Unsupported module type: ' + k)

    y = torgb(x, y, num_channels=num_channels)
    images_out = y
    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
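# Worked check of the resolution arithmetic in the comments above:
# log2(64) = 6, so a 64x64 network has 2*6 - 2 = 10 layers.
import numpy as np
for resolution in (8, 16, 32, 64, 128, 256):
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2 ** resolution_log2 and resolution >= 4
    print(resolution, resolution_log2, resolution_log2 * 2 - 2)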
Example #55
# plot_folder = './plot_folder/'

# Provide the directory to store the network reconstructed audio
dir_recon_audio = './dir_recon_audio_fft/'
try: 
    os.makedirs(dir_recon_audio, exist_ok = True) 
    print("Directory '%s' created successfully" %dir_recon_audio) 
except OSError as error: 
    print("Directory '%s' exists") 


t = 0
for k in data.keys():
	nf = k.split('_')[0]
	f0 = dict_fmap[nf]
	midival = (int)(69 + 12*np.log2(f0/440))

	if ((midival in pl) == False):
		continue

	print(t + 1)
	t = t + 1

	# Load STFT matrix from data 
	stft_in = data[k]['cc']
	# Normalizing factor
	nf =  np.max(abs(stft_in))
	stft_norm = torch.FloatTensor(stft_in/nf)

	p = torch.FloatTensor(midival*np.ones(stft_in.shape[0]))
	x_recon_cVAE,mu,sig,ztot = cVAE.forward(stft_norm,(p.float()/127).view(-1,1))
Example #56
    def fit(self, train_data, train_labels, val_data, val_labels):
        """
        Fits to training data.
        
        Args:
            train_data (ndarray): Training data.
            train_labels (ndarray): Training labels.
            val_data (ndarray): Validation data.
            val_labels (ndarray): Validation labels.
        """
        split = np.append(-np.ones(train_labels.shape, dtype=np.float32),
                  np.zeros(val_labels.shape, dtype=np.float32))
        ps = PredefinedSplit(split)

        sh = train_data.shape
        train_data = np.append(train_data, val_data , axis=0)
        train_labels = np.append(train_labels , val_labels, axis=0)
        del val_data, val_labels
        
        if self.kernel == 'linear':
            if self.probability:
                clf = SVC(kernel='linear', class_weight='balanced',
                          random_state=6, decision_function_shape='ovr',
                          max_iter=1000, probability=self.probability,
                          **self.scikit_args)
            else:
                clf = LinearSVC(class_weight='balanced', dual=False,
                                random_state=6, multi_class='ovr',
                                max_iter=1000, **self.scikit_args)
        
            #Cross-validate over these parameters
            params = {'C': 2.0**np.arange(-9,16,2,dtype=np.float)}
        elif self.kernel == 'rbf':
            clf = SVC(random_state=6, class_weight='balanced', cache_size=16000,
                      decision_function_shape='ovr',max_iter=1000, tol=1e-4, 
                      probability=self.probability, **self.scikit_args)            
            params = {'C': 2.0**np.arange(-9,16,2,dtype=np.float),
                      'gamma': 2.0**np.arange(-15,4,2,dtype=np.float)}

        #Coarse search      
        gs = GridSearchCV(clf, params, refit=False, n_jobs=self.n_jobs,  
                          verbose=self.verbosity, cv=ps)
        gs.fit(train_data, train_labels)
        
        #Fine-Tune Search
        if self.kernel == 'linear':
            best_C = np.log2(gs.best_params_['C'])
            params = {'C': 2.0**np.linspace(best_C-2,best_C+2,10,
                                            dtype=np.float)}
        elif self.kernel == 'rbf':
            best_C = np.log2(gs.best_params_['C'])
            best_G = np.log2(gs.best_params_['gamma'])
            params = {'C': 2.0**np.linspace(best_C-2,best_C+2,10,
                                            dtype=np.float),
                      'gamma': 2.0**np.linspace(best_G-2,best_G+2,10,
                                                dtype=np.float)}            
        
        self.gs = GridSearchCV(clf, params, refit=self.refit, n_jobs=self.n_jobs,  
                          verbose=self.verbosity, cv=ps)
        self.gs.fit(train_data, train_labels)
        
        if not self.refit:
            clf.set_params(C=gs.best_params_['C'])
            if self.kernel == 'rbf':
                clf.set_params(gamma=gs.best_params_['gamma'])
            self.gs = clf
            self.gs.fit(train_data[:sh[0]], train_labels[:sh[0]])
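# A self-contained sketch of the coarse-to-fine search above for the linear
# kernel (synthetic data; cv=3 instead of the predefined split used above):
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
svc = LinearSVC(class_weight='balanced', dual=False, max_iter=1000)

coarse = {'C': 2.0 ** np.arange(-9, 16, 2)}                   # log2-spaced coarse grid
gs = GridSearchCV(svc, coarse, cv=3).fit(X, y)

best_c = np.log2(gs.best_params_['C'])
fine = {'C': 2.0 ** np.linspace(best_c - 2, best_c + 2, 10)}  # refine around the best exponent
gs = GridSearchCV(svc, fine, cv=3).fit(X, y)
print(gs.best_params_)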
Example #57
def nlogn(x):
    return x * numpy.log2(x)
Example #58
def splay(vec):
    """ Determine two lengths to split stride the input vector by
    """
    N2 = 2**int(numpy.log2(len(vec)) / 2)
    N1 = len(vec) // N2  # integer division; assumes N2 divides len(vec)
    return N1, N2
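# Worked examples of the function above (power-of-two lengths split exactly):
print(splay(numpy.zeros(1024)))   # (32, 32): N2 = 2**int(10/2)
print(splay(numpy.zeros(2048)))   # (64, 32): N2 = 2**int(11/2) = 32, N1 = 2048 // 32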
Example #59
def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
               bins_per_octave=12, tuning=None, filter_scale=1,
               norm=1, sparsity=0.01, window='hann', scale=True):
    '''Compute the hybrid constant-Q transform of an audio signal.

    Here, the hybrid CQT uses the pseudo CQT for higher frequencies where
    the hop_length is longer than half the filter length and the full CQT
    for lower frequencies.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    sr : number > 0 [scalar]
        sampling rate of `y`

    hop_length : int > 0 [scalar]
        number of samples between successive CQT columns.

    fmin : float > 0 [scalar]
        Minimum frequency. Defaults to C1 ~= 32.70 Hz

    n_bins : int > 0 [scalar]
        Number of frequency bins, starting at `fmin`

    bins_per_octave : int > 0 [scalar]
        Number of bins per octave

    tuning : None or float in `[-0.5, 0.5)`
        Tuning offset in fractions of a bin (cents).

        If `None`, tuning will be automatically estimated.

    filter_scale : float > 0
        Filter filter_scale factor. Larger values use longer windows.

    sparsity : float in [0, 1)
        Sparsify the CQT basis by discarding up to `sparsity`
        fraction of the energy in each basis.

        Set `sparsity=0` to disable sparsification.

    window : str, tuple, number, or function
        Window specification for the basis filters.
        See `filters.get_window` for details.


    Returns
    -------
    CQT : np.ndarray [shape=(n_bins, t), dtype=np.float]
        Constant-Q energy for each frequency at each time.

    Raises
    ------
    ParameterError
        If `hop_length` is not an integer multiple of
        `2**(n_bins / bins_per_octave)`

        Or if `y` is too short to support the frequency range of the CQT.

    See Also
    --------
    cqt
    pseudo_cqt

    Notes
    -----
    This function caches at level 20.

    '''

    if fmin is None:
        # C1 by default
        fmin = note_to_hz('C1')

    if tuning is None:
        tuning = estimate_tuning(y=y, sr=sr)

    # Get all CQT frequencies
    freqs = cqt_frequencies(n_bins, fmin,
                            bins_per_octave=bins_per_octave,
                            tuning=tuning)

    # Compute the length of each constant-Q basis function
    lengths = filters.constant_q_lengths(sr, fmin,
                                         n_bins=n_bins,
                                         bins_per_octave=bins_per_octave,
                                         tuning=tuning,
                                         filter_scale=filter_scale,
                                         window=window)

    # Determine which filters to use with Pseudo CQT
    # These are the ones that fit within 2 hop lengths after padding
    pseudo_filters = 2.0**np.ceil(np.log2(lengths)) < 2 * hop_length

    n_bins_pseudo = int(np.sum(pseudo_filters))

    n_bins_full = n_bins - n_bins_pseudo
    cqt_resp = []

    if n_bins_pseudo > 0:
        fmin_pseudo = np.min(freqs[pseudo_filters])

        cqt_resp.append(pseudo_cqt(y, sr,
                                   hop_length=hop_length,
                                   fmin=fmin_pseudo,
                                   n_bins=n_bins_pseudo,
                                   bins_per_octave=bins_per_octave,
                                   tuning=tuning,
                                   filter_scale=filter_scale,
                                   norm=norm,
                                   sparsity=sparsity,
                                   window=window,
                                   scale=scale))

    if n_bins_full > 0:
        cqt_resp.append(np.abs(cqt(y, sr,
                                   hop_length=hop_length,
                                   fmin=fmin,
                                   n_bins=n_bins_full,
                                   bins_per_octave=bins_per_octave,
                                   tuning=tuning,
                                   filter_scale=filter_scale,
                                   norm=norm,
                                   sparsity=sparsity,
                                   window=window,
                                   scale=scale)))

    return __trim_stack(cqt_resp, n_bins)
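# A minimal usage sketch, calling the packaged version of the function above
# through librosa; 'audio.wav' is a hypothetical mono file.
import numpy as np
import librosa

y, sr = librosa.load('audio.wav', sr=22050)
C = librosa.hybrid_cqt(y, sr=sr, hop_length=512, n_bins=84, bins_per_octave=12)
print(C.shape)            # (84, n_frames)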
Example #60
    def __init__(self, data, input_dims=None, output_dims=None):
        """Initialize a quantum channel Chi-matrix operator.

        Args:
            data (QuantumCircuit or
                  Instruction or
                  BaseOperator or
                  matrix): data to initialize superoperator.
            input_dims (tuple): the input subsystem dimensions.
                                [Default: None]
            output_dims (tuple): the output subsystem dimensions.
                                 [Default: None]

        Raises:
            QiskitError: if input data is not an N-qubit channel or
            cannot be initialized as a Chi-matrix.

        Additional Information
        ----------------------
        If the input or output dimensions are None, they will be
        automatically determined from the input data. The Chi matrix
        representation is only valid for N-qubit channels.
        """
        # If the input is a raw list or matrix we assume that it is
        # already a Chi matrix.
        if isinstance(data, (list, np.ndarray)):
            # Initialize from raw numpy or list matrix.
            chi_mat = np.array(data, dtype=complex)
            # Determine input and output dimensions
            dim_l, dim_r = chi_mat.shape
            if dim_l != dim_r:
                raise QiskitError('Invalid Chi-matrix input.')
            if input_dims:
                input_dim = np.product(input_dims)
            if output_dims:
                output_dim = np.product(output_dims)
            if output_dims is None and input_dims is None:
                output_dim = int(np.sqrt(dim_l))
                input_dim = dim_l // output_dim
            elif input_dims is None:
                input_dim = dim_l // output_dim
            elif output_dims is None:
                output_dim = dim_l // input_dim
            # Check dimensions
            if input_dim * output_dim != dim_l:
                raise QiskitError("Invalid shape for Chi-matrix input.")
        else:
            # Otherwise we initialize by conversion from another Qiskit
            # object into the QuantumChannel.
            if isinstance(data, (QuantumCircuit, Instruction)):
                # If the input is a Terra QuantumCircuit or Instruction we
                # convert it to a SuperOp
                data = SuperOp._init_instruction(data)
            else:
                # We use the QuantumChannel init transform to initialize
                # other objects into a QuantumChannel or Operator object.
                data = self._init_transformer(data)
            input_dim, output_dim = data.dim
            # Now that the input is an operator we convert it to a Chi object
            chi_mat = _to_chi(data.rep, data._data, input_dim, output_dim)
            if input_dims is None:
                input_dims = data.input_dims()
            if output_dims is None:
                output_dims = data.output_dims()
        # Check input is N-qubit channel
        n_qubits = int(np.log2(input_dim))
        if 2**n_qubits != input_dim:
            raise QiskitError("Input is not an n-qubit Chi matrix.")
        # Check and format input and output dimensions
        input_dims = self._automatic_dims(input_dims, input_dim)
        output_dims = self._automatic_dims(output_dims, output_dim)
        super().__init__('Chi', chi_mat, input_dims, output_dims)
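# A minimal usage sketch, assuming a Qiskit version that exposes
# qiskit.quantum_info.Chi: a one-qubit unitary channel has input_dim = 2,
# so its Chi matrix is (2**2) x (2**2) = 4 x 4.
import numpy as np
from qiskit import QuantumCircuit
from qiskit.quantum_info import Chi

qc = QuantumCircuit(1)
qc.h(0)
chi = Chi(qc)
print(chi.data.shape)      # (4, 4)
print(chi.input_dims())    # (2,) -> one qubit, consistent with the log2 check above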