Example #1
def extract_sub_matrix(mat, inds):
    """ Extract submatrix of `mat` by deleting `inds` rows/cols
    """
    for i in sorted(inds, reverse=True):
        mat = np.delete(mat, i, axis=0)
        mat = np.delete(mat, i, axis=1)
    return mat
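
A quick check (a minimal sketch, assuming numpy is imported as np): because the indices are deleted from largest to smallest, earlier deletions never shift the indices still to be removed, and the loop is equivalent to a single np.delete call per axis.

import numpy as np

mat = np.arange(16).reshape(4, 4)
print(extract_sub_matrix(mat, [0, 2]))                            # [[ 5  7] [13 15]]
print(np.delete(np.delete(mat, [0, 2], axis=0), [0, 2], axis=1))  # same result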
Example #2
def removeOldestEntriesFromDataSet(self, quantity):
    datasetSize = self.getDatasetLength()
    if datasetSize <= quantity:
        self.clearDataSet()
        return
    self._data['input']  = _np.delete(self._data['input'], range(quantity), 0)
    self._data['target'] = _np.delete(self._data['target'], range(quantity), 0)
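
For reference (a minimal sketch, assuming numpy as np): deleting the first `quantity` row indices is equivalent to slicing the rows off, which is often clearer for a "drop oldest" operation.

import numpy as np

data = np.arange(10).reshape(5, 2)
quantity = 2
print(np.array_equal(np.delete(data, range(quantity), 0), data[quantity:]))  # True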
Example #3
    def gstamp(self, ports_v, time=0, reduced=True):
        """Returns the differential (trans)conductance wrt the port specified by port_index
        when the element has the voltages specified in ports_v across its ports,
        at (simulation) time.

        ports_v: a list in the form: [voltage_across_port0, voltage_across_port1, ...]
        port_index: an integer, 0 <= port_index < len(self.get_ports())
        time: the simulation time at which the evaluation is performed. Set it to
        None during DC analysis.
        """
        indices = ([self.n1 - 1]*2 + [self.n2 - 1]*2,
                   [self.n1 - 1, self.n2 - 1]*2)
        gm = self.model.get_gm(self.model, 0, utilities.tuplinator(ports_v), 0, self.device)
        if gm == 0:
            gm = options.gmin*2
        stamp = np.array(((gm, -gm),
                          (-gm, gm)), dtype=np.float64)
        if reduced:
            zap_rc = [pos for pos, i in enumerate(indices[1][:2]) if i == -1]
            stamp = np.delete(stamp, zap_rc, axis=0)
            stamp = np.delete(stamp, zap_rc, axis=1)
            indices = tuple(zip(*[(i, y) for i, y in zip(*indices) if (i != -1 and y != -1)]))
            stamp_flat = stamp.reshape(-1)
            stamp_folded = []
            indices_folded = []
            for ix, it in enumerate([(i, y) for i, y in zip(*indices)]):
                if it not in indices_folded:
                    indices_folded.append(it)
                    stamp_folded.append(stamp_flat[ix])
                else:
                    w = indices_folded.index(it)
                    stamp_folded[w] += stamp_flat[ix]
            indices = tuple(zip(*indices_folded))
            stamp = np.array(stamp_folded)
        return indices, stamp
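
The reduction step above strikes the rows and columns that map to the reference node (index -1). A standalone sketch of that pattern, with made-up values rather than ahkab's actual stamps:

import numpy as np

gm = 1e-3
stamp = np.array(((gm, -gm), (-gm, gm)), dtype=np.float64)
zap_rc = [1]                            # pretend n2 is the reference node
stamp = np.delete(stamp, zap_rc, axis=0)
stamp = np.delete(stamp, zap_rc, axis=1)
print(stamp)                            # [[0.001]] -- only the free node remains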
Example #4
    def model_and_predict(self, X_train, y_train, X_test):
        district_idx = self.columns.index('PdDistrict')
        districts = set(X_train[:,district_idx])
        district_ys = {}
        # Grow forest and predict separately for each district's records
        for d in districts:
            district_X_train = X_train[X_train[:, district_idx] == d]
            district_X_train = np.delete(district_X_train, district_idx, 1)
            district_y_train = y_train[X_train[:, district_idx] == d]
            district_X_test = X_test[X_test[:, district_idx] == d]
            district_X_test = np.delete(district_X_test, district_idx, 1)
            print "Growing forest for", d

            # Not saving output in Git so make this deterministic 
            # with random_state
            rf = RandomForestClassifier(n_estimators=self.n_trees, n_jobs=-1,
                                        random_state=782629)
            rf.fit(district_X_train, district_y_train)

            district_ys[d] = list(rf.predict(district_X_test))
            print "Finished", d

        print "All predictions made"

        y_hat = []
        for row in X_test:
            d_ys = district_ys[row[district_idx]]
            y_hat.append(d_ys.pop(0))

        return y_hat
Example #5
    def append_new_point(self, y, x=None):
        self._axis_y_array = np.append(self._axis_y_array, y)
        if x is not None:  # accept x == 0 as a valid coordinate
            self._axis_x_array = np.append(self._axis_x_array, x)
        else:
            self._axis_x_array = np.arange(len(self._axis_y_array))

        if self.max_plot_points:
            if self._axis_y_array.size > self.max_plot_points:
                self._axis_y_array = np.delete(self._axis_y_array, 0)
                self._axis_x_array = np.delete(self._axis_x_array, 0)

        if self.single_curve is None:
            self.single_curve, = self.axes.plot(
                self._axis_y_array, linewidth=2, marker="s"
            )
        else:
            self.axes.fill(self._axis_y_array, "r", linewidth=2)

        self._axis_y_limits[1] = (
            self._axis_y_array.max() + self._axis_y_array.max() * 0.05
        )
        self.axes.set_ylim(self._axis_y_limits)
        self.single_curve.set_xdata(self._axis_x_array)
        self.single_curve.set_ydata(self._axis_y_array)
        self.axes.relim()
        self.axes.autoscale_view()
        self.fig.canvas.draw()
        self.fig.canvas.flush_events()
        self.axes.grid(True)

        # TODO move y lims as propery
        self.axes.set_ylim(
            (0, self._axis_y_array.max() + self._axis_y_array.max() * 0.05)
        )
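
The append/delete pair above implements a fixed-length rolling buffer: push the newest sample, drop index 0. A minimal sketch of the same idea:

import numpy as np

buf = np.array([1.0, 2.0, 3.0])
buf = np.append(buf, 4.0)   # newest point arrives
buf = np.delete(buf, 0)     # oldest point leaves
print(buf)                  # [2. 3. 4.]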
Example #6
def solveBlockGlasso(signal):
    start = int(signal[0]) # include
    S_Matrix  = S_Matrix_bc.value
    W_matrix = W_Matrix_bc.value
    old_W = np.copy(W_matrix)
    end   = min(int(signal[1]),S_Matrix.shape[0]) # non-inclusive
    deltamatrix = np.zeros(S_Matrix.shape)
    NN = S_Matrix.shape[0]
    for n in range(start,end):
        W11 = np.delete(W_matrix,n,0)
        W11 = np.delete(W11,n,1)
        Z   = linalg.sqrtm(W11)

        s11 = S_Matrix[:,n]
        s11 = np.delete(s11,n)
        Y   = np.dot(nplinalg.inv(linalg.sqrtm(W11)),s11)
        Y = np.real(Y)
        Z = np.real(Z)
        B = lasso(Z,Y,beta_value)

        # write the updated column back into W and record the change
        updated_column = np.dot(W11,B)

        matrix_ind = np.array(range(0,NN))
        matrix_ind = np.delete(matrix_ind,n)
        column_ind = 0
        for k in matrix_ind:
            deltamatrix[k,n]=updated_column[column_ind] - W_matrix[k,n]
            deltamatrix[n,k]=updated_column[column_ind] - W_matrix[k,n]
            W_matrix[k,n] = updated_column[column_ind]
            W_matrix[n,k] = updated_column[column_ind]
            column_ind = column_ind+1
    return deltamatrix
Example #7
def non_max_suppression(boxes, scores, threshold):
    """Performs non-maximum supression and returns indicies of kept boxes.
    boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
    scores: 1-D array of box scores.
    threshold: Float. IoU threshold to use for filtering.
    """
    assert boxes.shape[0] > 0
    if boxes.dtype.kind != "f":
        boxes = boxes.astype(np.float32)

    # Compute box areas
    y1 = boxes[:, 0]
    x1 = boxes[:, 1]
    y2 = boxes[:, 2]
    x2 = boxes[:, 3]
    area = (y2 - y1) * (x2 - x1)

    # Get indices of boxes sorted by scores (highest first)
    ixs = scores.argsort()[::-1]

    pick = []
    while len(ixs) > 0:
        # Pick top box and add its index to the list
        i = ixs[0]
        pick.append(i)
        # Compute IoU of the picked box with the rest
        iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
        # Identify boxes with IoU over the threshold. This
        # returns indices into ixs[1:], so add 1 to get
        # indices into ixs.
        remove_ixs = np.where(iou > threshold)[0] + 1
        # Remove indices of the picked and overlapped boxes.
        ixs = np.delete(ixs, remove_ixs)
        ixs = np.delete(ixs, 0)
    return np.array(pick, dtype=np.int32)
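
Note the deletion order above: remove_ixs holds positions computed against the current ixs, all of them >= 1, so they are removed before position 0; deleting position 0 first would shift every entry of remove_ixs by one. A minimal sketch with made-up indices:

import numpy as np

ixs = np.array([4, 2, 0, 3, 1])
ixs = np.delete(ixs, [2])   # positions of over-threshold boxes (here just one)
ixs = np.delete(ixs, 0)     # then the picked box itself
print(ixs)                  # [2 3 1]
Example #8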
def stftFiltering(x, fs, w, N, H, filter):
# apply a filter to a sound by using the STFT
# x: input sound, w: analysis window, N: FFT size, H: hop size
# filter: magnitude response of filter with frequency-magnitude pairs (in dB)
# returns y: output sound
	M = w.size                                     # size of analysis window
	hM1 = int(math.floor((M+1)/2))                 # half analysis window size by rounding
	hM2 = int(math.floor(M/2))                     # half analysis window size by floor
	x = np.append(np.zeros(hM2),x)                 # add zeros at beginning to center first window at sample 0
	x = np.append(x,np.zeros(hM1))                 # add zeros at the end to analyze last sample
	pin = hM1                                      # initialize sound pointer in middle of analysis window       
	pend = x.size-hM1                              # last sample to start a frame
	w = w / sum(w)                                 # normalize analysis window
	y = np.zeros(x.size)                           # initialize output array
	while pin<=pend:                               # while sound pointer is smaller than last sample      
	#-----analysis-----  
		x1 = x[pin-hM1:pin+hM2]                      # select one frame of input sound
		mX, pX = DFT.dftAnal(x1, w, N)               # compute dft
	#------transformation-----
		mY = mX + filter                             # filter input magnitude spectrum
	#-----synthesis-----
		y1 = DFT.dftSynth(mY, pX, M)                # compute idft
		y[pin-hM1:pin+hM2] += H*y1                  # overlap-add to generate output sound
		pin += H                                    # advance sound pointer
	y = np.delete(y, range(hM2))                  # delete the zeros added at the beginning
	y = np.delete(y, range(y.size-hM1, y.size))   # delete the zeros added at the end
	return y
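
The two trailing np.delete calls simply strip the zero-padding added at the start of the function; a plain slice does the same job. A minimal sketch:

import numpy as np

y = np.arange(10.0)
hM1, hM2 = 3, 2
t = np.delete(y, range(hM2))
t = np.delete(t, range(t.size-hM1, t.size))
print(np.array_equal(t, y[hM2:y.size-hM1]))   # True
Example #9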
def update_extra_mat(matfile,to_remove):
    """ updates the time_frames, confounds and mask_suppressed arrays to
    reflect the removed volumes. However, does not change other items in
    _extra.mat file

    """

    mat = loadmat(matfile)
    # update time_frames
    ntf = np.delete(mat['time_frames'][0],to_remove)
    mat.update({'time_frames': ntf})

    # update confounds
    ncon = np.delete(mat['confounds'],to_remove,axis = 0)
    mat.update({'confounds': ncon})

    # update mask_suppressed
    ms = mat['mask_suppressed']
    for supp in to_remove:
        ms[supp][0] = 1
    mat.update({'mask_suppressed': ms})

    # save updated mat file
    jnk, flnme = os.path.split(matfile)
    savemat(os.path.join(output_dir,flnme),mat)
Example #10
def sineModelSynth(tfreq, tmag, tphase, N, H, fs):
	"""
	Synthesis of a sound using the sinusoidal model
	tfreq,tmag,tphase: frequencies, magnitudes and phases of sinusoids
	N: synthesis FFT size, H: hop size, fs: sampling rate
	returns y: output array sound
	"""
	
	hN = N//2                                               # half of FFT size for synthesis
	L = tfreq.shape[0]                                      # number of frames
	pout = 0                                                # initialize output sound pointer         
	ysize = H*(L+3)                                         # output sound size
	y = np.zeros(ysize)                                     # initialize output array
	sw = np.zeros(N)                                        # initialize synthesis window
	ow = triang(2*H)                                        # triangular window
	sw[hN-H:hN+H] = ow                                      # add triangular window
	bh = blackmanharris(N)                                  # blackmanharris window
	bh = bh / sum(bh)                                       # normalized blackmanharris window
	sw[hN-H:hN+H] = sw[hN-H:hN+H]/bh[hN-H:hN+H]             # normalized synthesis window
	lastytfreq = tfreq[0,:]                                 # initialize synthesis frequencies
	ytphase = 2*np.pi*np.random.rand(tfreq[0,:].size)       # initialize synthesis phases 
	for l in range(L):                                      # iterate over all frames
		if (tphase.size > 0):                                 # if phases are given, use them
			ytphase = tphase[l,:] 
		else:
			ytphase += (np.pi*(lastytfreq+tfreq[l,:])/fs)*H     # propagate phases
		Y = UF.genSpecSines(tfreq[l,:], tmag[l,:], ytphase, N, fs)  # generate sines in the spectrum         
		lastytfreq = tfreq[l,:]                               # save frequency for phase propagation
		ytphase = ytphase % (2*np.pi)                         # make phase inside 2*pi
		yw = np.real(fftshift(ifft(Y)))                       # compute inverse FFT
		y[pout:pout+N] += sw*yw                               # overlap-add and apply a synthesis window
		pout += H                                             # advance sound pointer
	y = np.delete(y, range(hN))                             # delete half of first window
	y = np.delete(y, range(y.size-hN, y.size))              # delete half of the last window 
	return y
Example #11
def project_into_plane(index, r0, rm):
    r'''Projects out-of-plane resolution into a specified plane by performing
    a gaussian integral over the third axis.

    Parameters
    ----------
    index : int
        Index of the axis that should be integrated out

    r0 : float
        Resolution prefactor

    rm : ndarray
        Resolution array

    Returns
    -------
    mp : ndarray
        Resolution matrix in a specified plane

    '''

    r = np.sqrt(2 * np.pi / rm[index, index]) * r0
    mp = rm

    b = rm[:, index] + rm[index, :].T
    b = np.delete(b, index, 0)

    mp = np.delete(mp, index, 0)
    mp = np.delete(mp, index, 1)

    mp -= 1 / (4. * rm[index, index]) * np.outer(b, b.T)

    return [r, mp]
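
A quick sanity check (a sketch, assuming numpy as np): for a diagonal resolution matrix the integrated-out axis is independent, so the correction term vanishes and the in-plane block comes through unchanged.

import numpy as np

rm = np.diag([1.0, 2.0, 3.0])
r, mp = project_into_plane(2, 1.0, rm.copy())
print(mp)   # [[1. 0.]
            #  [0. 2.]]
Example #12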
def edit_description(instance):

    # twenty different categories
    scores = [0] * 20

    # Strip out all the punctuation
    unstripped = instance[9].lower()
    for c in string.punctuation:
        unstripped = unstripped.replace(c,"")

    description = unstripped.split()

    # add to the score if a word matches a category
    # 10 is the description
    for word in description:
        for i, category in enumerate(LDA):
            if word in category:
                scores[i] += 1

    # save the target
    target = instance[-1]

    # get rid of the description and target columns
    instance = np.delete(instance, 10, 0) # 10 is which column, 1 means column, 0 means row
    instance = np.delete(instance, -1, 0)

    # add the scores
    instance = np.append(instance, scores)

    # add the target back on the end
    return np.append(instance, target)
Example #13
def build_tree(data, labels, word_data, level):
    if (level == 0):
        #return label value which is dominant
        return LabelConv[st.mode(labels)[0][0]-1];
    #select appropriate attribute for the node:
    best, best_ig = attribute_selection(data,labels);
    best_data = data[:,best]; best_word = word_data[best];
    #remove all regarding that attribute from the data:
    word_data = np.delete(word_data,best,0);
    left_data = np.delete(data[best_data == 0,:],best,1); 
    right_data = np.delete(data[best_data == 1,:],best,1);
    #divide labels into two subarray based on selected attribute:
    left_labl = labels[best_data == 0]; 
    right_labl = labels[best_data == 1];
    if (check_label(left_labl) == 2 and level != 0):
        #since label is mono-valued:
        left = LabelConv[left_labl[0]-1];
    else:
        left = build_tree(left_data,left_labl,word_data,level-1);
    if (check_label(right_labl) == 2 and level != 0):
        #since label is mono-valued:
        right = LabelConv[right_labl[0]-1];
    else:
        right = build_tree(right_data,right_labl,word_data,level-1);
    subtrees = {0: left, 1: right};
    return (best_word,best_ig,subtrees);
Example #14
def reduce_dimension(m):
    """
    reduce the dimension of the game matrix based on domination --
    player I is better off if one row is consistently larger than another,
    player II is better off if one col is consistently smaller than another.
    Output: the reduced-size game matrix
    Note: This implements strict domination.
    TODO: convex reduction
    """
    local = np.array(m)
    while True:
        rbefore = len(local)
        candidates = set()
        for nr in permutations(range(len(local)), 2):
            if (local[nr[0]] > local[nr[1]]).all():
                candidates.add(nr[1])
        # delete every dominated row in one call so indices do not shift
        local = np.delete(local, sorted(candidates), 0)

        cbefore = len(local[0])
        candidates = set()
        for nc in permutations(range(len(local[0])), 2):
            if (local[:, nc[0]] < local[:, nc[1]]).all():
                candidates.add(nc[1])
        # likewise for dominated columns
        local = np.delete(local, sorted(candidates), 1)

        if len(local[0]) == cbefore and len(local) == rbefore:
            break

    return local
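
A minimal usage sketch (assuming numpy as np and itertools.permutations are in scope, as the snippet itself requires):

import numpy as np

m = [[3, 3],
     [1, 1],
     [2, 0]]
print(reduce_dimension(m))   # [[3 3]] -- both other rows are strictly dominated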
Example #15
def pixel_coordinates(nx, ny, mode="centers"):
    """Get pixel coordinates from a regular grid with dimension nx by ny.

    Parameters
    ----------
    nx : int
        xsize
    ny : int
        ysize
    mode : string
        `centers` or `edges` to return the pixel coordinates
        defaults to centers
    Returns
    -------
    coordinates : :class:`numpy:numpy.ndarray`
         Array of shape (ny,nx) with pixel coordinates (x,y)

    """
    x = np.linspace(0, nx, num=nx + 1)
    y = np.linspace(0, ny, num=ny + 1)
    if mode == "centers":
        x = x + 0.5
        y = y + 0.5
        x = np.delete(x, -1)
        y = np.delete(y, -1)
    X, Y = np.meshgrid(x, y)
    coordinates = np.empty(X.shape + (2,))
    coordinates[:, :, 0] = X
    coordinates[:, :, 1] = Y
    return coordinates
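
A minimal usage sketch: for a 2x2 grid, `centers` mode trims the last edge coordinate, leaving the pixel centers.

import numpy as np

coords = pixel_coordinates(2, 2, mode="centers")
print(coords[..., 0])   # x-centers: [[0.5 1.5] [0.5 1.5]]
print(coords[..., 1])   # y-centers: [[0.5 0.5] [1.5 1.5]]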
Example #16
def stochasticModelSynth(stocEnv, H, N):
	"""
	Stochastic synthesis of a sound
	stocEnv: stochastic envelope; H: hop size; N: fft size
	returns y: output sound
	"""

	if not(UF.isPower2(N)):                                 	# raise error if N not a power of two
		raise ValueError("N is not a power of two")
 
	hN = N//2+1                                             	# positive size of fft
	No2 = N//2                                              	# half of N
	L = stocEnv[:,0].size                                    	# number of frames
	ysize = H*(L+3)                                         	# output sound size
	y = np.zeros(ysize)                                     	# initialize output array
	ws = 2*hanning(N)                                        	# synthesis window
	pout = 0                                                 	# output sound pointer
	for l in range(L):                    
		mY = resample(stocEnv[l,:], hN)                        # interpolate to original size
		pY = 2*np.pi*np.random.rand(hN)                        # generate phase random values
		Y = np.zeros(N, dtype = complex)                       # initialize synthesis spectrum
		Y[:hN] = 10**(mY/20) * np.exp(1j*pY)                   # generate positive freq.
		Y[hN:] = 10**(mY[-2:0:-1]/20) * np.exp(-1j*pY[-2:0:-1]) # generate negative freq.
		fftbuffer = np.real(ifft(Y))                           # inverse FFT
		y[pout:pout+N] += ws*fftbuffer                         # overlap-add
		pout += H  
	y = np.delete(y, range(No2))                              # delete half of first window
	y = np.delete(y, range(y.size-No2, y.size))               # delete half of the last window 
	return y
Example #17
def MaxImpedanceComputation(InputGraph):
	
	MaxTotalImpedance=0
	
	G=InputGraph.copy()
	number_of_vertices=G.order()
	vertexlist=G.nodes()
	
	for top_node in vertexlist:
		for ground_node in vertexlist:
			if ground_node<top_node:
				ordered_vertexlist=vertexlist[:]
				ordered_vertexlist.remove(top_node)
				ordered_vertexlist.remove(ground_node)
				ordered_vertexlist.insert(0,top_node)
				ordered_vertexlist.insert(0,ground_node)
				
				LaplacianMatrix=nx.laplacian(G,ordered_vertexlist)
				ConductanceMatrix=np.delete(LaplacianMatrix,0,0)
				ConductanceMatrix=np.delete(ConductanceMatrix,np.s_[0],1)
				InputVector=[0]*(number_of_vertices-1)
				InputVector[0]=1
				VoltageVector=linalg.solve(ConductanceMatrix,InputVector)
				TotalImpedance=VoltageVector[0]
				if TotalImpedance>MaxTotalImpedance:
					MaxTotalImpedance=TotalImpedance
	
	return MaxTotalImpedance
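
The np.delete pair above grounds one node by striking its row and column from the graph Laplacian, which makes the remaining conductance matrix invertible. A self-contained sketch on a 3-node path graph:

import numpy as np

L = np.array([[ 1., -1.,  0.],
              [-1.,  2., -1.],
              [ 0., -1.,  1.]])          # Laplacian of the path 0-1-2
G = np.delete(np.delete(L, 0, 0), 0, 1)  # ground node 0
v = np.linalg.solve(G, [1., 0.])         # inject 1 A at node 1
print(v[0])                              # 1.0 -- the impedance between nodes 1 and 0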
Example #18
def err_plot(output,basedir='.', field='MLWA', err='dMLWA', suffix='syserr',
             label=r'$\tau_L$',err_label=r'$\delta\tau_{L,\mathrm{sys}}$',exclude=exclude):

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel(label)
    ax.set_ylabel(err_label)
    
    for p in range(6):
        coef = '{}/NGC_891_P{}_bin30_allz2.{}.fits'.format(basedir,p+1,suffix)
        print(coef)
        c = pyfits.open(coef)[1].data
        
        exarr = np.array(exclude[p]) - 1
        d = np.delete(c[field], exarr)
        e = np.delete(c[err], exarr)

        ax.scatter(d,e/d, c='k', alpha=0.7, linewidth=0)

    ax.set_yticks([0.1,0.2,0.3,0.4,0.5])

    pp = PDF(output)
    pp.savefig(fig)
    pp.close()
    plt.close(fig)

    return
Example #19
def carbonylorcarboxyl(allligand,index,bond_dist):

	allligandcoods = allligand.positions
	ocoods = np.zeros((1,3), dtype = float)
	ocoods[0,:] = allligandcoods[index,:]
	ocoods = np.float32(ocoods)

	tempdist = MDAnalysis.lib.distances.distance_array(ocoods,allligandcoods)
	A = np.argsort(tempdist)
	temp = int(A[0,1])

	Omatecood = np.zeros((1,3), dtype = float)
	Omatecood[0,:] = allligandcoods[temp,:]
	Omatecood = np.float32(Omatecood)

	tempdist2 = MDAnalysis.lib.distances.distance_array(Omatecood, allligandcoods)
	B = np.argsort(tempdist2)
	B = np.delete(B,0,axis = 1)
	for i in range(0,B.size):
		if B[0,i] == index:
			C = np.delete(B,i,axis = 1)
			break

	base1 = int(C[0,0])
	base2 = int(C[0,1])
	type1 = allligand[base1].type
	type2 = allligand[base2].type

	if type1 == 'O' or type2 == 'O':
		atype = 'carboxyl'
	else:
		atype = 'carbonyl'

	return atype
Example #20
 def make_DeviationPlot(self,year):
     average = np.array(self.average)
     deviation = np.array(self.deviation)
     
     dates = []
     dis = []
     count = 0
     for d in self.time:
         if self.time[count].year == year:
             dates.append(datetime.date(self.time[count].year, self.time[count].month,self.time[count].day))
             dis.append(self.discharge[count])
             
         count += 1
     
     dis = np.array(dis)
     dates = np.array(dates)  
     
     if len(dates) == 365:
         average = np.delete(average,-1)
         deviation = np.delete(deviation,-1)
         
     plus1 = np.array(average+deviation)
     minus1 = np.array(average-deviation)
     
             
     plt.plot(dates,dis,'r')
     x = np.linspace(1,366,366)
     plt.plot(dates,average,'k')
     plt.fill_between(dates,average,plus1,facecolor='gray')
     plt.fill_between(dates,average,minus1,facecolor='gray')
Example #21
def findDistance(record, data, result):
    transidsfortrain = data[:, [0]]
    data = np.delete(data, 0, 1)
    numAttributes = len(data[0])
    trainClasses = data[:, [numAttributes - 1]]
    data = np.delete(data, (numAttributes - 1), 1)
    counter = 0
    for row in record:
        currentRecord = row[1:-1]
        print(currentRecord)
        tempResult = (data - currentRecord) ** 2
        tempResult = np.sum(tempResult, axis=1).reshape(len(tempResult), 1)
        tempResult = np.sqrt(tempResult)
        tempResult = np.hstack((tempResult, trainClasses))
        tempResult = tempResult[np.argsort(tempResult[:, 0])]
        result[counter][1] = tempResult[0][0]
        result[counter][2] = tempResult[0][1]
        result[counter][3] = tempResult[1][0]
        result[counter][4] = tempResult[1][1]
        result[counter][5] = tempResult[2][0]
        result[counter][6] = tempResult[2][1]
        result[counter][7] = tempResult[3][0]
        result[counter][8] = tempResult[3][1]
        result[counter][9] = tempResult[4][0]
        result[counter][10] = tempResult[4][1]
        # TODO more things will be appended to result if value of n changes
        result[counter][11] = tempResult[5][0]
        result[counter][12] = tempResult[5][1]
        counter += 1
Example #22
 def execEnd(self,eventIdx):
     # execute an end-breaking or depolymerization event.
     oligoEndBreak=self.ald['end'][eventIdx//2]
     leftRight=eventIdx%2*2-1
     lr=-(leftRight+1)//2
     unitMoving=oligoEndBreak.ends[lr]
     oligo_vanish,form_oligo,self.event_code=oligoEndBreak.end_break(leftRight,self.units)
     if form_oligo:
         # not empty
         mono=form_oligo['monomer']
         if mono:
             # monomer + monomer (mergeOligo)
             idx=np.where([x in [mono,unitMoving] for x in self.monomers])[0]
             self.monomers=np.delete(self.monomers,idx)
             self.oligos=np.insert(self.oligos,0,form_oligo['oligo'])
         else:
             # monomer + multimer (mergeOligo)
             idx=np.where([unitMoving is x for x in self.monomers])[0]
             self.monomers=np.delete(self.monomers,idx)
     else:
         #empty, add the end to monomers
         self.monomers=np.insert(self.monomers,0,unitMoving)
         unitMoving.energize()
     
     if oligo_vanish:
         idx=np.where([oligoEndBreak is x for x in self.oligos])[0]
         self.oligos=np.delete(self.oligos,idx)
         
         idx=np.where([unitMoving is not x for x in oligoEndBreak.subunits])[0]
         nonmoving_unit=oligoEndBreak.subunits[idx[0]]
         self.monomers=np.insert(self.monomers,0,nonmoving_unit)
         nonmoving_unit.energize()
Example #23
	def StoreTransition(self, s_t, a_t, r_t, s_t_next, d_t=0):
		s_t = s_t.reshape(1, self.state_size)
		s_t_next = s_t_next.reshape(1, self.state_size)
		a_t = a_t.reshape(1, self.action_size)
		r_t = r_t.reshape(1, 1)
		d_t = np.array([d_t]).reshape(1, 1)
		
		self.S = np.concatenate((self.S, s_t))
		self.Stag = np.concatenate((self.Stag, s_t_next))
		self.A = np.concatenate((self.A, a_t))
		self.R = np.concatenate((self.R, r_t))
		self.D = np.concatenate((self.D, d_t))

		if self.populated < self.buffer_size:
			if self.populated == 0:
				self.S = np.delete(self.S,0,0)
				self.A = np.delete(self.A,0,0)
				self.R = np.delete(self.R,0,0)
				self.Stag = np.delete(self.Stag,0,0)
				self.D = np.delete(self.D,0,0)
			self.populated += 1
		else:
			self.S = np.delete(self.S,0,0)
			self.A = np.delete(self.A,0,0)
			self.R = np.delete(self.R,0,0)
			self.Stag = np.delete(self.Stag,0,0)
			self.D = np.delete(self.D,0,0)
Example #24
 def loadGlob(self, simu, Z, S):
     file_root = self._fileRoot(simu, Z, S)
     data = np.loadtxt(self.dir + file_root + '/' + file_root + '_all.deus_histo.txt')
     data = np.delete(data, self.nb_histo-1)        
     densityscale = np.linspace(self.glob_start, self.glob_end, self.nb_histo, endpoint=False)
     densityscale = np.delete(densityscale, self.nb_histo-1)        
     return densityscale, data
Example #25
  def update_proximity_matrix(self, old_prox, new_centroid, a, b):
    old_prox = np.delete(old_prox, [a,b], 0) #delete rows
    old_prox = np.delete(old_prox, [a,b], 1) #delete cols

    # add a line of zeroes on the right and bottom edges
    mid = np.hstack((old_prox, np.zeros((old_prox.shape[0], 1), dtype=old_prox.dtype)))
    pprint(("mid", mid, mid.shape))
    new_prox = np.vstack((mid, np.zeros((1, mid.shape[1]), dtype=mid.dtype)))

    pprint(("expanded", new_prox, new_prox.shape))

    old_length = len(old_prox) - 1
    new_length = len(new_prox) - 1

    #fill them in with new comparisons
    new_prox[new_length,new_length] = float(HIGH)

    for i, centroid in enumerate(self.centroids[:-1]):
      diff = np.linalg.norm(centroid - new_centroid)

      pprint(("checking", diff, i))

      new_prox[new_length,i] = diff
      new_prox[i,new_length] = diff

    pprint(("new prox", new_prox, new_prox.shape))

    return new_prox
Example #26
    def _rebuild_iso(self, sel):
        g = self.graph
        ss = [p.plots[pp][0] for p in g.plots
              for pp in p.plots
              if pp == 'data{}'.format(self.group_id)]

        self._set_renderer_selection(ss, sel)

        if self._cached_data:
            reg=self._cached_reg
            xs, ys, xerr, yerr = self._cached_data

            nxs = delete(xs, sel)
            nys = delete(ys, sel)
            nxerr = delete(xerr, sel)
            nyerr = delete(yerr, sel)

            # reg = ReedYorkRegressor(xs=nxs, ys=nys,
            #                         xserr=nxerr, yserr=nyerr)
            reg.trait_set(xs=nxs, ys=nys,xserr=nxerr, yserr=nyerr)
            reg.calculate()

            fit = self.graph.plots[0].plots['fit{}'.format(self.group_id)][0]

            mi, ma = self.graph.get_x_limits()
            rxs = linspace(mi, ma, 10)

            rys = reg.predict(rxs)

            fit.index.set_data(rxs)
            fit.value.set_data(rys)

            if self._plot_label:
                self._add_info(self.graph.plots[0], reg, label=self._plot_label)
Example #27
def clean_features(features, labels):
  # remove features missing in a lot of samples
  feature_threshold = [300, 250, 200, 150, 100]
  sample_threshold = [20, 15, 10, 5, 0]

  for f, s in zip(feature_threshold, sample_threshold):
    remove_cols = explore_features(features, f)
    features = np.delete(features, remove_cols, axis=1)
    print(features.shape)
    print('---')
    
    # remove samples missing data
    remove_rows = explore_samples(features, s)
    features = np.delete(features, remove_rows, axis=0)
    labels = np.delete(labels, remove_rows)
    print(features.shape, labels.shape)
    print('---')

  # RATIONALE: any feature missing in more than 5% of 
  # samples has no guarantee of being collected so do
  # not include in model and any sample still missing
  # data probably is fairly unknown or poorly recorded

  # TODO: efficiently remove NaNs while keeping as much data as possible
  return features, labels
Example #28
    def find_offset_old(self,datafile, nonlinmin, nonlinmax, exclude, threshold):
        '''find_offset is used to determine the systematic offset present
        in the experimental setup that causes data to not be symmetric
        about zero input angle. It reads in the output of laserBench and
        returns the offset (in degrees)'''
        
        input_a, output_a = np.loadtxt(datafile,usecols=(0,1),unpack=True)
        
        for e in exclude:
            did = np.where(input_a == e)
            output_a = np.delete(output_a, did)
            input_a = np.delete(input_a, did)

        pidx = np.where(input_a > nonlinmax)
        nidx = np.where(input_a < nonlinmin)
        
        in_a = np.append(input_a[nidx],input_a[pidx])
        out_a = np.append(-1*output_a[nidx],output_a[pidx])
        error = np.zeros(in_a.size)+1

        b = 1000.
        offset = 0.
        while abs(b) > threshold:
            m, b = ADE.fit_line(in_a,out_a,error)
            offset += b
            in_a += b

        return offset
Example #29
def sortArray(x, y):
    order_indicies = []
    delete_indicies = []
    for i in range( 1, len(x) ):
        if (x[i] == x[i-1]):
            print('duplicate at', i, x[i])
            delete_indicies.append(i)

        if (len(delete_indicies) > 4):
            return x, y, True

    # delete all duplicates in one call so the indices keep referring
    # to the original array (deleting one at a time shifts later indices)
    x = np.delete(x, delete_indicies)
    y = np.delete(y, delete_indicies)

    for i in range( 1, len(x) ):
        if (x[i] < x[i-1]):
            x_temp = x[i]
            y_temp = y[i]
            x[i] = x[i-1]
            x[i-1] = x_temp
            y[i] = y[i-1]
            y[i-1] = y_temp
            print('reorder data at', i, x[i])
            order_indicies.append(i)

        if (( len(order_indicies)+len(delete_indicies) ) > 4):
            return x, y, True

    return x, y, False
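
Passing the whole index list to a single np.delete call, as fixed above, keeps every index referring to the original array. A minimal sketch:

import numpy as np

x = np.array([10, 20, 20, 30, 30, 40])
dup_idx = [2, 4]                 # positions of the duplicates
print(np.delete(x, dup_idx))     # [10 20 30 40]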
Example #30
def load_pics(path):
    #get all images
    npts = 32    

    #find all the png files in the current path
    images = [os.path.join(path,f) for f in os.listdir(path) if os.path.splitext(f)[1] == '.png']
    desc_list = np.array(np.zeros(npts*128))
    #numerical classes array
    classy = []

    for pic in images:
        desc, kp = getDescriptorKp(pic, npts)
        #sometimes not all 32 descriptors are returned because there's not enough, in which case we just pad up to 32 descriptors * 128 values/desc
        desc_list = np.vstack( (desc_list, np.resize(desc.flatten(), (1, npts*128))) )

        #figure what pokemon it is from the file name
        match = re.search(r"pokemon-(\d+)-", pic).group(1)
        classy.append(int(match))

    #convert classy array to a set of logical arrays
    classfication = np.array(np.zeros(NPOKEMON))
    for i in classy:
        tmp = np.zeros(NPOKEMON)
        tmp[i-1] = 1
        classfication = np.vstack( (classfication, tmp) )

    #remove the first row of dummy values
    desc_list = np.delete(desc_list, 0, 0)
    classfication = np.delete(classfication, 0, 0)

    #normalize training features
    normalize(desc_list)
    return desc_list, classfication
Example #31
    def test_frequency(self):
        hv = Hierarchy(db_name='vec_store.sqlite', file_name='hierarchy')
        # Produce frequency plots between the lower and upper bound.
        for i in range(20, 22):

            select_limit = [i - 1, i + 1]
            data1 = np.empty(1, )
            data2 = np.empty(1, )
            hit1, hit2 = 0, 0
            for k in range(1, 4):
                selected_features1 = feature_frequency(
                    hv,
                    243,
                    3,
                    8,
                    new_data=True,
                    ridge=True,
                    scale=True,
                    globalscale=True,
                    normalization=True,
                    featselect_featvar=False,
                    featselect_featconst=True,
                    select_limit=select_limit,
                    feat_sub=i)
                selected_features2 = feature_frequency(
                    hv,
                    243,
                    3,
                    8,
                    smallest=True,
                    new_data=False,
                    ridge=True,
                    scale=True,
                    globalscale=True,
                    normalization=True,
                    featselect_featvar=False,
                    featselect_featconst=True,
                    select_limit=select_limit,
                    feat_sub=i)
                if bool(selected_features1):
                    hit1 += 1
                if bool(selected_features2):
                    hit2 += 1
                if bool(selected_features1) and bool(selected_features2):
                    data1 = np.concatenate(
                        (data1,
                         (list(selected_features1.items())[0])[1][0][:]),
                        axis=0)
                    data2 = np.concatenate(
                        (data2,
                         (list(selected_features2.items())[0])[1][0][:]),
                        axis=0)
            data1 = np.delete(data1, 0)
            data2 = np.delete(data2, 0)

            data_all = np.concatenate((data1, data2), axis=0)
            if len(data_all) > 0:
                bins = np.arange(min(data_all) - 2, max(data_all) + 2, 0.5)
                hist1 = np.histogram(data1, bins=bins)
                hist2 = np.histogram(data2, bins=bins)
                r1_hist1 = np.delete(hist1[0], np.where(hist1[0] == 0))
                r1_hist1 = np.divide(r1_hist1.astype('float'),
                                     len(data1)) * 100
                r2_hist1 = np.delete(
                    np.delete(hist1[1], np.where(hist1[0] == 0)), -1)

                r1_hist2 = np.delete(hist2[0], np.where(hist2[0] == 0))
                r1_hist2 = np.divide(r1_hist2.astype('float'),
                                     len(data2)) * 100
                r2_hist2 = np.delete(
                    np.delete(hist2[1], np.where(hist2[0] == 0)), -1)

                if np.shape(r1_hist2)[0] > np.shape(r1_hist1)[0]:
                    dif = np.shape(r1_hist2)[0] - np.shape(r1_hist1)[0]
                    r1_hist1 = np.concatenate((r1_hist1, np.zeros(dif)),
                                              axis=0)
                    r2_hist1 = np.concatenate((r2_hist1, np.zeros(dif)),
                                              axis=0)
                elif np.shape(r1_hist1)[0] > np.shape(r1_hist2)[0]:
                    dif = np.shape(r1_hist1)[0] - np.shape(r1_hist2)[0]
                    r1_hist2 = np.concatenate((r1_hist2, np.zeros(dif)),
                                              axis=0)
                    r2_hist2 = np.concatenate((r2_hist2, np.zeros(dif)),
                                              axis=0)
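
The nested np.delete calls above drop empty histogram bins and realign the bin edges (np.histogram returns one more edge than counts, hence the extra -1 delete). A minimal sketch:

import numpy as np

counts, edges = np.histogram([1, 1, 4], bins=[0, 1, 2, 3, 4, 5])
empty = np.where(counts == 0)[0]
print(np.delete(counts, empty))                   # [2 1]
print(np.delete(np.delete(edges, empty), -1))     # [1. 4.]
Example #32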
def chance_level(paths, unit, nr_run_time):
    data1 = pd.read_csv(paths[0], header=None).values
    data2 = pd.read_csv(paths[1], header=None).values

    data1 = np.array(data1)
    data2 = np.array(data2)

    results = []

    # remove the trials that contain 0

    indexes_to_remove = np.array([], dtype=int)  # np.delete needs integer indices

    for i in range(data2.shape[0]):
        if row_has_zero_values(data2[i]):
            indexes_to_remove = np.append(indexes_to_remove, i)

    data2 = np.delete(data2, indexes_to_remove, axis=0)
    data1 = np.delete(data1, indexes_to_remove, axis=0)

    y1 = get_labels(data1)
    y2 = get_labels(data2)

    # for i in range(8):
    #     o = i * 45
    #     print o
    #     print data1[y1 == o].shape

    x1 = get_data(data1)
    x2 = get_data(data2)

    unit_fr = x1[:, unit]
    unit_ma = x2[:, unit]

    init_correlation = pearsonr(unit_fr, unit_ma)[0]

    for n in range(nr_run_time):
        np.random.shuffle(data1)
        np.random.shuffle(data2)

        x1 = get_data(data1)
        x2 = get_data(data2)

        unit_fr = x1[:, unit]
        unit_ma = x2[:, unit]

        results.append(pearsonr(unit_fr, unit_ma)[0])

    x = np.array(results)
    x.sort()

    f = plt.figure()
    ax = f.add_subplot(111)
    plt.text(0.05, 0.97, "Mean: %f" % np.mean(x), ha='left', va='top', transform=ax.transAxes)
    plt.text(0.05, 0.94, "Std: %f" % np.std(x), ha='left', va='top', transform=ax.transAxes)

    magenta_patch = mpatches.Patch(color='cyan', label='Mean')
    green_patch = mpatches.Patch(color='yellow', label='Initial correlation')
    plt.legend(handles=[magenta_patch, green_patch])

    fit = norm.pdf(x, np.mean(x), np.std(x))
    plt.plot(x, fit, color='red')

    plt.axvline(x.mean(), color='cyan', linewidth=1.5, label='Mean')
    plt.axvline(init_correlation, color='yellow', linewidth=1.5, label='Init_correlation')
    plt.hist(x, bins='auto', density=True)
    plt.title("Correlation FR_MA_UNIT: %d" % unit)
    figure = plt.gcf()
    figure.set_size_inches(15, 9)
    #plt.show()
    plt.savefig("correlation_FR_MA_UNIT_%s.png" % unit, dpi=100)
Example #33
    def allocate_aps(self, plot=False):
        x = []
        y = []
        z = []
        for i in range(self.grid.shape[0]):
            for j in range(self.grid.shape[1]):
                x.append(i)
                y.append(j)
                z.append(self.grid[i, j])
        d = {'x': x, 'y': y, 'z': z}
        data = pd.DataFrame(data=d)

        X = data.x
        Y = data.y
        D = np.array(list(zip(X, Y)))
        flag = 0
        # Number of clusters
        k = 1
        # X coordinates of random centroids
        C_x = np.random.uniform(0, np.max(X), size=k)
        # Y coordinates of random centroids
        C_y = np.random.uniform(0, np.max(Y), size=k)

        clusters = np.zeros(len(D))
        dense_cluster = 0
        len_list = []

        while flag != 1:
            flag = 1
            if len(C_x) != k:
                candidates = []
                for j in range(len(D)):
                    if clusters[j] == dense_cluster:
                        candidates.append(D[j, :])
                cand = random.choice(candidates)

                C_x_aux = [cand[0]]
                C_y_aux = [cand[1]]
                C_x = np.concatenate((C_x, C_x_aux), axis=0)
                C_y = np.concatenate((C_y, C_y_aux), axis=0)

            C = np.array(list(zip(C_x, C_y)), dtype=np.float32)
            # To store the value of centroids when it updates
            C_old = np.zeros(C.shape)
            # Cluster labels (0, 1, 2)
            # Error func. - Distance between new centroids and old centroids
            error = dist(C, C_old)
            # Loop will run till the error becomes zero
            while sum(error) != 0:
                # Assigning each value to its closest cluster
                for i in range(len(D)):
                    distances = dist(D[i], C)
                    cluster = np.argmin(distances)
                    clusters[i] = cluster
                # Storing the old centroid values
                C_old = deepcopy(C)
                # Finding the new centroids by taking the average value
                for i in range(k):
                    points_x = [np.repeat(D[j, 0], data.z[j]) for j in range(len(D)) if clusters[j] == i]
                    l_x = []
                    for item in points_x:
                        l_x = np.concatenate((l_x, item), axis=0)

                    points_y = [np.repeat(D[j, 1], data.z[j]) for j in range(len(D)) if clusters[j] == i]
                    l_y = []
                    for item in points_y:
                        l_y = np.concatenate((l_y, item), axis=0)

                    if np.isnan(np.mean(l_x)):
                        C[i] = np.array([-1, -1])
                        break
                    else:
                        C[i] = np.array([np.mean(l_x), np.mean(l_y)])

                error = dist(C, C_old)
                print(error, k)

            l_max = 0
            for i in range(k):
                points_x = [np.repeat(D[j, 0], data.z[j]) for j in range(len(D)) if clusters[j] == i]
                l_x = []
                for item in points_x:
                    l_x = np.concatenate((l_x, item), axis=0)
                len_list.append(len(l_x))
                if len(l_x) > l_max:
                    l_max = len(l_x)
                    dense_cluster = i
                if len(l_x) > 200 and flag != -1:
                    flag = 0
                if len(l_x) == 0:
                    flag = -1
                print(i, len(l_x))
            if flag == -1:
                k = k - 1
            C_x = C[:, 0]
            C_y = C[:, 1]
            k = k + 1
        k = k - 1
        mean_density = np.sum(len_list) / k

        print('Clusters:', k)
        print('Mean density:', mean_density)
        print('Max_cluster:', np.max(len_list))
        print('Min_cluster:', np.min(len_list))
        print(len_list)

        C_aux = C
        count = 0
        for i in range(k):
            p = 0
            p = [p + 0 for j in range(len(D)) if clusters[j] == i]

            points_x = [np.repeat(D[j, 0], data.z[j]) for j in range(len(D)) if clusters[j] == i]
            l_x = []
            for item in points_x:
                l_x = np.concatenate((l_x, item), axis=0)

            points_y = [np.repeat(D[j, 1], data.z[j]) for j in range(len(D)) if clusters[j] == i]
            l_y = []
            for item in points_y:
                l_y = np.concatenate((l_y, item), axis=0)
            if len(l_x) == 0:
                C_aux = np.delete(C_aux, i - count, 0)
                count = count + 1
            else:
                print(C[i], len(l_x), len(p))
        C = C_aux
        k = len(C)
        self.aps = C * self.grid_step

        print(k)

        if plot:
            plt.figure(figsize=(10, 8))
            ax = sns.heatmap(data=clusters.reshape(self.grid.shape).transpose(), annot=True, cbar=False)
            ax.invert_yaxis()
            ax.scatter(C[:, 0], C[:, 1], marker='*', s=100, color='yellow')
            plt.title("APs clusters by density", fontsize=20)
            plt.show()

            plt.figure(figsize=(10, 8))
            ax = sns.heatmap(data=self.grid.transpose(), annot=True)
            ax.invert_yaxis()
            ax.scatter(C[:, 0], C[:, 1], marker='*', s=100, color='yellow')
            plt.title("APs clusters and scenario density", fontsize=20)
            plt.show()
Example #34
def MINE(Start: np.ndarray):
    Cycle = 100
    Digits = 2
    a, b = 20, -10
    MemNum = 30
    x0 = mat.repmat(Start, MemNum, 1)
    Dir = a * np.random.rand(MemNum, Digits) - b
    x0 += Dir
    Dir = Dir / Dir.min()
    if Dir.dtype != "float64":
        Dir = Dir.astype("float")
    Dir = Dir / (la.norm(Dir, axis=1).reshape(MemNum, 1))
    Hbest = np.zeros((MemNum, Digits))
    Hbestv = np.zeros((MemNum, 1))
    for inner in range(MemNum):
        Hbestv[inner,:] = Evaluate(x0[inner,:])
        Hbest[inner,:] = x0[inner,:]
    value = []
    Pace = np.std(Hbest, axis=0)
    Pace = 4 * la.norm(Pace)
    P = []
    R = []
    Po = []
    for i in range(Cycle):
        radius = np.std(Hbest, axis=0)
        radius = la.norm(radius)
        po = np.mean(Hbest, axis=0)
        Po.append(po)
        # Pace = 400 / (1 + np.exp(7*(i - (3*Cycle / 2)) / Cycle))
        Judge = 1 / (1 + np.exp(Pace /(radius+1e-15) - 1))
        Pace = 3.5 * radius *Judge
        Bias = 1.2 * Judge
        P.append(Pace)
        R.append(radius)
        # Bias = 1.2 / (1 + np.exp(2 * (i - (Cycle / 2)) / Cycle))
        Sort = np.argsort(Hbestv, axis=0)
        c1 = np.linspace(1.2, 1, Digits)
        c1.reshape(Digits, 1)
        T0, Tend = 0.45, 0.9
        A = (Tend - T0) / (Cycle ** 0.5)
        B = T0
        Ref = Hbest[Sort[:, 0],:]
        for Inner in range(MemNum):
            T = B + A * (Inner ** 0.5)
            base = Ref - mat.repmat(x0[Inner,:], MemNum, 1)
            Normal = la.norm(base, axis=0)
            det = Normal.argmin()
            base = np.delete(base, det, axis=0)
            base = base[0:Digits,:]
            if base.dtype != "float":
                base = base.astype("float")
            norm = la.norm(base,axis=1).reshape(Digits,1)
            for index in range(Digits):
                if norm[index, 0] == 0:
                    base[index,:] = np.ones(Digits)
                else:
                    base[index,:] = base[index,:] / norm[index,0]
            belief1 = 2 * np.random.rand(1, Digits) - Bias * c1
            belief2 = np.random.logistic(T,np.abs((1-T)/3), Digits)
            direction = np.dot(belief1, base)
            direction = direction / la.norm(direction)
            Dir[Inner,:] = (1-belief2) * Dir[Inner,:] + belief2 * direction
            if Dir[Inner,:].dtype != "float64":
                Dir = Dir.astype("float")
            Dir[Inner,:] = Dir[Inner,:] / la.norm(Dir[Inner,:])
            x0[Inner,:] = x0[Inner,:] + Pace * Dir[Inner,:]
            Check = Evaluate(x0[Inner,:])
            if Hbestv[Inner,:] > Check:
                Hbest[Inner,:] = x0[Inner,:]
                Hbestv[Inner,:] = Check
        value.append(Hbestv.min())
    Po = np.array(Po)
    plt.figure("MINE:Time-Position")
    for i in range(Digits):
        plt.plot(np.arange(Cycle), Po[:, i])
    plt.xlabel("Times")  
    plt.ylabel("Value") 
    plt.figure("MINE:Time-Radius/Pace")
    Pace,=plt.plot(range(Cycle), P, label="Pace")
    Radius,=plt.plot(range(Cycle), R, label="Radius")
    plt.legend(loc="upper right")
    plt.xlabel("Times")  
    plt.ylabel("Value") 
    return value,Hbestv.min(),Hbest[np.argmin(Hbestv),:]
Example #35
    avLAT_NMRSE = 0
    avPOW_NMRSE = 0
    avE_NMRSE = 0
    avT_NMRSE = 0
    distNMRSE = []

    # Random seed used to debug 
    #np.random.seed(1234567890)
    for iter in range(iters):
        # Dataset shuffle
        shuffledData = np.array([WH, C, K, N, LAT, POW, E, T])
        shuffledData = shuffledData[:, np.random.permutation(shuffledData.shape[1])]
        # Dataset split in k-folds
        foldSize = shuffledData.shape[1] // k_folds
        for i in range(k_folds):
            # Split for train data
            trainData = np.delete(shuffledData, np.arange(i*foldSize,i*foldSize+foldSize,dtype=int), 1)
            # Split for validation data
            validationData = shuffledData[:,np.arange(i*foldSize,i*foldSize+foldSize,dtype=int)]
            # Identification over training Dataset
            LAT_parameters, LAT_covariance = curve_fit(LatAggModel, trainData[:4,:], trainData[4,:], maxfev=1000)
            POW_parameters, POW_covariance = curve_fit(PowAggModel, trainData[:4,:], trainData[5,:], maxfev=1000)
            E_parameters, E_covariance = curve_fit(EneAggModel, trainData[:4,:], trainData[6,:], maxfev=1000)
            T_parameters, T_covariance = curve_fit(ThrAggModel, trainData[:4,:], trainData[7,:], maxfev=1000)
            # Compute resulting NRMSE on validation Dataset fold
            distNMRSE.append(NRMSE(validationData[6,:], validationData[:4,:], EneAggModel, E_parameters))
            avLAT_NMRSE += NRMSE(validationData[4,:], validationData[:4,:], LatAggModel, LAT_parameters)
            avPOW_NMRSE += NRMSE(validationData[5,:], validationData[:4,:], PowAggModel, POW_parameters)
            avE_NMRSE += NRMSE(validationData[6,:], validationData[:4,:], EneAggModel, E_parameters)
            avT_NMRSE += NRMSE(validationData[7,:], validationData[:4,:], ThrAggModel, T_parameters)
            # Store obtained distribution per fold iteration
            parameterDistLAT.append(np.concatenate((LAT_parameters[0]*selectedParameters[0], \
Example #36
failed = []
for i in range(0,len(species)):
        try:
                ftp = FTP('ftp.ncbi.nlm.nih.gov')
                ftp.login()
                ftp.cwd('/genomes/refseq/bacteria/%s/latest_assembly_versions/'%species[i,0][3:])
                file = ''.join(ftp.nlst()[0])
                ftp.cwd(file)
                filename = file + '_genomic.fna.gz'
                genomes.append(filename[:-3])
                ftp.retrbinary('RETR ' + filename, open(filename, 'wb').write)
                subprocess.Popen('gzip -d %s'%filename, shell=True,stdout=subprocess.PIPE).wait()
                ftp.close()

        except Exception as e:
                print(e)
                # remember the failed row; deleting it here would shift the
                # indices of the rows still to be visited
                failed.append(i)
                continue

# drop all failed species rows in one call once iteration is finished
species = np.delete(species, failed, axis=0)

subprocess.Popen('echo ...done >> logfile.txt', shell=True,stdout=subprocess.PIPE).wait()


#find length of each genome to determine needed depth of sequencing
glengths = []      # initialize once, outside the loop, so the lengths accumulate
for j in range(0,len(genomes)):
        glengths.append(int(subprocess.Popen("awk 'NR>1' %s | wc -c"%genomes[j],shell=True,stdout=subprocess.PIPE).stdout.read()))

#calculate length percentages and relative coverage
total = sum(glengths)
maxSeqCov = 20
glengths = np.array([100*x/total for x in glengths])
depths = np.divide(glengths,species[:,1].astype(float))*maxSeqCov
Example #37
    def remove_samples(self,
                       new_sample_amount,
                       zscore_high=2,
                       weighted_dist_value=1.0,
                       annotate=False,
                       remove_noise=True,
                       remove_similar=True,
                       apply_changes=False,
                       display_all_graphs=False,
                       show_gif=False,
                       shelve_relative_path=None,
                       create_visuals=True):

        self.__index_array = None
        self.__total_indexes = None
        self.__tmp_reduced_scaled = None
        self.__all_dp_dist_list = None
        self.__pbar = None
        self.__all_dp_dist_dict = None

        new_sample_amount = int(new_sample_amount)

        if new_sample_amount >= self.__scaled.shape[0]:
            print("THROW ERROR HERE: Sample removal must be less then")
        elif new_sample_amount <= 0:
            print("THROW ERROR HERE: Val must be a positive number!")
        elif remove_noise == False and remove_similar == False:
            print("THROW ERROR HERE: At least one operation must be made!")
        else:
            # Store data for removal
            removed_dps_dict = dict()

            # Stored removed datapoints for visualizations
            noise_removal_dps_dict = dict()
            similarity_dps_dict = dict()

            df_index_scaled_dict = dict()
            # Index to shape
            for i, df_index in enumerate(self.__df_index_values):
                df_index_scaled_dict[df_index] = i

            if not remove_noise:
                folder_dir_name = "Data_Point_Removal_Weight={1}".format(
                    zscore_high, weighted_dist_value)

            elif not remove_similar:
                folder_dir_name = "Data_Point_Removal_Zscore={0}".format(
                    zscore_high, weighted_dist_value)

            else:
                folder_dir_name = "Data_Point_Removal_Zscore={0}_Weight={1}".format(
                    zscore_high, weighted_dist_value)

            # Display graph before augmentation; Create centroid
            centroid = np.mean(self.__scaled, axis=0)
            column_list = [i for i in range(0, self.__scaled.shape[1])]

            reduced_scaled = np.column_stack(
                (self.__scaled, self.__df_index_values.reshape(
                    (self.__scaled.shape[0], 1)).astype(self.__scaled.dtype)))

            if create_visuals:
                self.__visualize_data_points(centroid=centroid,
                                             scaled_data=self.__scaled,
                                             noise_removal_dps=[],
                                             similar_removal_dps=[],
                                             new_sample_amount=new_sample_amount,
                                             zscore_high=zscore_high,
                                             weighted_dist_value=weighted_dist_value,
                                             annotate=annotate,
                                             output_path=folder_dir_name,
                                             title="Starting point",
                                             remove_noise=remove_noise,
                                             remove_similar=remove_similar,
                                             display_all_graphs=display_all_graphs)

            if remove_noise:

                dp_distances = np.zeros(len(reduced_scaled))

                # Keep looping until the new sample amount has been reached or
                # the remaining distances fall below the z-score threshold.
                while reduced_scaled.shape[0] > new_sample_amount:

                    for index, dp in enumerate(reduced_scaled):
                        dp_distances[index] = distance.euclidean(
                            centroid, dp[:column_list[-1] + 1])

                    farthest_dp_index = np.argmax(dp_distances)
                    zscores_dp_distances = zscore(np.concatenate((
                        dp_distances, np.array([distance.euclidean(centroid,
                                                                   self.__scaled[
                                                                       dp_index])
                                                for dp_index in
                                                list(removed_dps_dict.values())
                                                ])), axis=0))

                    if zscores_dp_distances[farthest_dp_index] >= zscore_high:

                        farthest_dp = reduced_scaled[farthest_dp_index][
                                      :column_list[-1] + 1]

                        # Add original dataframe index to the dict;
                        # remove actual row from the data

                        df_index = int(reduced_scaled[farthest_dp_index][-1])
                        removed_dps_dict[df_index] = df_index_scaled_dict[
                            df_index]

                        if shelve_relative_path:
                            shelf = shelve.open(shelve_relative_path)
                            shelf[shelve_relative_path.split("/")[-1]] = list(
                                removed_dps_dict.keys())
                            shelf.close()

                        if create_visuals:
                            noise_removal_dps_dict[df_index] = \
                            df_index_scaled_dict[df_index]

                        reduced_scaled = np.delete(reduced_scaled,
                                                   farthest_dp_index,
                                                   0)
                        # Update centroid
                        centroid = np.mean(reduced_scaled[:, column_list],
                                           axis=0)
                        if create_visuals:
                            self.__visualize_data_points(centroid=centroid,
                                                         scaled_data=reduced_scaled[
                                                                     :,
                                                                     column_list],
                                                         noise_removal_dps=list(
                                                             noise_removal_dps_dict.values()),
                                                         similar_removal_dps=[],
                                                         new_sample_amount=new_sample_amount,
                                                         zscore_high=zscore_high,
                                                         weighted_dist_value=weighted_dist_value,
                                                         annotate=annotate,
                                                         output_path=folder_dir_name,
                                                         new_dp_meta_noise_removal=(
                                                         farthest_dp,
                                                         zscores_dp_distances[
                                                             farthest_dp_index],
                                                         dp_distances[
                                                             farthest_dp_index]),
                                                         title="Data Removal: Noise reduction",
                                                         remove_noise=remove_noise,
                                                         remove_similar=remove_similar,
                                                         display_all_graphs=display_all_graphs)
                        else:
                            print(
                                "Scaled size is now {0} and Z-Score {1:.2f}.".format(
                                    reduced_scaled.shape[0],
                                    zscores_dp_distances[farthest_dp_index]))
                    # Break the loop once distances fall below the z-score threshold
                    else:
                        break
                if create_visuals:
                    self.__create_gif_dp_amount(n_start=self.__scaled.shape[0],
                                                n_end=reduced_scaled.shape[0],
                                                folder_dir_name=folder_dir_name,
                                                filename="Noise Reduction",
                                                show_gif=show_gif)

            if remove_similar:

                starting_shape = reduced_scaled.shape[0]
                
                farthest_dp_distance = None
                dp_distances = np.zeros(len(reduced_scaled))

                while reduced_scaled.shape[0] > new_sample_amount:
                    # The following unconventional setup trades clarity for
                    # multithreading speed and lower memory use
                    self.__index_array = list(range(len(reduced_scaled)))
                    self.__total_indexes = len(self.__index_array)
                    self.__tmp_reduced_scaled = copy.deepcopy(
                        reduced_scaled[:, column_list])
                    
                    if not farthest_dp_distance:
                        for index, dp in enumerate(self.__tmp_reduced_scaled):
                            dp_distances[index] = distance.euclidean(
                                centroid, dp[:column_list[-1] + 1])

                        farthest_dp_distance = np.amax(dp_distances)
                        farthest_dp_distance *= weighted_dist_value

                    removal_index, keep_index, smallest_dist = self.__shortest_dist_relationship(
                        centroid)
                    
                    if farthest_dp_distance < smallest_dist:
                        print("Target distance reached!!!")
                        break

                    new_dp_meta_similar_removal = (
                        self.__tmp_reduced_scaled[removal_index],
                        self.__tmp_reduced_scaled[keep_index])

                    df_index = int(reduced_scaled[removal_index][-1])
                    removed_dps_dict[df_index] = df_index_scaled_dict[df_index]

                    if create_visuals:
                        similarity_dps_dict[df_index] = df_index_scaled_dict[
                            df_index]

                    if shelve_relative_path:
                        shelf = shelve.open(shelve_relative_path)
                        shelf[shelve_relative_path.split("/")[-1]] = list(
                            removed_dps_dict.keys())
                        shelf.close()

                    # Remove from temp scaled
                    reduced_scaled = np.delete(reduced_scaled,
                                               removal_index,
                                               0)
                    # Update centroid
                    centroid = np.mean(reduced_scaled[:, column_list],
                                       axis=0)

                    if create_visuals:
                        self.__visualize_data_points(centroid=centroid,
                                                     scaled_data=reduced_scaled[
                                                                 :,
                                                                 column_list],
                                                     noise_removal_dps=list(
                                                         noise_removal_dps_dict.values()),
                                                     similar_removal_dps=list(
                                                         similarity_dps_dict.values()),
                                                     new_sample_amount=new_sample_amount,
                                                     zscore_high=zscore_high,
                                                     weighted_dist_value=weighted_dist_value,
                                                     annotate=annotate,
                                                     output_path=folder_dir_name,
                                                     new_dp_meta_similar_removal=new_dp_meta_similar_removal,
                                                     title="Data Removal: Similarity removal",
                                                     remove_noise=remove_noise,
                                                     remove_similar=remove_similar,
                                                     display_all_graphs=display_all_graphs)
                    else:
                        print("Scaled size is now {0}.".format(
                            reduced_scaled.shape[0]))

            # De-init multithreading artifacts
            self.__index_array = None
            self.__total_indexes = None
            self.__tmp_reduced_scaled = None
            self.__all_dp_dist_list = None

            # starting_shape is only set when the similar-removal pass ran
            if remove_similar and create_visuals:
                self.__create_gif_dp_amount(n_start=starting_shape - 1,
                                            n_end=reduced_scaled.shape[0],
                                            folder_dir_name=folder_dir_name,
                                            filename="Similar Reduction",
                                            show_gif=show_gif)

            if remove_similar and remove_noise and create_visuals:
                self.__visualize_data_points(centroid=centroid,
                                             scaled_data=reduced_scaled[:,
                                                         column_list],
                                             noise_removal_dps=list(
                                                 noise_removal_dps_dict.values()),
                                             similar_removal_dps=list(
                                                 similarity_dps_dict.values()),
                                             new_sample_amount=new_sample_amount,
                                             zscore_high=zscore_high,
                                             weighted_dist_value=weighted_dist_value,
                                             annotate=annotate,
                                             output_path=folder_dir_name,
                                             new_dp_meta_similar_removal=None,
                                             title="Final Result",
                                             remove_noise=remove_noise,
                                             remove_similar=remove_similar,
                                             white_out_mode=True,
                                             no_print_output=True,
                                             display_all_graphs=display_all_graphs)

                self.__create_gif_dp_amount(n_start=self.__scaled.shape[0],
                                            n_end=reduced_scaled.shape[0],
                                            folder_dir_name=folder_dir_name,
                                            filename="Noise and Similar Reduction",
                                            flash_final_results=True,
                                            show_gif=show_gif)

            if apply_changes:
                self.__scaled = reduced_scaled[:, column_list]

            return list(removed_dps_dict.keys())
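
The noise-removal loop above repeatedly deletes the point farthest from the centroid while that point's distance z-score exceeds the cutoff. Below is a minimal, self-contained sketch of the same idea; trim_noise and its parameters are illustrative stand-ins, not part of the original class.

import numpy as np
from scipy.spatial import distance
from scipy.stats import zscore

def trim_noise(points, target_n, zscore_high=2.0):
    """Drop the farthest-from-centroid point while its distance
    z-score exceeds zscore_high, or until target_n points remain."""
    points = np.asarray(points, dtype=float)
    removed = []
    while points.shape[0] > target_n:
        centroid = points.mean(axis=0)
        dists = np.array([distance.euclidean(centroid, p) for p in points])
        farthest = int(np.argmax(dists))
        if zscore(dists)[farthest] < zscore_high:
            break  # remaining points are within tolerance
        removed.append(points[farthest])
        points = np.delete(points, farthest, axis=0)
    return points, removed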
Example #38
0
    def allocate_edcs(self, opt='n', n=3, EDC_min=2, EDC_max=2):
        """Place edge data centers (EDCs) by k-means clustering the APs.

        opt='n'      : exactly `n` EDCs.
        opt='max'    : fewest clusters such that no EDC serves more
                       than EDC_max APs.
        opt='min'    : most clusters such that every EDC serves at
                       least EDC_min APs.
        opt='minmax' : repeatedly peel off clusters of at most EDC_max
                       APs and re-cluster the remaining APs.
        """
        flag2 = 0
        if opt == 'n':
            E_n = n
            kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps)
            E = kmeans.cluster_centers_
            C_EDC = kmeans.predict(self.aps)
            print(E)
            print(len(E))

        elif opt == 'max':
            E_n = len(self.aps)
            while flag2 != 1:
                kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps)
                E = kmeans.cluster_centers_
                C_EDC = kmeans.predict(self.aps)
                for i in range(E_n):
                    if (C_EDC.tolist()).count(i) > EDC_max:
                        flag2 = 1
                E_n = E_n - 1
            E_n = E_n + 2
            kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps)
            E = kmeans.cluster_centers_
            C_EDC = kmeans.predict(self.aps)
            print(E)
            print(len(E))

        elif opt == 'min':
            E_n = 1
            while flag2 != 1:
                kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps)
                E = kmeans.cluster_centers_
                C_EDC = kmeans.predict(self.aps)
                for i in range(E_n):
                    if (C_EDC.tolist()).count(i) < EDC_min:
                        flag2 = 1
                E_n = E_n + 1
            E_n = E_n - 2
            kmeans = KMeans(n_clusters=E_n, random_state=0).fit(self.aps)
            E = kmeans.cluster_centers_
            C_EDC = kmeans.predict(self.aps)
            print(E)
            print(len(E))

        elif opt == 'minmax':
            flag3 = 0
            C_E_aux = self.aps
            E_n = 1
            r_c = []
            while flag3 != 1:
                while flag2 != 1:
                    kmeans = KMeans(n_clusters=E_n, random_state=0).fit(C_E_aux)
                    E = kmeans.cluster_centers_
                    C_EDC = kmeans.predict(C_E_aux)
                    for i in range(E_n):
                        if (C_EDC.tolist()).count(i) < EDC_max:
                            flag2 = 1
                    E_n = E_n + 1
                flag2 = 0
                E_n = E_n - 1
                kmeans = KMeans(n_clusters=E_n, random_state=0).fit(C_E_aux)
                E = kmeans.cluster_centers_
                C_EDC = kmeans.predict(C_E_aux)
                f = []
                count = 0
                for index, item in enumerate(C_EDC.tolist()):
                    if len(C_EDC.tolist()) / EDC_max < 1:
                        r_c = np.vstack((r_c, E))
                        flag3 = 1
                    if (C_EDC.tolist()).count(item) <= EDC_max:
                        C_E_aux = np.delete(C_E_aux, index - count, 0)
                        count = count + 1
                        if item not in f:
                            if len(r_c) == 0:
                                r_c = np.concatenate((r_c, E[item]), axis=0)
                            else:
                                r_c = np.vstack((r_c, E[item]))
                            f.append(item)
                if len(C_E_aux) <= EDC_max:
                    flag3 = 1
            kmeans = KMeans(n_clusters=len(r_c), init=r_c)
            E = r_c
            C_EDC = kmeans.fit_predict(self.aps)
            print(E)
            E_n = len(E)
            print(len(E))
        self.edcs = E
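
The 'minmax' branch above seeds a final KMeans fit with the centers it accumulated in r_c. A hedged usage sketch of that seeding pattern follows; the coordinates and seed choice are made up for illustration, standing in for self.aps and the peeling loop's output.

import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
aps = rng.random((40, 2))               # hypothetical AP coordinates

seeds = aps[:3]                         # pretend these came from the peeling loop
km = KMeans(n_clusters=len(seeds), init=seeds, n_init=1).fit(aps)
print(km.cluster_centers_)              # final EDC locations
print(np.bincount(km.predict(aps)))     # APs served per EDC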
Example #39
0
    def remove_throats(self, ti_list_delete):
        """
        Deletes throats from the pore network.

        Parameters
        ----------
        ti_list_delete : array of int
            Indices of the throats to be deleted

        Notes
        -----
        Throat indices are adjusted to remain contiguous after deletion.
        """
        assert len(np.unique(ti_list_delete)) == len(ti_list_delete)
        ti_list_old = np.arange(self.nr_t)
        ti_new_to_old = np.delete(ti_list_old, ti_list_delete)
        ti_old_to_new = {
            ti_new_to_old[i]: i
            for i in xrange(len(ti_new_to_old))
        }
        assert np.max(ti_old_to_new.values()) < self.nr_t - len(ti_list_delete)

        # Remove entries in ngh_pores and ngh_tubes arrays corresponding to deleted tubes
        for ti in ti_list_delete:
            pi_1, pi_2 = self.edgelist[ti, :]

            assert ti in self.ngh_tubes[pi_1]
            assert ti in self.ngh_tubes[pi_2]

            mask_pi_1 = self.ngh_tubes[pi_1] != ti
            mask_pi_2 = self.ngh_tubes[pi_2] != ti

            self.ngh_pores[pi_1] = self.ngh_pores[pi_1][mask_pi_1]
            self.ngh_pores[pi_2] = self.ngh_pores[pi_2][mask_pi_2]

            self.ngh_tubes[pi_1] = self.ngh_tubes[pi_1][mask_pi_1]
            self.ngh_tubes[pi_2] = self.ngh_tubes[pi_2][mask_pi_2]

            self.nr_nghs[pi_1] -= 1
            self.nr_nghs[pi_2] -= 1
            self.nr_t -= 1

        assert self.nr_t == len(ti_new_to_old)

        # Change indices of tubes in ngh_tubes arrays
        for pi in xrange(self.nr_p):
            new_ngh_tubes_pi = [
                ti_old_to_new[self.ngh_tubes[pi][x]]
                for x in xrange(self.nr_nghs[pi])
            ]
            self.ngh_tubes[pi] = np.asarray(new_ngh_tubes_pi, dtype=np.int32)
            if len(new_ngh_tubes_pi) > 0:
                assert np.max(new_ngh_tubes_pi) < self.nr_t

        self.edgelist = np.delete(self.edgelist, ti_list_delete, 0)

        self.tubes.remove_tubes(ti_list_delete)

        assert self.edgelist.shape[0] == self.nr_t
        assert self.edgelist.shape[1] == 2

        self._create_helper_properties()
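
A compact sketch of the old-to-new index remapping that remove_throats relies on: once rows are deleted, the surviving throat indices must be renumbered contiguously. The values here are illustrative.

import numpy as np

nr_t = 6
ti_delete = np.array([1, 4])
ti_new_to_old = np.delete(np.arange(nr_t), ti_delete)       # [0, 2, 3, 5]
ti_old_to_new = {old: new for new, old in enumerate(ti_new_to_old)}
print(ti_old_to_new)                                        # {0: 0, 2: 1, 3: 2, 5: 3}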
    def __shortest_dist_relationship(self,
                                     centroid):

        """
            Finds the two datapoints that have the smallest distance.
        """
        if not self.__all_dp_dist_list:

            # Number of pairwise comparisons: n * (n - 1) / 2
            n = self.__tmp_reduced_scaled.shape[0]
            total = n * (n - 1) // 2

            # 1.3e-5 seconds per comparison is an empirical estimate
            print("Estimated time required:", str(
                datetime.timedelta(seconds=total * 1.3e-5)))

            self.__all_dp_dist_list = find_all_distances_in_matrix(
                matrix=self.__tmp_reduced_scaled,
                index_array=self.__index_array,
                total_indexes=self.__total_indexes,)

        # TODO: incorporate the weighted-distance idea here

        all_shortest = [
            [target_dp_index,
             np.argmin(distances) + target_dp_index + 1,
             np.amin(distances)]
            for target_dp_index, distances in
            enumerate(self.__all_dp_dist_list)
            if len(distances) > 0]

        smallest_dps_relationship = min(all_shortest, key=lambda x: x[2])

        dp_1_index = smallest_dps_relationship[0]
        dp_2_index = smallest_dps_relationship[1]
        smallest_distance = smallest_dps_relationship[2]

        dp_1_dist = distance.euclidean(self.__tmp_reduced_scaled[dp_1_index],
                                       centroid)

        dp_2_dist = distance.euclidean(self.__tmp_reduced_scaled[dp_2_index],
                                       centroid)

        # Keep whichever dp is closer to the centroid; remove the other
        if dp_1_dist < dp_2_dist:
            removal_index = dp_2_index
            keep_index = dp_1_index
        else:
            removal_index = dp_1_index
            keep_index = dp_2_index

        # Remove the distance entries for the removed dp from every row above it
        for sub_removal_index, dp_index_key in enumerate(
                range(removal_index - 1, -1, -1)):
            self.__all_dp_dist_list[dp_index_key] = np.delete(
                self.__all_dp_dist_list[dp_index_key],
                sub_removal_index, 0)

        self.__all_dp_dist_list.pop(removal_index)

        # Return back the indexes and distance
        return removal_index, keep_index, smallest_distance
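
For comparison, the closest-pair search that __shortest_dist_relationship performs over its cached per-row distance lists can also be written against SciPy's condensed distance matrix. This is an alternative formulation for illustration, not the class's implementation.

import numpy as np
from scipy.spatial.distance import pdist, squareform

pts = np.random.rand(10, 3)
d = squareform(pdist(pts))          # full pairwise Euclidean matrix
np.fill_diagonal(d, np.inf)         # mask self-distances
i, j = np.unravel_index(np.argmin(d), d.shape)
print(i, j, d[i, j])                # the closest pair and its distance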