Example No. 1
def test_morph_labels():
    """Test morph_labels."""
    # Just process the first 5 labels for speed
    parc_fsaverage = read_labels_from_annot(
        'fsaverage', 'aparc', subjects_dir=subjects_dir)[:5]
    parc_sample = read_labels_from_annot(
        'sample', 'aparc', subjects_dir=subjects_dir)[:5]
    parc_fssamp = morph_labels(
        parc_fsaverage, 'sample', subjects_dir=subjects_dir)
    for lf, ls, lfs in zip(parc_fsaverage, parc_sample, parc_fssamp):
        assert lf.hemi == ls.hemi == lfs.hemi
        assert lf.name == ls.name == lfs.name
        perc_1 = np.in1d(lfs.vertices, ls.vertices).mean() * 100
        perc_2 = np.in1d(ls.vertices, lfs.vertices).mean() * 100
        # Ideally this would be 100%, but we do not use the same algorithm
        # as FreeSurfer ...
        assert perc_1 > 92
        assert perc_2 > 88
    with pytest.raises(ValueError, match='wrong and fsaverage'):
        morph_labels(parc_fsaverage, 'sample', subjects_dir=subjects_dir,
                     subject_from='wrong')
    with pytest.raises(RuntimeError, match='Number of surface vertices'):
        _load_vert_pos('sample', subjects_dir, 'white', 'lh', 1)
    for label in parc_fsaverage:
        label.subject = None
    with pytest.raises(ValueError, match='subject_from must be provided'):
        morph_labels(parc_fsaverage, 'sample', subjects_dir=subjects_dir)
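
The assertions above boil down to a bidirectional membership ratio computed with np.in1d. A minimal sketch of that metric with toy vertex arrays (the names are illustrative, not taken from the test):

import numpy as np

verts_a = np.array([1, 2, 3, 4, 5])
verts_b = np.array([3, 4, 5, 6])

perc_a_in_b = np.in1d(verts_a, verts_b).mean() * 100   # 60.0 -- share of verts_a found in verts_b
perc_b_in_a = np.in1d(verts_b, verts_a).mean() * 100   # 75.0 -- share of verts_b found in verts_a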
Example No. 2
def compare(neurons1, spikes1, neurons2, spikes2):
  import matplotlib.pyplot as plt
  [sn1, ss1] = sort(neurons1, spikes1)
  [sn2, ss2] = sort(neurons2, spikes2)

  #sn1 = neurons1
  #ss1 = spikes1
  #sn2 = neurons2
  #ss2 = spikes2

  # Boolean masks of the neuron ids shared by both recordings
  in1 = np.in1d(sn1, sn2)
  in2 = np.in1d(sn2, sn1)

  nin = len(sn1[in1])

  print("Neurons in 1 but not in 2:", len(sn1) - nin)
  print("Neurons in 2 but not in 1:", len(sn2) - nin)

  for i in range(nin):
    if np.any(ss1[in1][i] > 0) and np.any(ss2[in2][i] > 0):
      if len(ss1[in1][i]) == len(ss2[in2][i]):
        # Same spike counts: plot the per-spike time differences for this neuron
        plt.plot(ss1[in1][i] - ss2[in2][i], i * np.ones([len(ss1[in1][i]), 1]), '*')
      else:
        print("For neuron", sn1[in1][i], "difference in length of spike trains:",
              len(ss1[in1][i]) - len(ss2[in2][i]))
        print("ss1:", ss1[in1][i])
        print("ss2:", ss2[in2][i])
        #plt.plot(ss1[in1][i][:np.min(len(ss1[in1][i]), len(ss2[in2][i]))]-ss2[in2][i][:np.min(len(ss1[in1][i]), len(ss2[in2][i]))], i*np.ones([np.min(len(ss1[in1][i]), len(ss2[in2][i])),1]), '*')
  plt.show()
Example No. 3
    def apply(self, group):
        res = self.sel.apply(group)
        if not res:
            return group[[]]  # empty selection

        # Fragment must come before self.prop_trans lookups!
        if self.prop == 'fragment':
            # Combine all fragments together, then check where group
            # indices are same as fragment(s) indices
            allfrags = functools.reduce(lambda x, y: x + y, res.fragments)

            mask = np.in1d(group.indices, allfrags.indices)
            return group[mask].unique
        # [xyz] must come before self.prop_trans lookups too!
        try:
            pos_idx = {'x': 0, 'y': 1, 'z': 2}[self.prop]
        except KeyError:
            # The self.prop string was already checked,
            # so don't need error checking here.
            # KeyError at this point is impossible!
            attrname = self.prop_trans[self.prop]
            vals = getattr(res, attrname)
            mask = np.in1d(getattr(group, attrname), vals)

            return group[mask].unique
        else:
            vals = res.positions[:, pos_idx]
            pos = group.positions[:, pos_idx]

            # isclose only does one value at a time
            mask = np.vstack([np.isclose(pos, v)
                              for v in vals]).any(axis=0)
            return group[mask].unique
Example No. 4
    def get_destination_pathline_data(self, dest_cells):
        """Get pathline data for set of destination cells.

        Parameters
        ----------
        dest_cells : list or array of tuples
            (k, i, j) of each destination cell (zero-based)

        Returns
        -------
        pthldest : np.recarray
            Slice of pathline data array (e.g. PathlineFile._data)
            containing only pathlines with final k,i,j in dest_cells.
        """
        ra = self._data.view(np.recarray)
        # find the intersection of endpoints and dest_cells
        # convert dest_cells to same dtype for comparison
        raslice = ra[['k', 'i', 'j']]
        dest_cells = np.array(dest_cells, dtype=raslice.dtype)
        inds = np.in1d(raslice, dest_cells)
        epdest = ra[inds].copy().view(np.recarray)

        # use particle ids to get the rest of the paths
        inds = np.in1d(ra.particleid, epdest.particleid)
        pthldes = ra[inds].copy()
        pthldes.sort(order=['particleid', 'time'])
        return pthldes
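
The slice ra[['k', 'i', 'j']] and the converted dest_cells share the same structured dtype, so np.in1d compares whole (k, i, j) records at once. A minimal sketch of that idea with a plain structured array (not the MODPATH recarray above; structured-dtype support in in1d has varied a little between NumPy releases, so treat this as illustrative):

import numpy as np

cell_dtype = [('k', int), ('i', int), ('j', int)]
cells = np.array([(0, 1, 1), (0, 2, 2), (1, 3, 3)], dtype=cell_dtype)
dest = np.array([(0, 2, 2), (1, 3, 3)], dtype=cell_dtype)

mask = np.in1d(cells, dest)   # each element is one (k, i, j) record
print(cells[mask])            # [(0, 2, 2) (1, 3, 3)]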
def generateBatch(curinds, elements, atomArraysAll, nAtomsDict,
                  atomsIndsReverse, atomArraysAllDerivs):
    """This method generates batches from a large dataset using a set of
    selected indices curinds."""
    # inputs:
    atomArraysFinal = {}
    atomArraysDerivsFinal = {}
    for element in elements:
        validKeys = np.in1d(atomsIndsReverse[element], curinds)
        if len(validKeys) > 0:
            atomArraysFinal[element] = atomArraysAll[element][validKeys]
            if len(atomArraysAllDerivs[element]) > 0:
                atomArraysDerivsFinal[element] = atomArraysAllDerivs[
                    element][validKeys, :, :, :]
            else:
                atomArraysDerivsFinal[element] = []
        else:
            atomArraysFinal[element] = []
            atomArraysDerivsFinal[element] = []

    atomInds = {}
    for element in elements:
        validKeys = np.in1d(atomsIndsReverse[element], curinds)
        if len(validKeys) > 0:
            atomIndsTemp = np.sum(atomsIndsReverse[element][validKeys], 1)
            atomInds[element] = atomIndsTemp * 0.
            for i in range(len(curinds)):
                atomInds[element][atomIndsTemp == curinds[i]] = i
        else:
            atomInds[element] = []

    return atomArraysFinal, atomArraysDerivsFinal, atomInds
Example No. 6
  def _sensoryComputeLearningMode(self, anchorInput):
    """
    Associate this location with a sensory input. Subsequently, anchorInput will
    activate the current location during anchor().

    @param anchorInput (numpy array)
    A sensory input. This will often come from a feature-location pair layer.
    """
    overlaps = self.connections.computeActivity(anchorInput,
                                                self.connectedPermanence)
    activeSegments = np.where(overlaps >= self.activationThreshold)[0]

    potentialOverlaps = self.connections.computeActivity(anchorInput)
    matchingSegments = np.where(potentialOverlaps >=
                                self.learningThreshold)[0]

    # Cells with an active segment: reinforce the segment
    cellsForActiveSegments = self.connections.mapSegmentsToCells(
      activeSegments)
    learningActiveSegments = activeSegments[
      np.in1d(cellsForActiveSegments, self.activeCells)]
    remainingCells = np.setdiff1d(self.activeCells, cellsForActiveSegments)

    # Remaining cells with a matching segment: reinforce the best
    # matching segment.
    candidateSegments = self.connections.filterSegmentsByCell(
      matchingSegments, remainingCells)
    cellsForCandidateSegments = (
      self.connections.mapSegmentsToCells(candidateSegments))
    candidateSegments = candidateSegments[
      np.in1d(cellsForCandidateSegments, remainingCells)]
    onePerCellFilter = np2.argmaxMulti(potentialOverlaps[candidateSegments],
                                       cellsForCandidateSegments)
    learningMatchingSegments = candidateSegments[onePerCellFilter]

    newSegmentCells = np.setdiff1d(remainingCells, cellsForCandidateSegments)

    for learningSegments in (learningActiveSegments,
                             learningMatchingSegments):
      self._learn(self.connections, self.rng, learningSegments,
                  anchorInput, potentialOverlaps,
                  self.initialPermanence, self.sampleSize,
                  self.permanenceIncrement, self.permanenceDecrement,
                  self.maxSynapsesPerSegment)

    # Remaining cells without a matching segment: grow one.
    numNewSynapses = len(anchorInput)

    if self.sampleSize != -1:
      numNewSynapses = min(numNewSynapses, self.sampleSize)

    if self.maxSynapsesPerSegment != -1:
      numNewSynapses = min(numNewSynapses, self.maxSynapsesPerSegment)

    newSegments = self.connections.createSegments(newSegmentCells)

    self.connections.growSynapsesToSample(
      newSegments, anchorInput, numNewSynapses,
      self.initialPermanence, self.rng)
    self.activeSegments = activeSegments
Example No. 7
def test_match_mask():
    msk = np.array([ True, False,  True, False, False], dtype=bool)
    idx = np.array([0, 2])
    arr = np.array([1,2,3,4,5]) 
    values = np.array([1,3])
    assert (num.match_mask(arr, values) == msk).all()
    ret = num.match_mask(arr, values, fullout=True)
    assert (ret[0] == msk).all()
    assert (ret[1] == idx).all()
    assert (arr[msk] == np.array([1, 3])).all()
    assert (ret[0] == np.in1d(arr, values)).all()
    
    # handle cases where len(values) > len(arr) and values not contained in arr
    values = np.array([1,3,3,3,7,9,-3,-4,-5])
    ret = num.match_mask(arr, values, fullout=True)
    assert (ret[0] == msk).all()
    assert (ret[1] == idx).all()
    assert (ret[0] == np.in1d(arr, values)).all()

    # float values: use eps
    ret = num.match_mask(arr+0.1, values, fullout=True, eps=0.2)
    assert (ret[0] == msk).all()
    assert (ret[1] == idx).all()
    
    msk = num.match_mask(np.array([1,2]), np.array([3,4])) 
    assert (msk == np.array([False]*2)).all()
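
np.in1d tests exact equality, which is why the float case above needs match_mask's eps argument. A rough tolerance-based equivalent can be built from np.isclose (a sketch, not the num.match_mask implementation):

import numpy as np

arr = np.array([1, 2, 3, 4, 5]) + 0.1
values = np.array([1, 3])

print(np.in1d(arr, values))    # all False: exact comparison misses the 0.1 offset
mask = np.isclose(arr[:, None], values[None, :], atol=0.2).any(axis=1)
print(mask)                    # [ True False  True False False]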
Example No. 8
	def __init__(self, sensorLst):
		self.firstByte   = 19
		self.packetInfo  = [csp3.PacketDct[i] for i in sensorLst]
		self.sizeLst     = np.array([i["size"] for i in self.packetInfo])
		self.numBytes    = np.sum(self.sizeLst)
		self.packetTypes = [i["dtype"] for i in self.packetInfo]
		self.dataFormat  = ">" + "".join(self.packetTypes)

		# Total size is based on the number of sensors, the size 
		# of the data from the sensors, plus 3 bytes for checking integrity
		self.totalSize  = len(sensorLst) + self.numBytes + 3

		# We want the indices where the sensor IDs (not their data) are located
		self.idIx        = np.cumsum(np.append(np.array(2), self.sizeLst + 1))[:-1]
		self.idMask      = np.in1d(np.arange(self.totalSize), self.idIx)
		# The indices where non-data (i.e., header, packet id, checksum) is located
		self.nonDataIx   = np.concatenate((np.array([0,1]), self.idIx, np.array([self.totalSize-1,])))
		# An index array for where non-data bytes appear
		self.nonDataMask = np.in1d(np.arange(self.totalSize - 1), self.nonDataIx)
		# The indices where data appears
		self.dataIx      = np.arange(self.totalSize - 1)[~self.nonDataMask]
		# Make an array of the checkbits, of size equal to total length of packet
		tmp 	 		 = np.zeros(self.totalSize)
		tmp[self.idMask] = np.array(sensorLst)
		self.packetCheck = tmp

		# Initialize the actual packet construction machinery
		self.lastPacket = []
		self.curPacket  = []
		self.count      = 0
		self.checksum   = 0
		self.state      = csp3.WAIT_HEADER
Example No. 9
    def _do_one_inner_iteration(self, inv_val):
        r"""
        Determine which throats are invaded at a given applied capillary
        pressure.

        """
        # Generate a tlist containing boolean values for throat state
        Tinvaded = self['throat.entry_pressure'] <= inv_val
        # Find all pores that can be invaded at specified pressure
        [pclusters, tclusters] = self._net.find_clusters2(mask=Tinvaded,
                                                          t_labels=True)
        if self._AL:
            # Identify clusters connected to invasion sites
            inv_clusters = sp.unique(pclusters[self['pore.inlets']])
        else:
            # All clusters are invasion sites
            inv_clusters = pclusters
        inv_clusters = inv_clusters[inv_clusters >= 0]
        # Find pores on the invading clusters
        pmask = np.in1d(pclusters, inv_clusters)
        # Store current applied pressure in newly invaded pores
        pinds = (self['pore.inv_Pc'] == sp.inf) * (pmask)
        self['pore.inv_Pc'][pinds] = inv_val
        # Find throats on the invading clusters
        tmask = np.in1d(tclusters, inv_clusters)
        # Store current applied pressure in newly invaded throats
        tinds = (self['throat.inv_Pc'] == sp.inf) * (tmask)
        self['throat.inv_Pc'][tinds] = inv_val
        # Store total network saturation
        tsat = sp.sum(self._net['throat.volume'][self['throat.inv_Pc'] <= inv_val])
        psat = sp.sum(self._net['pore.volume'][self['pore.inv_Pc'] <= inv_val])
        total = sp.sum(self._net['throat.volume']) + sp.sum(self._net['pore.volume'])
        self['pore.inv_sat'][pinds] = (tsat + psat)/total
        self['throat.inv_sat'][tinds] = (tsat + psat)/total
Example No. 10
def average_AM_firing_rate(spikeTimestamps, eventOnsetTimes, behavData, timeRange):

    currentFreq = behavData['currentFreq']
    possibleFreq = np.unique(currentFreq)


    fr_array=np.array([])
    #Only need to calculate this once, the loop then selects for each freq
    spikeTimesFromEventOnset, trialIndexForEachSpike, indexLimitsEachTrial = spikesanalysis.eventlocked_spiketimes(
        spikeTimestamps, eventOnsetTimes, timeRange)

    for freq in possibleFreq:
        select = np.flatnonzero(currentFreq==freq)
        selectspikes = spikeTimesFromEventOnset[np.in1d(trialIndexForEachSpike, select)]
        selectinds = trialIndexForEachSpike[np.in1d(trialIndexForEachSpike, select)]
        selectlimits = indexLimitsEachTrial[:, select]


        numSpikesEachTrial = np.squeeze(np.diff(selectlimits, axis=0))
        spikeRateEachTrial = numSpikesEachTrial / float(timeRange[1]-timeRange[0])
        averageFR = spikeRateEachTrial.mean()

        fr_array=np.concatenate((fr_array, np.array([averageFR])))

    return fr_array
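
The core selection idiom above keeps only the spikes whose trial index belongs to the trials of the current frequency. In isolation, with made-up arrays:

import numpy as np

trialIndexForEachSpike = np.array([0, 0, 1, 2, 2, 3, 3])
spikeTimesFromEventOnset = np.array([0.01, 0.05, 0.02, 0.03, 0.40, 0.10, 0.20])
select = np.array([1, 3])   # trials presented at one modulation frequency

keep = np.in1d(trialIndexForEachSpike, select)
print(spikeTimesFromEventOnset[keep])   # spikes from trials 1 and 3 only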
Example No. 11
    def Check_Result(self, Str_DataName, Int_DataNum, List_PeakIdx):
        Array_MyAnswer = np.array(List_PeakIdx)
        Array_MyAnswer = np.unique(Array_MyAnswer)
        Array_Anno = self.Load_Answer(Str_DataName, Int_DataNum)


        Int_TP = 0
        Int_FP = 0
        Int_FN = 0

        Int_BufferSize = 2
        for myanswer in Array_MyAnswer:
            Array_BufferMyAnswer = range(myanswer-Int_BufferSize, myanswer + Int_BufferSize)
            Array_BufferMyAnswer = np.array(Array_BufferMyAnswer)
            Array_InorNOT = np.in1d(Array_BufferMyAnswer, Array_Anno)
            if True in Array_InorNOT:
                Int_TP += 1
            elif True not in Array_InorNOT:
                Int_FP += 1

        for trueanswer in Array_Anno:
            Array_BufferMyAnswer = range(trueanswer - Int_BufferSize, trueanswer + Int_BufferSize)
            Array_BufferMyAnswer = np.array(Array_BufferMyAnswer)
            Array_InorNOT = np.in1d(Array_BufferMyAnswer, Array_MyAnswer)
            if True not in Array_InorNOT:
                Int_FN += 1

        Flt_Se = float(Int_TP) / float(Int_TP + Int_FN)
        Flt_PP = float(Int_TP) / float(Int_TP + Int_FP)
        return Str_DataName, Int_DataNum, Flt_Se, Flt_PP
Example No. 12
 def generate_throats(self):
     r"""
     Generate the throats (connections, numbering and types)
     """
     self._logger.info("generate_throats: Define connections between pores")
     
     img = self._net_img
     [Nx, Ny, Nz] = np.shape(img)
     Np = Nx*Ny*Nz
     ind = np.arange(0,Np)
     
     #Generate throats based on pattern of the adjacency matrix
     tpore1_1 = ind[(ind%Nx)<(Nx-1)]
     tpore2_1 = tpore1_1 + 1
     tpore1_2 = ind[(ind%(Nx*Ny))<(Nx*(Ny-1))]
     tpore2_2 = tpore1_2 + Nx
     tpore1_3 = ind[(ind%Np)<(Nx*Ny*(Nz-1))]
     tpore2_3 = tpore1_3 + Nx*Ny
     tpore1 = np.hstack((tpore1_1,tpore1_2,tpore1_3))
     tpore2 = np.hstack((tpore2_1,tpore2_2,tpore2_3))
     connections = np.vstack((tpore1,tpore2)).T
     connections = connections[np.lexsort((connections[:, 1], connections[:, 0]))]
     
     #Remove throats to non-active pores
     img_ind = np.ravel_multi_index(np.nonzero(img), dims=np.shape(img), order='F')
     temp0 = np.in1d(connections[:,0],img_ind)
     temp1 = np.in1d(connections[:,1],img_ind)
     tind = temp0*temp1
     connections = connections[tind]
     
     self._net.throat_properties['connections'] = self._voxel_to_pore_map[connections]
     self._net.throat_properties['type'] = np.zeros(np.sum(tind))
     self._net.throat_properties['numbering'] = np.arange(0,np.sum(tind))
     self._logger.debug("generate_throats: End of method")
Example No. 13
def AM_vector_strength(spikeTimestamps, eventOnsetTimes, behavData, timeRange):

    currentFreq = behavData['currentFreq']
    possibleFreq = np.unique(currentFreq)

    vs_array=np.array([])
    ral_array=np.array([])
    pval_array = np.array([])
    timeRange = [0, 0.5]
    spikeTimesFromEventOnset, trialIndexForEachSpike, indexLimitsEachTrial = spikesanalysis.eventlocked_spiketimes(
        spikeTimestamps, eventOnsetTimes, timeRange)

    for freq in possibleFreq:

        select = np.flatnonzero(currentFreq==freq)
        selectspikes = spikeTimesFromEventOnset[np.in1d(trialIndexForEachSpike, select)]
        selectinds = trialIndexForEachSpike[np.in1d(trialIndexForEachSpike, select)]
        squeezedinds=np.array([list(np.unique(selectinds)).index(x) for x in selectinds])

        spikesAfterFirstCycle = selectspikes[selectspikes>(1.0/freq)]
        indsAfterFirstCycle = selectinds[selectspikes>(1.0/freq)]

        strength, phase = vectorstrength(spikesAfterFirstCycle, 1.0/freq)
        vs_array=np.concatenate((vs_array, np.array([strength])))

        #Compute the pval for the vector strength
        radsPerSec=freq*2*np.pi
        spikeRads = (spikesAfterFirstCycle*radsPerSec)%(2*np.pi)
        ral_test = circstats.rayleigh_test(spikeRads)
        pval = np.array([ral_test['pvalue']])
        ral =np.array([2*len(spikesAfterFirstCycle)*(strength**2)]) 
        pval_array = np.concatenate((pval_array, pval))
        ral_array = np.concatenate((ral_array, ral))

    return vs_array, pval_array, ral_array
Example No. 14
def test_group_shuffle_split():
    for groups_i in test_groups:
        X = y = np.ones(len(groups_i))
        n_splits = 6
        test_size = 1./3
        slo = GroupShuffleSplit(n_splits, test_size=test_size, random_state=0)

        # Make sure the repr works
        repr(slo)

        # Test that the length is correct
        assert_equal(slo.get_n_splits(X, y, groups=groups_i), n_splits)

        l_unique = np.unique(groups_i)
        l = np.asarray(groups_i)

        for train, test in slo.split(X, y, groups=groups_i):
            # First test: no train group is in the test set and vice versa
            l_train_unique = np.unique(l[train])
            l_test_unique = np.unique(l[test])
            assert_false(np.any(np.in1d(l[train], l_test_unique)))
            assert_false(np.any(np.in1d(l[test], l_train_unique)))

            # Second test: train and test add up to all the data
            assert_equal(l[train].size + l[test].size, l.size)

            # Third test: train and test are disjoint
            assert_array_equal(np.intersect1d(train, test), [])

            # Fourth test:
            # unique train and test groups are correct, +- 1 for rounding error
            assert_true(abs(len(l_test_unique) -
                            round(test_size * len(l_unique))) <= 1)
            assert_true(abs(len(l_train_unique) -
                            round((1.0 - test_size) * len(l_unique))) <= 1)
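
The first check above is a handy general idiom for verifying that two index sets are group-disjoint. A standalone version with toy groups (not the scikit-learn splitter itself):

import numpy as np

groups = np.array(['a', 'a', 'b', 'b', 'c', 'c'])
train = np.array([0, 1, 2, 3])
test = np.array([4, 5])

# no training sample may carry a group label that also occurs in the test set
assert not np.any(np.in1d(groups[train], np.unique(groups[test])))
assert not np.any(np.in1d(groups[test], np.unique(groups[train])))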
Example No. 15
    def Pred_EOF_CCA(self):
        '''
        Forecast module; still needs refinement, and several parts require further work.
        '''

        I_Year = self.I_Year
        I_YearP = self.I_YearP
        print('I_Year=',I_Year)
        print('I_YearP=',I_YearP)
        #print(self.Field[:,0,0])
        #print(self.FieldP[:,0,0])

        #sys.exit(0)

        Region = self.Region[:,np.in1d(I_Year,I_YearP)]
        print('I_YearR=',I_Year[np.in1d(I_Year,I_YearP)])

        FieldP = self.FieldP[:,self.p_np3]  #等于过滤后的场文件
        FieldP = FieldP.T

        FieldP2 = FieldP[:,np.in1d(I_YearP,I_Year)]

        print(FieldP2.shape,np.atleast_2d(FieldP[:,-1]).T.shape)

        print('FieldP.shape = ',FieldP.shape)
        print('FieldP2.shape = ',FieldP2.shape)
        print('Region.shape = ',Region.shape)
        self.X_Pre = dclim.dpre_eof_cca(FieldP2,Region,np.atleast_2d(FieldP[:,-1]).T,4)
        print(self.X_Pre.shape)

        self.out = np.hstack((self.StaLatLon,self.X_Pre))
        
        print('Pred Year is ',I_YearP[-1])
        np.savetxt('out.txt',self.out,fmt='%5d %7.2f %7.2f %7.2f',delimiter=' ')
Example No. 16
    def __init__(self, filename_list):
        n_file = np.size(filename_list)
        for i_file in np.arange(n_file):
            print('Adding %s to build TargetSurvey %d files to go'%(filename_list[i_file], n_file - i_file))
            tmp = TargetTile(filename_list[i_file])
            # The first file is a simple initialization
            if(i_file==0):
                self.type = tmp.type.copy()
                self.id = tmp.id.copy()
                self.n_observed = tmp.n_observed.copy()
                self.assigned_type = tmp.assigned_type.copy()
                self.assigned_z = tmp.assigned_z.copy()
                self.tile_names= []
                for i in np.arange(np.size(self.id)):
                    self.tile_names.append([filename_list[i_file]])
            else: # the other files have to take into account the overlap
                mask = np.in1d(self.id, tmp.id)

                if((len(self.tile_names)!=np.size(self.id))):
                    raise ValueError('Building TargetSurvey the number of items in the filenames is not the same as in the ids.')
                for i in np.arange(np.size(self.id)):
                    if(mask[i]==True):
                        self.tile_names[i].append(filename_list[i_file])

                mask = np.in1d(tmp.id, self.id, invert=True)
                n_new = np.size(np.where(mask==True))
                self.id = np.append(self.id, tmp.id[mask])
                self.type = np.append(self.type, tmp.type[mask])
                self.n_observed = np.append(self.n_observed, tmp.n_observed[mask])
                self.assigned_type = np.append(self.assigned_type, tmp.assigned_type[mask])
                self.assigned_z = np.append(self.assigned_z, tmp.assigned_z[mask])
                for i in np.arange(n_new):
                    self.tile_names.append([filename_list[i_file]])

        self.n_targets = np.size(self.id)
Example No. 17
    def filter_effects(self):
        """
        Merge effects and data, and flip effect alleles 
        """
        effect_positions=self.effects[["CHR", "POS"]]
        data_positions=self.data.snp[["CHR", "POS"]]

        effect_include=np.in1d(effect_positions, data_positions)
        data_include=np.in1d(data_positions, effect_positions)

        self.data.filter_snps(data_include)
        self.effects=self.effects[effect_include]
        # Just give up and convert to float. I have no idea why int doesn't work here
        # but it's something to do with the fact that you can't have None as a numpy int
        # whereas float gets converted to nan.
        tmp_data=nprec.append_fields(self.data.snp, "GENO", None, dtypes=[(float,self.data.geno.shape[1])],usemask=False)
        tmp_data["GENO"]=self.data.geno
        self.effects=nprec.join_by(["CHR", "POS"], self.effects, tmp_data, usemask=False, jointype="inner")
        flipped=0
        removed=0
        for rec in self.effects:
            if rec["EFFECT"]==rec["REF"] and rec["OTHER"]==rec["ALT"]:
                pass
            elif rec["OTHER"]==rec["REF"] and rec["EFFECT"]==rec["ALT"]:
                flipped+=1
                rec["OTHER"]=rec["ALT"]
                rec["EFFECT"]=rec["REF"]
                rec["BETA"]=-rec["BETA"]
            else:
                removed+=1
                rec["EFFECT"]=rec["OTHER"]="N"

        self.effects=self.effects[self.effects["EFFECT"]!="N"]
        print( "Removed "+str(removed)+" non-matching alleles",file=sys.stderr)
        print( "Flipped "+str(flipped)+" alleles",file=sys.stderr)
Example No. 18
def edgetype(G):
    """  edge type

    Examples
    --------

    .. plot::
        :include-source: 

        >>> from pylayers.util.geomutil import *
        >>> import shapely.geometry as shg 
        >>> import matplotlib.pyplot as plt 
        >>> points = shg.MultiPoint([(0, 0),(0, 1),(1,1),(1.5,1),(2.5,1),(2.5,2),(2.8,2),(2.8,1.1),(3.2, 1.1), (3.2, 0.7), (0.4, 0.7), (0.4, 0)])
        >>> polyg  = Polygon(points)
        >>> Gv     = polyg.buildGv(show=True) 
        >>> plt.show()

    """
    edges = np.array(G.edges())
    tedg  = np.array(G.edges())
    eprod = np.prod(tedg,axis=1)
    esum  = np.sum(tedg,axis=1)
    inded = np.nonzero(eprod<0)[0] 
    ekeep = np.nonzero(eprod>0)[0]
    ieded = np.nonzero(esum>0)[0]
    indnd = np.nonzero(esum<0)[0]
    u1 = np.in1d(ieded,ekeep)
    u2 = np.in1d(indnd,ekeep)

    nded = list(edges[inded])
    eded = list(edges[ieded[u1]])
    ndnd = list(edges[indnd[u2]])
    return(ndnd,nded,eded)
def fn_get_COV_from_JK(kappa_qe_arr, noofsims):

	nx, ny = kappa_qe_arr[0].shape
	dx =dy = 0.5
	boxsize = nx * dx
	mapparams = [nx, ny, dx, dy]

	#### cluster stuff
	clra, cldec = 0., 0.
	minval, maxval = clra-boxsize/2/60.,  clra+boxsize/2/60.
	ra = dec = np.linspace(minval, maxval, nx)
	RA, DEC = np.meshgrid(ra,dec)
	RADEC = [RA, DEC]

	totalclus = len(kappa_qe_arr)

	#make several splits now
	each_split_should_contain = int(totalclus * 1./noofsims)
	fullarr = np.arange( totalclus )
	inds_to_pick = np.copy(fullarr)
	STACKED_KAPPA_QE_JK = []
	for n in range(noofsims):

		logfile = open(log_file, 'a');
		logline = '\t\tsimno = %s\n' %(n)
		logfile.writelines('%s\n' %(logline));logfile.close()

		print(logline)
		inds = np.random.choice(inds_to_pick, size = each_split_should_contain, replace = 0)
		inds_to_delete = np.where (np.in1d(inds_to_pick, inds) == True)[0]
		inds_to_pick = np.delete(inds_to_pick, inds_to_delete)

		#push all on the non inds dic into - because for each JK we will ignore the files for this respective sim
		tmp = np.in1d(fullarr, inds)
		non_inds = np.where(tmp == False)[0]

		#print len(non_inds)

		STACKED_KAPPA = np.mean( kappa_qe_arr[non_inds], axis = 0 )
		#STACKED_KAPPA = STACKED_KAPPA - MEAN_FIELD
		if sims.is_seq(MEAN_FIELD): STACKED_KAPPA = STACKED_KAPPA - MEAN_FIELD

		STACKED_KAPPA_QE_JK.append(STACKED_KAPPA)

	STACKED_KAPPA_QE_JK = np.asarray(STACKED_KAPPA_QE_JK)

	RADPROFILES = np.asarray(list(map(lambda x: sims.fn_radial_profile(x, RADEC, bin_size=binsize, minbin=0.0, maxbin=maxbin), STACKED_KAPPA_QE_JK)))

	#calculate covariance between radial bins now
	RADPRF = RADPROFILES[:,:,1]
	totbins = np.shape(RADPRF)[1]

	RADPRF_MEAN = np.mean(RADPRF, axis = 0)
	RADPRF = RADPRF - RADPRF_MEAN

	#from IPython import embed; embed()
	kappa_COV = sims.calcCov(RADPRF, noofsims, npixels = totbins)
	#kappa_COV_2 = sims.calcCov(RADPRF, noofsims, npixels = totbins, perform_mean_sub = 0)

	return kappa_COV
def compute_mAP(index, qc, good_index, junk_index):
    ap = 0
    cmc = torch.IntTensor(len(index)).zero_()
    if good_index.size==0:   # if empty
        cmc[0] = -1
        return ap,cmc

    # remove junk_index
    ranked_camera = gallery_cam[index]
    mask = np.in1d(index, junk_index, invert=True)
    #mask2 = np.in1d(index, np.append(good_index,junk_index), invert=True)
    index = index[mask]
    ranked_camera = ranked_camera[mask]
    for i in range(10):
        cam_metric[ qc-1, ranked_camera[i]-1 ] +=1

    # find good_index index
    ngood = len(good_index)
    mask = np.in1d(index, good_index)
    rows_good = np.argwhere(mask==True)
    rows_good = rows_good.flatten()
    
    cmc[rows_good[0]:] = 1
    for i in range(ngood):
        d_recall = 1.0/ngood
        precision = (i+1)*1.0/(rows_good[i]+1)
        if rows_good[i]!=0:
            old_precision = i*1.0/rows_good[i]
        else:
            old_precision=1.0
        ap = ap + d_recall*(old_precision + precision)/2

    return ap, cmc
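
invert=True above flips the membership test, so the mask is True exactly where a ranked id is not junk and the junk entries can be dropped in one indexing step. The same filter in isolation (hypothetical ranking arrays):

import numpy as np

index = np.array([7, 3, 9, 1, 4])        # ranked gallery ids, best match first
junk_index = np.array([9, 1])

mask = np.in1d(index, junk_index, invert=True)
print(index[mask])                       # [7 3 4]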
Example No. 21
def close_obj(coord, size):
    coord = coord_pack(coord)
    ba, ab = np.indices((len(coord), len(coord)), dtype=np.int16)
    sep = coord[ab].separation(coord[ba])
    c = np.where(sep < size)
    close = np.where(c[0] < c[1])
    pairs = np.vstack((c[0][close],c[1][close]))
    samefov = np.delete(np.arange(len(coord), dtype=np.int16), np.hstack((c[0][close],c[1][close])))
    samefov = samefov.reshape(len(samefov),1).tolist()
    n, m = np.unique(pairs[0], return_counts=True)
    y = np.in1d(pairs[0], n[np.where(m == 1)])
    n1, m1 = np.unique(pairs[1], return_counts=True)
    y1 = np.in1d(pairs[1], n1[np.where(m1 == 1)])
    samefov = samefov + pairs.T[y*y1].tolist()
    q = pairs.T[~(y*y1)]
    for z in np.unique(q.T[0]):
        b = q.T[1][np.where(q.T[0] == z)]
        combs = []
        for i in np.arange(len(b),0, -1, dtype=np.int16):
            els = [[z] + list(x) for x in itertools.combinations(b, i)]
            combs.append(els)
        for i in combs:
            for a in i:
                d = False
                w = [list(x) for x in itertools.combinations(a,2)]
                if not np.all([i in q.tolist() for i in w]):
                    continue
                for v in samefov:
                    e = all(k in v for k in a)
                    d = bool(d + e)
                if d == False:
                    samefov = samefov + [a]
    return np.sort(samefov).tolist()
Example No. 22
def classifyPerCountry(T,V,Y,Y_country_hat):
	Y_country = np.floor(Y / 1000)
	print "\nClassifying per Country"
	Y_city = Y 
	country_codes = list(set(Y_country))
	nCountryCodes = len(country_codes)
	Y_hat = np.zeros(len(Y_country_hat))
	for i in range(nCountryCodes):
		print('%s\r' % ' '*20, end='')
		print('   ', i*100//nCountryCodes, end='')
#		clf = MultinomialNB(0.5)
		clf = SVC()
		country_idx = np.in1d(Y_country,country_codes[i])
		country_idx_sparse = country_idx.nonzero()[0]
		T_country = T[country_idx_sparse,:]
		Y_cityPerCountry = Y_city[country_idx]
		unique_Y_cityPerCountry=list(set(Y_cityPerCountry))
		predict_idx = np.in1d(Y_country_hat,country_codes[i])
		predict_idx_sparse = predict_idx.nonzero()[0]
		if len(unique_Y_cityPerCountry)==1 :
			Y_hat[predict_idx] = unique_Y_cityPerCountry
			continue
		clf.fit(T_country,Y_cityPerCountry)
		if sum(predict_idx) > 1:
			Y_cityPerCountry_hat = clf.predict(V[predict_idx_sparse,:])
			Y_hat[predict_idx] = Y_cityPerCountry_hat
	print "\n"
	return Y_hat
Example No. 23
    def sim_top_doc(self, topic_or_topics, weights=[], filter_words=[],
                    print_len=10, as_strings=True, label_fn=_def_label_fn_, 
                    filter_nan=True):
        """
        """
        d_arr = _sim_top_doc_(self.corpus, self.model.doc_top, topic_or_topics, 
                              self.model.context_type, weights=weights, 
                              norms=self._doc_norms, print_len=print_len,
                              as_strings=False, label_fn=label_fn, 
                              filter_nan=filter_nan)
        
        topics = _res_top_type_(topic_or_topics)

        if len(filter_words) > 0:
            white = set()
            for w in filter_words:
                l = self.word_topics(w, as_strings=False)
                d = l['i'][np.in1d(l['value'], topics)]
                white.update(d)
            
            d_arr = d_arr[(np.in1d(d_arr['i'], white))]

        if as_strings:
            md = self.corpus.view_metadata(self.model.context_type)
            docs = label_fn(md)
            d_arr = _map_strarr_(d_arr, docs, k='i', new_k='doc')

        return d_arr
Example No. 24
def find_matches(mock, obs, opts):

    """
    Function to find matching galaxy members between mock haloes
    and observed clusters.
    """
    
    obs = obs[np.in1d(obs.mem_id, mock.m_mem_id, assume_unique = True)]
    
    mock = mock[np.in1d(mock.m_mem_id, obs.mem_id, assume_unique = True)]
    
    merged = np.lib.recfunctions.merge_arrays([obs, mock], flatten = True,
                                              usemask = False)

    clusters = []
    count = 0

    for id_val in np.unique(obs.id):
        clusters.append(Clusterx(count))
         
        for member in merged[obs.id == id_val]:
            clusters[count].add_mem(member)
            
        count += 1
    
    for cluster in clusters:
        cluster.props()
        cluster.halo_count()
        cluster.mass_hist(opts.mass_bin)

    return clusters
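
assume_unique=True above skips in1d's internal de-duplication, which is a small speed-up but is only safe when both id columns really are unique. A toy illustration with hypothetical member ids:

import numpy as np

mem_id = np.array([10, 11, 12, 13])     # unique observed member ids
m_mem_id = np.array([11, 13, 14])       # unique mock member ids

mask = np.in1d(mem_id, m_mem_id, assume_unique=True)
print(mem_id[mask])                     # [11 13]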
Example No. 25
    def map_to_external_reference(self, roi, refname='HXB2', in_patient=True):
        '''
        return a map of positions in the patient to a reference genomewide
        Args:
            roi  --  region of interest given as a string or a tuple (start, end)
            refname --  reference to compare to
            in_patient -- specifies whether the (start, end) refers to reference or patient coordinates
        returns:
            a (len(roi), 3) array with reference coordinates in first column, 
                                        patient coordinates in second 
                                        roi coordinates in third column
        '''
        from .filenames import get_coordinate_map_filename
        coo_fn = get_coordinate_map_filename(self.name, 'genomewide', refname=refname)
        genomewide_map = np.loadtxt(coo_fn, dtype=int)

        if roi in self.annotation:
            roi_pos = np.array([x for x in self.annotation[roi]], dtype = int)
            ind = np.in1d(genomewide_map[:,1], roi_pos)
            roi_indices = np.in1d(roi_pos, genomewide_map[:,1]).nonzero()[0]
            return np.vstack((genomewide_map[ind].T, [roi_indices])).T

        elif roi == "genomewide":
            return np.vstack((genomewide_map.T, [genomewide_map[:,1]])).T            

        else:
            try:
                start, stop = map(int, roi)
                start_ind = np.searchsorted(genomewide_map[:,in_patient], start)
                stop_ind = np.searchsorted(genomewide_map[:,in_patient], stop)
                return np.vstack((genomewide_map[start_ind:stop_ind].T,
                                  [genomewide_map[start_ind:stop_ind, in_patient] - start])).T
            except:
                raise ValueError("ROI not understood")
Example No. 26
def check_filter_labels(inverse=False):

    # create a feature set
    fs, _ = make_classification_data(num_examples=1000,
                                     num_features=4,
                                     num_labels=5,
                                     train_test_ratio=1.0)

    # keep just the instances with 0, 1 and 2 labels
    labels_to_filter = [0, 1, 2]

    # do the actual filtering
    fs.filter(labels=labels_to_filter, inverse=inverse)

    # make sure that we removed the right things
    if inverse:
        ids_kept = fs.ids[np.where(np.logical_not(np.in1d(fs.labels,
                                                          labels_to_filter)))]
    else:
        ids_kept = fs.ids[np.where(np.in1d(fs.labels, labels_to_filter))]

    assert_array_equal(fs.ids, np.array(ids_kept))

    # make sure that number of ids, labels and features are the same
    eq_(fs.ids.shape[0], fs.labels.shape[0])
    eq_(fs.labels.shape[0], fs.features.shape[0])
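
The inverse branch above wraps np.in1d in np.logical_not; passing invert=True produces the same mask without the extra pass. A quick check (my addition, not part of the test):

import numpy as np

labels = np.array([0, 1, 2, 3, 4, 0, 1])
labels_to_filter = [0, 1, 2]

a = np.logical_not(np.in1d(labels, labels_to_filter))
b = np.in1d(labels, labels_to_filter, invert=True)
assert np.array_equal(a, b)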
Example No. 27
    def _limit_features(self, csr_matrix, low=2, high=None, limit=None):
        """
        Lower bound on features, so that > n docs must contain the feature
        """
        
        assert isinstance(csr_matrix, scipy.sparse.csr_matrix) # won't work with other sparse matrices
        # (most can be converted with .tocsr() method)

        indices_to_remove = np.where(np.asarray(csr_matrix.sum(axis=0) < low)[0])[0]
        # csr_matrix.sum(axis=0) < low: returns Boolean matrix where total features nums < low
        # np.asarray: converts np.matrix to np.array
        # [0]: since the array of interest is the first (and only) item in an outer array
        # np.where: to go from True/False to indices of Trues

        
        data_filter = np.in1d(csr_matrix.indices, indices_to_remove)
        # gets boolean array, where the columns of any non-zero values are to be removed
        # (i.e. their index is in the indices_to_remove array)

        # following three lines for info/debugging purposes
        # to show how many unique features are being removed
        num_total_features = len(np.unique(csr_matrix.indices)) 
        num_features_to_remove = np.sum(np.in1d(indices_to_remove, np.unique(csr_matrix.indices)))
        print "%d/%d features will be removed" % (num_features_to_remove, num_total_features)

        csr_matrix.data[data_filter] = 0
        # set the values to be removed to 0 to start with

        csr_matrix.eliminate_zeros()
        # then run the np optimised routine to delete those 0's (and free a little memory)
        # NB zeros are superfluous since a sparse matrix

        return csr_matrix
def compute_mAP(index, good_index, junk_index):
    ap = 0
    cmc = torch.IntTensor(len(index)).zero_()
    if good_index.size==0:   # if empty
        cmc[0] = -1
        return ap,cmc

    # remove junk_index
    mask = np.in1d(index, junk_index, invert=True)
    index = index[mask]

    # find good_index index
    ngood = len(good_index)
    mask = np.in1d(index, good_index)
    rows_good = np.argwhere(mask==True)
    rows_good = rows_good.flatten()
    
    cmc[rows_good[0]:] = 1
    for i in range(ngood):
        d_recall = 1.0/ngood
        precision = (i+1)*1.0/(rows_good[i]+1)
        if rows_good[i]!=0:
            old_precision = i*1.0/rows_good[i]
        else:
            old_precision=1.0
        ap = ap + d_recall*(old_precision + precision)/2

    return ap, cmc
Example No. 29
def untie(a,b):
    """
    
    Parameters
    ----------
    a
    b

    Returns
    -------
    boolean 
    a
    r 

    """
    la    = len(a)
    lb    = len(b)
    u     = np.intersect1d(a,b)
    lu    = len(u)
    #print lu
    #print min(la,lb)/2
    if lu >= min(la,lb)/2:
        # segment of a not shared with b
        aa    = a[~np.in1d(a,u)]
        # segment of b not shared with a
        bb    = b[~np.in1d(b,u)]
        r     = np.hstack((aa,bb))
        if la<lb:
            return(True,a,r)
        else:
            return(True,b,r)
    else:
        return(False,-1,-1)
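
The a[~np.in1d(a, u)] trick above keeps the original ordering and any repeats of a, whereas np.setdiff1d returns a sorted, de-duplicated result. A side-by-side sketch:

import numpy as np

a = np.array([7, 3, 5, 3, 5])
u = np.array([3])

print(a[~np.in1d(a, u)])    # [7 5 5]  order and repeats of a preserved
print(np.setdiff1d(a, u))   # [5 7]    sorted and de-duplicated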
Example No. 30
def make_lineup(pa_transitions, non_pa_transitions, by_batting_order = True):
    #If by_batting_order is false, grab by position instead
    constructed_lineup = []
    for lineup_spot in range(1, num_lineup_spots+1):
        final_pa_transitions = []
        final_non_pa_transitions = []
        spot_to_take = lineup_spot if by_batting_order else batting_order_positions_1[lineup_spot - 1]
        print (lineup_spot, spot_to_take)
        current_pa_transitions = pa_transitions[np.in1d(pa_transitions[:, 0], spot_to_take)]
        print(current_pa_transitions)
        current_non_pa_transitions = non_pa_transitions[np.in1d(non_pa_transitions[:, 0], spot_to_take)]
        for start_state in range(0,num_start_states):
            pa_row = [0] * num_end_states
            non_pa_row = [0] * num_end_states
            pa_for_start_state = current_pa_transitions[np.in1d(current_pa_transitions[:, 1], start_state)]
            non_pa_for_start_state = current_non_pa_transitions[np.in1d(current_non_pa_transitions[:, 1], start_state)]
            for row in pa_for_start_state:
                pa_row[row[2]] = row[3]
            final_pa_transitions.append(pa_row)
            for row in non_pa_for_start_state:
                non_pa_row[row[2]] = row[3]
            final_non_pa_transitions.append(non_pa_row)
        #print(final_pa_transitions)
        constructed_lineup.append(LineupSpot(final_pa_transitions, final_non_pa_transitions))
    return constructed_lineup
    def _calculate_v_fits(self):
        """ Find lines that run vertically on the screen.
        """
        # binary_warped shape: (height, width)

        binary_warped = self.binary_warped

        left_lane_inds = []
        right_lane_inds = []

        left_margin = self.left_search_margin
        right_margin = self.right_search_margin

        # Pixels closer to the car are more important, so we apply weights to the histogram
        weights = np.array([range(binary_warped.shape[0])
                            ])**self.closer_importance
        weighted = binary_warped * weights.T
        #         weighted = weights.T ** binary_warped
        #         weighted = binary_warped

        # For center weights, convert as follows:
        # 0 1 2 3 4 5 6 7 to 1 2 3 4 4 3 2 1
        # (changes: 1, 1, 1, 1, 0, -2, -4, -6)
        # In other words, points closer to the center have higher scores.

        cweights = np.array([range(binary_warped.shape[1])])
        hlen = int(cweights.shape[1] / 2)  # half-length
        adj = [1] * hlen  # adjustments
        for i in range(hlen):
            v = -i * 2
            adj.append(v)
        cweights += adj
        weighted *= cweights**self.center_importance

        # Sums all weighted points in the bottom 50% section (remember that bigger numbers are at the bottom).
        histogram = np.sum(weighted[int(weighted.shape[0] *
                                        self.v_hist_crop_top):, :],
                           axis=0)
        midpoint = int(histogram.shape[0] / 2)
        histogram_l = histogram[:(midpoint)]
        histogram_r = histogram[(midpoint):]

        # === SLIDING WINDOWS ===
        leftx_base = np.argmax(histogram_l) + self.vert_x_adjust[0]
        rightx_base = np.argmax(histogram_r) + midpoint + self.vert_x_adjust[1]

        # Making sure bases do not pass the center. We do not want to have left line starts from
        # right section and vice versa.
        if (leftx_base + left_margin) > midpoint:
            leftx_base = midpoint - left_margin
        if (rightx_base - right_margin) < midpoint:
            rightx_base = midpoint + right_margin
        # At this point, leftx_base and rightx_base should contain x position of each respective line.
        window_height = int((binary_warped.shape[0] *
                             (1.0 - self.v_win_crop_top)) / self.nwindows)

        # Identify the x and y positions of all nonzero pixels in the image
        nonzero = binary_warped.nonzero()
        nonzeroy = np.array(nonzero[0])
        nonzerox = np.array(nonzero[1])

        # Current positions to be updated for each window
        leftx_current = leftx_base
        rightx_current = rightx_base

        # Only used for debugging.
        if self.debug:
            out_img = np.dstack(
                (binary_warped, binary_warped, binary_warped)) * 255

        left_patience_counter = 0
        right_patience_counter = 0
        # The window takes into account x position found in the previous
        # centroid. It finds a centroid closest to it in the next iteration of window.
        # If there is no centroid x, use window center.
        # Global coordinate is used here.
        left_window_patience = self.window_patience
        right_window_patience = self.window_patience

        # Step through the windows one by one
        for window in range(self.nwindows):
            # Identify window boundaries in x and y (and right and left)
            # The higher win_y_low is, the closer to the top of the plot.
            win_y_low = binary_warped.shape[0] - (window + 1) * window_height
            win_y_high = binary_warped.shape[0] - window * window_height
            win_xleft_low = leftx_current - left_margin
            win_xleft_high = leftx_current + left_margin
            win_xright_low = rightx_current - right_margin
            win_xright_high = rightx_current + right_margin

            # Identify the nonzero pixels in x and y within the window
            good_left_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & \
                              (nonzerox >= win_xleft_low) & (nonzerox < win_xleft_high)).nonzero()[0]
            good_right_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & \
                               (nonzerox >= win_xright_low) & (nonzerox < win_xright_high)).nonzero()[0]

            # === Select better centroid (Left) ===
            # Find the centroids of pixels in the current window.
            connectivity = 4
            left_pixels_x = nonzerox[good_left_inds] - win_xleft_low
            left_pixels_y = nonzeroy[good_left_inds] - win_y_low
            pixels = np.zeros((window_height, (left_margin * 2)))

            best_centroid_x, best_pixels_pos = self._choose_best_centroid(
                pixels, leftx_current, win_xleft_low, left_pixels_x,
                left_pixels_y)

            if best_pixels_pos is not None:
                # Currently, best_pixels_pos contains the most relevant positions.
                # We just need to convert them into good_[left/right]_inds.
                best_pixels_pos += [win_y_low, win_xleft_low]
                bestx = best_pixels_pos[:, 1]
                besty = best_pixels_pos[:, 0]

                good_left_inds = np.intersect1d(np.argwhere(np.in1d(nonzerox,bestx)).flatten(), \
                    np.argwhere(np.in1d(nonzeroy,besty)).flatten()).tolist()

            # === END Select better centroid (Left)===

            # === Select better centroid (Right) ===
            # Find the centroids of pixels in the current window.
            connectivity = 4
            right_pixels_x = nonzerox[good_right_inds] - win_xright_low
            right_pixels_y = nonzeroy[good_right_inds] - win_y_low
            pixels = np.zeros((window_height, (right_margin * 2)))

            best_centroid_x, best_pixels_pos = self._choose_best_centroid(
                pixels, rightx_current, win_xright_low, right_pixels_x,
                right_pixels_y)

            if best_pixels_pos is not None:
                # Currently, best_pixels_pos contains the most relevant positions.
                # We just need to convert them into good_[left/right]_inds.
                best_pixels_pos += [win_y_low, win_xright_low]
                bestx = best_pixels_pos[:, 1]
                besty = best_pixels_pos[:, 0]

                good_right_inds = np.intersect1d(np.argwhere(np.in1d(nonzerox,bestx)).flatten(), \
                    np.argwhere(np.in1d(nonzeroy,besty)).flatten()).tolist()

            # === END Select better centroid (Right)===

#             right_previous_centroid_x, best_pixels_pos = self._choose_best_centroid(
#                 pixels, right_previous_centroid_x, right_pixels_x, right_pixels_y)

# If any of the pixels touches left/right section, stop when there is no more pixel to add.
# We do this by setting the patience to 1.
            if np.any(nonzerox[good_left_inds] == 0):
                left_window_patience = 1
            if np.any(nonzerox[good_right_inds] == binary_warped.shape[1]):
                right_window_patience = 1

            # If sliding windows do not find enough pixels for some iterations, give up.
            if left_patience_counter > left_window_patience:
                pass
            else:
                if len(good_left_inds) <= self.window_empty_px:
                    left_patience_counter += 1
                else:
                    left_patience_counter = 0

                    # Append these indices to the lists
                    left_lane_inds.append(good_left_inds)

                # If you found > minpix pixels, recenter next window on their mean position
                if len(good_left_inds) > self.window_minpix:
                    leftx_current = int(np.mean(nonzerox[good_left_inds]))

                # === DEBUGGING SLIDING WINDOWS ===
                if self.debug:
                    cv2.rectangle(out_img, (win_xleft_low, win_y_low),
                                  (win_xleft_high, win_y_high), (255, 0, 0),
                                  math.ceil(2 * self.scale))
                # === END DEBUGGING SLIDING WINDOWS ===

            if right_patience_counter > right_window_patience:
                pass
            else:
                if len(good_right_inds) <= self.window_empty_px:
                    right_patience_counter += 1
                else:
                    right_patience_counter = 0

                    # Append these indices to the lists
                    right_lane_inds.append(good_right_inds)

                # If you found > minpix pixels, recenter next window on their mean position
                if len(good_right_inds) > self.window_minpix:
                    rightx_current = int(np.mean(nonzerox[good_right_inds]))

                # === DEBUGGING SLIDING WINDOWS ===
                if self.debug:
                    cv2.rectangle(out_img, (win_xright_low, win_y_low),
                                  (win_xright_high, win_y_high), (0, 255, 0),
                                  math.ceil(2 * self.scale))
                # === END DEBUGGING SLIDING WINDOWS ===

        # === END SLIDING WINDOWS ===
        fits = self._wrap_up_windows(left_lane_inds, right_lane_inds, nonzerox,
                                     nonzeroy, 'v')

        # === DEBUGGING ===
        if self.debug:
            if self.debug_dir == 'v':
                plt.imshow(out_img, cmap='gray')
            # Normalize histogram values so they don't go beyond image height.
            maxval = np.amax(histogram)
            hist_viz = np.copy(histogram)
            if maxval != 0:
                hist_viz = (hist_viz / maxval) * binary_warped.shape[0]

            # Subtract histogram values from max values so the histogram can be drawn
            # at the bottom of the plot.
            hist_viz = binary_warped.shape[0] - hist_viz
            # Plot histogram
            if self.debug_axes:
                self.debug_axes.plot(hist_viz, '-', c='#00FFFF', lw=2)

        # === END DEBUGGING ===

        return (fits, histogram_l, histogram_r)
"""
@Project :
@File: 唯一化以及其他的集合逻辑.py
@Author: liuwz
@time: 2021/12/26 11:11 AM
@desc:
"""
import numpy as np

names = np.array(['Joe', 'Bob', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

# unique: remove duplicates and sort the result
names = np.unique(names)
print(names)

# intersect1d: return the elements common to both arrays, sorted
x = np.arange(5)
y = np.arange(3, 8)
print(np.intersect1d(x, y))

# in1d: return a boolean array marking which values of the first array are present in the second
print(np.in1d(x, y))

# setdiff1d: set difference -- elements that are in x but not in y
print(np.setdiff1d(x, y))

# setxor1d: symmetric difference -- elements present in exactly one of the two arrays (not both)
print(np.setxor1d(x, y))
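
# In newer NumPy code the same membership test is usually written with np.isin,
# which behaves like in1d but keeps the shape of its first argument instead of
# flattening it. A short comparison, continuing the script above:
x2d = np.arange(6).reshape(2, 3)
print(np.in1d(x2d, [1, 4]))   # flattened 1-D result: [False  True False False  True False]
print(np.isin(x2d, [1, 4]))   # same truth values, but with shape (2, 3)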



Example No. 33
    def set_neuron_param(self, params, neurons=None, group=None):
        '''
        Set the parameters of specific neurons or of a whole group.

        .. versionadded:: 1.0

        Parameters
        ----------
        params : dict
            Dictionary containing parameters for the neurons. Entries can be
            either a single number (same for all neurons) or a list (one entry
            per neuron).
        neurons : list of ints, optional (default: None)
            Ids of the neurons whose parameters should be modified.
        group : list of strings, optional (default: None)
            List of strings containing the names of the groups whose parameters
            should be updated. When modifying neurons from a single group, it
            is still useful to specify the group name to speed things up.

        Note
        ----
        If both `neurons` and `group` are None, all neurons will be modified.

        Warning
        -------
        No check is performed on the validity of the parameters, which means
        that errors will only be detected when building the graph in NEST.
        '''
        if self._to_nest:
            raise RuntimeError("Parameters cannot be changed after the "
                               "network has been sent to NEST!")

        if neurons is not None:  # specific neuron ids
            groups = []
            # get the groups they could belong to
            if group is not None:
                if nonstring_container(group):
                    groups.extend((self[g] for g in group))
                else:
                    groups.append(self[group])
            else:
                groups.extend(self.values())
            # update the groups parameters
            for g in groups:
                idx = np.where(np.in1d(g.ids, neurons, assume_unique=True))[0]
                # set the properties of the nodes for each entry in params
                for k, v in params.items():
                    default = np.NaN
                    if k in g.neuron_param:
                        default = g.neuron_param[k]
                    elif nngt.get_config('with_nest'):
                        try:
                            import nest
                            try:
                                default = nest.GetDefaults(g.neuron_model, k)
                            except nest.NESTError:
                                pass
                        except ImportError:
                            pass
                    vv      = np.repeat(default, g.size)
                    vv[idx] = v
                    # update
                    g.neuron_param[k] = vv
        else:  # all neurons in one or several groups
            group = self.keys() if group is None else group
            if not nonstring_container(group):
                group = [group]
            start = 0
            for name in group:
                g = self[name]
                for k, v in params.items():
                    if nonstring_container(v):
                        g.neuron_param[k] = v[start:start+g.size]
                    else:
                        g.neuron_param[k] = v
                start += g.size
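
np.where(np.in1d(...))[0] above converts the boolean membership mask into positional indices within g.ids, which is what the vv[idx] assignment needs. The same conversion in isolation (toy id arrays):

import numpy as np

ids = np.array([4, 8, 15, 16, 23, 42])
neurons = np.array([15, 42])

idx = np.where(np.in1d(ids, neurons, assume_unique=True))[0]
print(idx)   # [2 5]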
Example No. 34
    def get_adsorption_edges(
            self,
            symmetric=True,
            periodic=True):
        ''' Return the edges of adsorption sites defined as all regions
            with adjacent vertices.

        Parameters
        ----------
        symmetric : bool
            Return only the symmetrically reduced edges.
        periodic : bool
            Return edges which are unique via periodicity.

        Returns
        -------
        edges : ndarray (n, 2)
            All edges crossing ridge or vertices indexed by the expanded
            unit slab.

        '''
        vt = scipy.spatial.Voronoi(self.coordinates[:, :2],
                                   qhull_options='Qbb Qc Qz C{}'.format(1e-2))

        select, lens = [], []
        for i, p in enumerate(vt.point_region):
            select += [vt.regions[p]]
            lens += [len(vt.regions[p])]

        dmax = max(lens)
        regions = np.zeros((len(select), dmax), int)
        mask = np.arange(dmax) < np.array(lens)[:, None]
        regions[mask] = np.concatenate(select)

        site_id = self.get_symmetric_sites(unique=False, screen=False)
        site_id = site_id + self.connectivity / 10
        per = self.get_periodic_sites(screen=False)

        uper = self.get_periodic_sites()
        edges, symmetry, uniques = [], [], []
        for i, p in enumerate(uper):
            poi = vt.point_region[p]
            voi = vt.regions[poi]

            for v in voi:
                nr = np.where(regions == v)[0]

                for n in nr:
                    edge = sorted((p, n))

                    if n in uper[:i + 1] or edge in edges:
                        continue

                    if (np.in1d(per[edge], per[uper[:i]]).any()) and periodic:
                        continue

                    sym = sorted(site_id[edge])
                    if sym in symmetry:
                        uniques += [False]
                    else:
                        uniques += [True]
                        symmetry += [sym]

                    edges += [edge]

        edges = np.array(edges)
        if symmetric:
            edges = edges[uniques]

        return edges
Example No. 35
    def __init__(
            self,
            slab,
            surface_atoms=None,
            tol=1e-5):
        ''' Create an extended unit cell of the surface sites for
        use in identifying other sites.

        Parameters
        ----------
        slab : Gatoms object
            The slab associated with the adsorption site network to be
            attached.
        tol : float
            Absolute tolerance for floating point errors.

        '''
        index, coords, offsets = Utils().expand_cell(slab, cutoff=5.0)
        if surface_atoms is None:
            surface_atoms = slab.get_surface_atoms()
        if surface_atoms is None:
            raise ValueError('Slab must contain surface atoms')

        extended_top = np.where(np.in1d(index, surface_atoms))[0]

        self.tol = tol
        self.coordinates = coords[extended_top].tolist()
        self.connectivity = np.ones(extended_top.shape[0]).tolist()
        self.r1_topology = [[i] for i in np.arange(len(extended_top))]
        self.index = index[extended_top]

        sites = self._get_higher_coordination_sites(coords[extended_top])
        self.r2_topology = sites['top'][2]

        # Put data into array format
        selection = ['bridge', 'hollow', '4fold']
        for i, k in enumerate(selection):
            coordinates, r1top, r2top = sites[k]

            if k in ['hollow', '4fold']:
                r2top = [[] for _ in coordinates]

            self.connectivity += (np.ones(len(coordinates)) * (i + 2)).tolist()
            self.coordinates += coordinates
            self.r1_topology += r1top
            self.r2_topology += r2top

        self.coordinates = np.array(self.coordinates)
        self.connectivity = np.array(self.connectivity, dtype=int)
        self.r1_topology = np.array(self.r1_topology, dtype=object)
        self.r2_topology = np.array(self.r2_topology, dtype=object)
        self.frac_coords = np.dot(self.coordinates, np.linalg.pinv(slab.cell))
        self.slab = slab

        screen = (self.frac_coords[:, 0] > 0 - self.tol) & \
                 (self.frac_coords[:, 0] < 1 - self.tol) & \
                 (self.frac_coords[:, 1] > 0 - self.tol) & \
                 (self.frac_coords[:, 1] < 1 - self.tol)

        self.screen = screen
        self._symmetric_sites = None
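
The final screening step converts Cartesian coordinates to fractional coordinates through the pseudo-inverse of the cell and keeps only sites that fall inside the original unit cell. A small standalone sketch of that step with a made-up orthogonal cell:

import numpy as np

# Map Cartesian coordinates to fractional coordinates with pinv(cell),
# then keep only points inside the [0, 1) unit cell (within a tolerance).
tol = 1e-5
cell = np.diag([4.0, 4.0, 20.0])          # hypothetical orthogonal cell
coords = np.array([[1.0, 1.0, 5.0],       # inside
                   [5.0, 1.0, 5.0],       # outside in x
                   [-0.5, 2.0, 5.0]])     # outside in x

frac = np.dot(coords, np.linalg.pinv(cell))
screen = ((frac[:, 0] > 0 - tol) & (frac[:, 0] < 1 - tol) &
          (frac[:, 1] > 0 - tol) & (frac[:, 1] < 1 - tol))
print(screen)                             # -> [ True False False]
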
Ejemplo n.º 36
0
            if np.isnan(surf_data).any():
                print(rest_interp_f)
                surf_data = surf_data.squeeze()
                surf_f = '%s/fsaverage5/surf/%s.orig' % (fsDir, hemi)
                surf_faces = nib.freesurfer.io.read_geometry(surf_f)[1]
                mask = np.zeros((10242))

                while np.isnan(surf_data).any():
                    nans = np.unique(np.where(np.isnan(surf_data))[0])
                    mask[nans] = 1
                    bad = []
                    good = {}
                    for node in nans:
                        neighbors = np.unique(surf_faces[np.where(
                            np.in1d(surf_faces.ravel(),
                                    [node]).reshape(surf_faces.shape))[0]])
                        bad_neighbors = neighbors[np.unique(
                            np.where(np.isnan(surf_data[neighbors]))[0])]
                        good_neighbors = np.setdiff1d(neighbors, bad_neighbors)
                        bad.append((node, len(bad_neighbors)))
                        good[node] = good_neighbors
                    bad = np.array(bad).transpose()
                    nodes_with_least_bad_neighbors = bad[0][bad[1] == np.min(
                        bad[1])]
                    for node in nodes_with_least_bad_neighbors:
                        surf_data[node] = np.mean(surf_data[list(good[node])],
                                                  axis=0)

                brain = Brain('fsaverage5', hemi, 'pial', curv=False)
                brain.add_data(mask,
                               mask.min(),
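
The loop above repeatedly replaces NaN vertices with the mean of their non-NaN neighbours, where a vertex's neighbours are found by matching it against the face array with np.in1d. A compact, self-contained sketch of that neighbour lookup and fill on a toy mesh (values are made up):

import numpy as np

# faces is (n_faces, 3); the neighbours of a vertex are all vertices that
# share a face with it, found via np.in1d on the flattened face array.
faces = np.array([[0, 1, 2],
                  [1, 2, 3],
                  [2, 3, 4]])
data = np.array([1.0, 2.0, np.nan, 4.0, 5.0])

while np.isnan(data).any():
    for node in np.unique(np.where(np.isnan(data))[0]):
        hit = np.in1d(faces.ravel(), [node]).reshape(faces.shape)
        neighbors = np.unique(faces[np.where(hit)[0]])
        good = neighbors[~np.isnan(data[neighbors])]
        if good.size:                     # fill from non-NaN neighbours
            data[node] = data[good].mean()

print(data)                               # NaN at vertex 2 becomes 3.0
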
Ejemplo n.º 37
0
def test_write_labels_to_annot():
    """Test writing FreeSurfer parcellation from labels"""
    tempdir = _TempDir()

    labels = read_labels_from_annot('sample', subjects_dir=subjects_dir)

    # create temporary subjects-dir skeleton
    surf_dir = op.join(subjects_dir, 'sample', 'surf')
    temp_surf_dir = op.join(tempdir, 'sample', 'surf')
    os.makedirs(temp_surf_dir)
    shutil.copy(op.join(surf_dir, 'lh.white'), temp_surf_dir)
    shutil.copy(op.join(surf_dir, 'rh.white'), temp_surf_dir)
    os.makedirs(op.join(tempdir, 'sample', 'label'))

    # test automatic filenames
    dst = op.join(tempdir, 'sample', 'label', '%s.%s.annot')
    write_labels_to_annot(labels, 'sample', 'test1', subjects_dir=tempdir)
    assert_true(op.exists(dst % ('lh', 'test1')))
    assert_true(op.exists(dst % ('rh', 'test1')))
    # lh only
    for label in labels:
        if label.hemi == 'lh':
            break
    write_labels_to_annot([label], 'sample', 'test2', subjects_dir=tempdir)
    assert_true(op.exists(dst % ('lh', 'test2')))
    assert_true(op.exists(dst % ('rh', 'test2')))
    # rh only
    for label in labels:
        if label.hemi == 'rh':
            break
    write_labels_to_annot([label], 'sample', 'test3', subjects_dir=tempdir)
    assert_true(op.exists(dst % ('lh', 'test3')))
    assert_true(op.exists(dst % ('rh', 'test3')))
    # label alone
    assert_raises(TypeError, write_labels_to_annot, labels[0], 'sample',
                  'test4', subjects_dir=tempdir)

    # write left and right hemi labels with filenames:
    fnames = [op.join(tempdir, hemi + '-myparc') for hemi in ['lh', 'rh']]
    with warnings.catch_warnings(record=True):  # specify subject_dir param
        for fname in fnames:
            write_labels_to_annot(labels, annot_fname=fname)

    # read it back
    labels2 = read_labels_from_annot('sample', subjects_dir=subjects_dir,
                                     annot_fname=fnames[0])
    labels22 = read_labels_from_annot('sample', subjects_dir=subjects_dir,
                                      annot_fname=fnames[1])
    labels2.extend(labels22)

    names = [label.name for label in labels2]

    for label in labels:
        idx = names.index(label.name)
        assert_labels_equal(label, labels2[idx])

    # same with label-internal colors
    for fname in fnames:
        write_labels_to_annot(labels, 'sample', annot_fname=fname,
                              overwrite=True, subjects_dir=subjects_dir)
    labels3 = read_labels_from_annot('sample', subjects_dir=subjects_dir,
                                     annot_fname=fnames[0])
    labels33 = read_labels_from_annot('sample', subjects_dir=subjects_dir,
                                      annot_fname=fnames[1])
    labels3.extend(labels33)
    names3 = [label.name for label in labels3]
    for label in labels:
        idx = names3.index(label.name)
        assert_labels_equal(label, labels3[idx])

    # make sure we can't overwrite things
    assert_raises(ValueError, write_labels_to_annot, labels, 'sample',
                  annot_fname=fnames[0], subjects_dir=subjects_dir)

    # however, this works
    write_labels_to_annot(labels, 'sample', annot_fname=fnames[0],
                          overwrite=True, subjects_dir=subjects_dir)

    # label without color
    labels_ = labels[:]
    labels_[0] = labels_[0].copy()
    labels_[0].color = None
    write_labels_to_annot(labels_, 'sample', annot_fname=fnames[0],
                          overwrite=True, subjects_dir=subjects_dir)

    # duplicate color
    labels_[0].color = labels_[2].color
    assert_raises(ValueError, write_labels_to_annot, labels_, 'sample',
                  annot_fname=fnames[0], overwrite=True,
                  subjects_dir=subjects_dir)

    # invalid color inputs
    labels_[0].color = (1.1, 1., 1., 1.)
    assert_raises(ValueError, write_labels_to_annot, labels_, 'sample',
                  annot_fname=fnames[0], overwrite=True,
                  subjects_dir=subjects_dir)

    # overlapping labels
    labels_ = labels[:]
    cuneus_lh = labels[6]
    precuneus_lh = labels[50]
    labels_.append(precuneus_lh + cuneus_lh)
    assert_raises(ValueError, write_labels_to_annot, labels_, 'sample',
                  annot_fname=fnames[0], overwrite=True,
                  subjects_dir=subjects_dir)

    # unlabeled vertices
    labels_lh = [label for label in labels if label.name.endswith('lh')]
    write_labels_to_annot(labels_lh[1:], 'sample', annot_fname=fnames[0],
                          overwrite=True, subjects_dir=subjects_dir)
    labels_reloaded = read_labels_from_annot('sample', annot_fname=fnames[0],
                                             subjects_dir=subjects_dir)
    assert_equal(len(labels_lh), len(labels_reloaded))
    label0 = labels_lh[0]
    label1 = labels_reloaded[-1]
    assert_equal(label1.name, "unknown-lh")
    assert_true(np.all(np.in1d(label0.vertices, label1.vertices)))

    # unnamed labels
    labels4 = labels[:]
    labels4[0].name = None
    assert_raises(ValueError, write_labels_to_annot, labels4,
                  annot_fname=fnames[0])
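
The final in1d assertion above is a plain subset test: every vertex of the dropped label must appear in the reloaded unknown-lh label. In isolation the idiom is:

import numpy as np

# np.all(np.in1d(a, b)) is True exactly when every element of a occurs in b.
a = np.array([3, 5, 7])
b = np.array([1, 3, 5, 7, 9])
print(np.all(np.in1d(a, b)))   # True  -> a is a subset of b
print(np.all(np.in1d(b, a)))   # False
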
Ejemplo n.º 38
0
def ClusterTree(D, adj_list):

    """
    Compute the Ward clustering linkage matrix for a given similarity
    matrix and adjacency structure.

    Parameters:
    - - - - -
    D : array
        similarity matrix
    adj_list : dictionary
        adjacency list

    Returns:
    - - - -
    Z : array
        linkage matrix
    """

    X = D

    # Compute squared euclidean distance Y between rows
    Qx = np.tile(np.linalg.norm(X, axis=1)**2,(X.shape[0],1))
    Y = Qx + Qx.transpose()-2*np.dot(X, X.transpose())
    Y = spatial.distance.squareform(Y,checks=False)
    Y[Y<0] = 0  # Correct for numerical errors in very similar rows

    print('Similarity shape: {:}'.format(Y.shape))

    # Construct adjacency matrix
    N = len(adj_list)
    A = np.zeros([N, N], dtype=bool)
    for i in range(N):
        A[i, adj_list[i]] = True
    connected = spatial.distance.squareform(A).astype(bool)
    print('Connected shape: {:}'.format(connected.shape))

    # Initialize all data structures
    valid_clusts = np.ones(N, dtype=bool)   # which clusters still remain
    col_limits = np.cumsum(np.concatenate((np.array([N-2]),
                                           np.arange(N-2, 0, -1))))

    # While clusters are merged, cluster indices keep changing.  R is an
    # index vector mapping each original index to its current (row, column)
    # index in Y.  C stores how many points each cluster contains.
    m = int(np.ceil(np.sqrt(2*Y.shape[0])))
    C = np.zeros(2*m-1)
    C[0:m] = 1
    R = np.arange(m)
    all_inds = np.arange(Y.shape[0])

    # pairs of adjacent clusters that can be merged
    conn_inds = all_inds[connected]
    Z = np.zeros([m-1, 4])

    for s in range(m-1):
        if conn_inds.size == 0:
            # The graph was disconnected (e.g. two hemispheres)
            # Just add all connections to finish up cluster tree
            connected = np.zeros(len(connected), dtype=bool)
            conn_inds = []
            valid_clust_inds = np.flatnonzero(valid_clusts)

            for i in valid_clust_inds:
                U = valid_clusts
                U[i] = 0
                new_conns = PdistInds(i, N, U)
                connected[new_conns] = True
                conn_inds = np.concatenate((conn_inds, new_conns))

            conn_inds = np.unique(conn_inds)

        # Find closest pair of clusters
        v = np.amin(Y[conn_inds])
        k = conn_inds[np.argmin(Y[conn_inds])]

        j = np.where(k <= col_limits)[0][0]
        i = N - (col_limits[j] - k) - 1

        # Add row to output linkage
        Z[s, 0:3] = np.array([R[i], R[j], v])

        # Update Y with this new cluster i containing old clusters i and j
        U = valid_clusts
        U[np.array([i, j])] = 0
        oldI = PdistInds(i, N, U)
        oldJ = PdistInds(j, N, U)
        Y[oldI] = ((
            C[R[U]]+C[R[i]])*Y[oldI] +
            (C[R[U]]+C[R[j]])*Y[oldJ] -
            C[R[U]]*v)/(C[R[i]]+C[R[j]] + C[R[U]])

        # Add j's connections to new cluster i
        new_conns = connected[oldJ] & ~connected[oldI]
        connected[oldI] = connected[oldI] | new_conns
        conn_inds = np.sort(np.concatenate((conn_inds, oldI[new_conns])))

        # Remove all of j's connections from conn_inds and connected
        U[i] = 1
        J = PdistInds(j, N, U)
        conn_inds = conn_inds[np.in1d(conn_inds, J, assume_unique=True,
                                      invert=True)]
        connected[J] = np.zeros(len(J))

        valid_clusts[j] = 0
        # update m, N, R
        C[m+s] = C[R[i]] + C[R[j]]
        Z[s, 3] = C[m+s]
        R[i] = m+s

    Z[:, 2] = np.sqrt(Z[:, 2])
    return Z
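
PdistInds is not included in the snippet above. A plausible helper, under the assumption that it returns the condensed-distance indices of all pairs (i, k) for the valid clusters k marked in U (with k != i), could look like the sketch below; the pair-to-index formula is scipy's condensed (squareform) ordering.

import numpy as np
from scipy import spatial

def pdist_index(i, j, N):
    """Index of pair (i, j) in a condensed distance vector of N points."""
    i, j = (i, j) if i < j else (j, i)
    return N * i - i * (i + 1) // 2 + (j - i - 1)

def PdistInds(i, N, U):
    """Condensed indices of all pairs (i, k) with U[k] True and k != i."""
    cols = np.flatnonzero(U)
    cols = cols[cols != i]
    return np.array([pdist_index(i, k, N) for k in cols], dtype=int)

# Sanity check against scipy's condensed ordering for N = 5 points.
cond = np.arange(10, dtype=float)
D = spatial.distance.squareform(cond)
assert D[1, 3] == cond[pdist_index(1, 3, 5)]
print(PdistInds(1, 5, np.ones(5, dtype=bool)))   # -> [0 4 5 6]
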
Ejemplo n.º 39
0
def plot_projs_joint(projs,
                     evoked,
                     picks_trace=None,
                     *,
                     topomap_kwargs=None,
                     show=True,
                     verbose=None):
    """Plot projectors and evoked jointly.

    Parameters
    ----------
    projs : list of Projection
        The projectors to plot.
    evoked : instance of Evoked
        The data to plot. Typically this is the evoked instance created from
        averaging the epochs used to create the projection.
    %(picks_plot_projs_joint_trace)s
    topomap_kwargs : dict | None
        Keyword arguments to pass to :func:`mne.viz.plot_projs_topomap`.
    %(show)s
    %(verbose)s

    Returns
    -------
    fig : instance of matplotlib Figure
        The figure.

    Notes
    -----
    This function creates a figure with three columns:

    1. The left shows the evoked data traces before (black) and after (green)
       projection.
    2. The center shows the topomaps associated with each of the projectors.
    3. The right again shows the data traces (black), but this time with:

       1. The data projected onto each projector with a single normalization
          factor (solid lines). This is useful for seeing the relative power
          in each projection vector.
       2. The data projected onto each projector with individual normalization
          factors (dashed lines). This is useful for visualizing each time
          course regardless of its power.
       3. Additional data traces from ``picks_trace`` (solid yellow lines).
          This is useful for visualizing the "ground truth" of the time
          course, e.g. the measured EOG or ECG channel time courses.

    .. versionadded:: 1.1
    """
    import matplotlib.pyplot as plt
    from ..evoked import Evoked
    _validate_type(evoked, Evoked, 'evoked')
    _validate_type(topomap_kwargs, (None, dict), 'topomap_kwargs')
    projs = _check_type_projs(projs)
    topomap_kwargs = dict() if topomap_kwargs is None else topomap_kwargs
    if picks_trace is not None:
        picks_trace = _picks_to_idx(evoked.info,
                                    picks_trace,
                                    allow_empty=False)
    info = evoked.info
    ch_types = evoked.get_channel_types(unique=True, only_data_chs=True)
    proj_by_type = dict()  # will be set up like an enumerate key->[pi, proj]
    ch_names_by_type = dict()
    used = np.zeros(len(projs), int)
    for ch_type in ch_types:
        these_picks = _picks_to_idx(info, ch_type, allow_empty=True)
        these_chs = [evoked.ch_names[pick] for pick in these_picks]
        ch_names_by_type[ch_type] = these_chs
        for pi, proj in enumerate(projs):
            if not set(these_chs).intersection(proj['data']['col_names']):
                continue
            if ch_type not in proj_by_type:
                proj_by_type[ch_type] = list()
            proj_by_type[ch_type].append([pi, deepcopy(proj)])
            used[pi] += 1
    missing = (~used.astype(bool)).sum()
    if missing:
        warn(f'{missing} projector{_pl(missing)} had no channel names '
             'present in epochs')
    del projs
    ch_types = list(proj_by_type)  # reduce to number we actually need
    # room for legend
    max_proj_per_type = max(len(x) for x in proj_by_type.values())
    cs_trace = 3
    cs_topo = 2
    n_col = max_proj_per_type * cs_topo + 2 * cs_trace
    n_row = len(ch_types)
    shape = (n_row, n_col)
    fig = plt.figure(figsize=(n_col * 1.1 + 0.5, n_row * 1.8 + 0.5),
                     constrained_layout=True)
    ri = 0
    # pick some sufficiently distinct colors (6 per proj type, e.g., ECG,
    # should be enough hopefully!)
    # https://personal.sron.nl/~pault/data/colourschemes.pdf
    # "Vibrant" color scheme
    proj_colors = [
        '#CC3311',  # red
        '#009988',  # teal
        '#0077BB',  # blue
        '#EE3377',  # magenta
        '#EE7733',  # orange
        '#33BBEE',  # cyan
    ]
    trace_color = '#CCBB44'  # yellow
    after_color, after_name = '#228833', 'green'
    type_titles = DEFAULTS['titles']
    last_ax = [None] * 2
    first_ax = dict()
    pe_kwargs = dict(show=False, draw=False)
    for ch_type, these_projs in proj_by_type.items():
        these_idxs, these_projs = zip(*these_projs)
        ch_names = ch_names_by_type[ch_type]
        idx = np.where([
            np.in1d(ch_names, proj['data']['col_names']).all()
            for proj in these_projs
        ])[0]
        used[idx] += 1
        count = len(these_projs)
        for proj in these_projs:
            sub_idx = [
                proj['data']['col_names'].index(name) for name in ch_names
            ]
            proj['data']['data'] = proj['data']['data'][:, sub_idx]
            proj['data']['col_names'] = ch_names
        ba_ax = plt.subplot2grid(shape, (ri, 0), colspan=cs_trace, fig=fig)
        topo_axes = [
            plt.subplot2grid(shape, (ri, ci * cs_topo + cs_trace),
                             colspan=cs_topo,
                             fig=fig) for ci in range(count)
        ]
        tr_ax = plt.subplot2grid(shape, (ri, n_col - cs_trace),
                                 colspan=cs_trace,
                                 fig=fig)
        # topomaps
        _plot_projs_topomap(these_projs,
                            info=info,
                            show=False,
                            axes=topo_axes,
                            **topomap_kwargs)
        for idx, proj, ax_ in zip(these_idxs, these_projs, topo_axes):
            ax_.set_title('')  # could use proj['desc'] but it's long
            ax_.set_xlabel(f'projs[{idx}]', fontsize='small')
        unit = DEFAULTS['units'][ch_type]
        # traces
        this_evoked = evoked.copy().pick_channels(ch_names)
        p = np.concatenate([p['data']['data'] for p in these_projs])
        assert p.shape == (len(these_projs), len(this_evoked.data))
        traces = np.dot(p, this_evoked.data)
        traces *= np.sign(np.mean(np.dot(this_evoked.data, traces.T),
                                  0))[:, np.newaxis]
        if picks_trace is not None:
            ch_traces = evoked.data[picks_trace]
            ch_traces -= np.mean(ch_traces, axis=1, keepdims=True)
            ch_traces /= np.abs(ch_traces).max()
        _plot_evoked(this_evoked, picks='all', axes=[tr_ax], **pe_kwargs)
        for line in tr_ax.lines:
            line.set(lw=0.5, zorder=3)
        for t in list(tr_ax.texts):
            t.remove()
        scale = 0.8 * np.abs(tr_ax.get_ylim()).max()
        hs, labels = list(), list()
        traces /= np.abs(traces).max()  # uniformly scaled
        for ti, trace in enumerate(traces):
            hs.append(
                tr_ax.plot(this_evoked.times,
                           trace * scale,
                           color=proj_colors[ti % len(proj_colors)],
                           zorder=5)[0])
            labels.append(f'projs[{these_idxs[ti]}]')
        traces /= np.abs(traces).max(1, keepdims=True)  # independently
        for ti, trace in enumerate(traces):
            tr_ax.plot(this_evoked.times,
                       trace * scale,
                       color=proj_colors[ti % len(proj_colors)],
                       zorder=3.5,
                       ls='--',
                       lw=1.,
                       alpha=0.75)
        if picks_trace is not None:
            trace_ch = [evoked.ch_names[pick] for pick in picks_trace]
            if len(picks_trace) == 1:
                trace_ch = trace_ch[0]
            hs.append(
                tr_ax.plot(this_evoked.times,
                           ch_traces.T * scale,
                           color=trace_color,
                           lw=3,
                           zorder=4,
                           alpha=0.75)[0])
            labels.append(str(trace_ch))
        tr_ax.set(title='', xlabel='', ylabel='')
        # This will steal space from the subplots in a constrained layout
        # https://matplotlib.org/3.5.0/tutorials/intermediate/constrainedlayout_guide.html#legends  # noqa: E501
        tr_ax.legend(hs,
                     labels,
                     loc='center left',
                     borderaxespad=0.05,
                     bbox_to_anchor=[1.05, 0.5])
        last_ax[1] = tr_ax
        key = 'Projected time course'
        if key not in first_ax:
            first_ax[key] = tr_ax
        # Before and after traces
        _plot_evoked(this_evoked, picks='all', axes=[ba_ax], **pe_kwargs)
        for line in ba_ax.lines:
            line.set(lw=0.5, zorder=3)
        loff = len(ba_ax.lines)
        this_proj_evoked = this_evoked.copy().add_proj(these_projs)
        # with meg='combined' any existing mag projectors (those already part
        # of evoked before we add_proj above) will have greatly
        # reduced power, so we ignore the warning about this issue
        this_proj_evoked.apply_proj(verbose='error')
        _plot_evoked(this_proj_evoked, picks='all', axes=[ba_ax], **pe_kwargs)
        for line in ba_ax.lines[loff:]:
            line.set(lw=0.5, zorder=4, color=after_color)
        for t in list(ba_ax.texts):
            t.remove()
        ba_ax.set(title='', xlabel='')
        ba_ax.set(ylabel=f'{type_titles[ch_type]}\n{unit}')
        last_ax[0] = ba_ax
        key = f'Before (black) and after ({after_name})'
        if key not in first_ax:
            first_ax[key] = ba_ax
        ri += 1
    for ax in last_ax:
        ax.set(xlabel='Time (sec)')
    for title, ax in first_ax.items():
        ax.set_title(title, fontsize='medium')
    plt_show(show)
    return fig
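
A usage sketch (the dataset path, parameters, and channel name are assumptions based on the MNE sample dataset, not part of the function above): compute EOG projectors from a raw recording, average the EOG epochs, and plot both jointly.

import mne
from mne.preprocessing import compute_proj_eog, create_eog_epochs

data_path = mne.datasets.sample.data_path()
raw_fname = data_path / 'MEG' / 'sample' / 'sample_audvis_raw.fif'
raw = mne.io.read_raw_fif(raw_fname).crop(0, 60).load_data()

# Projectors computed from EOG activity, plus the averaged EOG epochs.
projs, _ = compute_proj_eog(raw, n_grad=1, n_mag=1, n_eeg=0)
eog_evoked = create_eog_epochs(raw).average()

# Traces before/after projection, per-projector topomaps, and the data
# projected onto each projector, with the EOG channel as ground truth.
fig = mne.viz.plot_projs_joint(projs, eog_evoked, picks_trace='EOG 061')
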
Ejemplo n.º 40
0
def calculate_bayes_factors(fold_change, output_file, essential_genes,
                            non_essential_genes, columns_to_test, network_file,
                            align_info, use_bootstrapping, use_small_sample,
                            filter_multi_target, loci_without_mismatch,
                            loci_with_mismatch, bootstrap_iterations,
                            no_of_cross_validations, sgrna_bayes_factors,
                            equalise_sgrna_no, seed, run_test_mode,
                            equalise_rep_no):
    """
    \b
    Calculate Bayes Factors from an input fold change file:

        \b
        BAGEL.py bf -i [fold change] -o [output file] -e [essentials genes] -n [nonessentials genes] -c [columns]


    \b
    Calculates a log2 Bayes Factor for each gene. Positive BFs indicate confidence that the gene is essential.
    Output written to the [output file] contains: gene name, mean Bayes Factor across all iterations, std deviation of
    BFs, and the number of iterations in which the gene was part of the test set (and a BF was calculated).


    \b
    Required options:
        -i --fold-change [fold change file]                     Tab-delimited file of reagents and fold changes
                                                                (see documentation for format).
        -o, --output-file [output file]                         Output filename
        -e, --essential-genes [reference essentials]            File with list of training set of essential genes
        -n, --non-essential-genes [reference nonessentials]     File with list of training set of nonessential genes
        -c [columns to test]                                    comma-delimited list of columns in input file to
                                                                include in analysis

    \b
    Network options:
        -w  [network file]    Enable Network boosting. Tab-delimited file of edges. [GeneA (\\t) GeneB]

    \b
    Multi-target guides filtering options:
        -m, --filter-multi-target     Enable filtering multi-targeting guide RNAs
        --align-info  [file]          Input precalculated align-info file
        -m0, --loci-without-mismatch  Filter guide RNAs without mismatch targeting more than [N] loci, default = 10
        -m1, --loci-with-mismatch     Filter guide RNAs with a 1-bp mismatch targeting more than [N] loci, default = 10

    \b
    Other options:
        -b, --bootstrapping            Use bootstrapping instead of cross-validation (Slow)
        -s, --small-sample             Low-fat BAGEL, Only resampled training set (Bootstrapping, iteration = 100)
        -r  --sgrna-bayes-factors      Calculate sgRNA-wise Bayes Factor
        -f  --equalise-sgrna-no        Equalize the number of sgRNAs per gene to a particular value [Number]
        -p  --equalise-rep-no          Equalize the number of replicates to a particular value [Number]
        -N  --no-of-cross-validations  Number of sections for cross validation (default 10)
        -NB  --bootstraps-iterations   Number of bootstrap iterations (default 1000)
        -s, --seed=N                   Define random seed
        -h, --help                     Show this help text

    \b
    Example:

        \b
        BAGEL.py bf -i fc_file.txt -o results.bf -e ess_training_set.txt -n noness_training_set.txt -c 1,2,3

    """
    np.random.seed(seed)  # set random seed
    if network_file:
        network_boost = True
    else:
        network_boost = False

    if sgrna_bayes_factors:
        rna_level = True
    else:
        rna_level = False

    if network_file and sgrna_bayes_factors:
        network_boost = False

    if equalise_sgrna_no:
        flat_sgrna = True
    else:
        flat_sgrna = False

    if equalise_rep_no:
        flat_rep = True
    else:
        flat_rep = False

    if use_small_sample:
        train_method = 0
        bootstrap_iterations = 100

    elif use_bootstrapping:
        train_method = 0

    else:
        train_method = 1

    genes = {}
    fc = {}
    gene2rna = {}
    rna2gene = {}

    multi_targeting_sgrnas = dict()
    multi_targeting_sgrnas_info = dict()

    if filter_multi_target:

        try:
            aligninfo = pd.read_csv(align_info,
                                    header=None,
                                    index_col=0,
                                    sep="\t").fillna("")
            for seqid in aligninfo.index:
                perfectmatch = 0
                mismatch_1bp = 0
                perfectmatch_gene = 0
                mismatch_1bp_gene = 0
                if aligninfo[1][seqid] != "":
                    perfectmatch = len(aligninfo[1][seqid].split(","))
                if aligninfo[2][seqid] != "":
                    perfectmatch_gene = len(aligninfo[2][seqid].split(","))
                if aligninfo[3][seqid] != "":
                    mismatch_1bp = len(aligninfo[3][seqid].split(","))
                if aligninfo[4][seqid] != "":
                    mismatch_1bp_gene = len(aligninfo[4][seqid].split(","))
                if perfectmatch > loci_without_mismatch or mismatch_1bp > loci_with_mismatch:
                    multi_targeting_sgrnas[seqid] = True
                elif perfectmatch > 1 or mismatch_1bp > 0:
                    multi_targeting_sgrnas_info[seqid] = (perfectmatch,
                                                          mismatch_1bp,
                                                          perfectmatch_gene,
                                                          mismatch_1bp_gene)

        except:
            print("Please check align-info file")
            sys.exit(1)

        print("Total %d multi-targeting gRNAs are discarded" %
              len(multi_targeting_sgrnas))

    #
    # LOAD FOLDCHANGES
    #
    rnatagset = set()
    with open(fold_change) as fin:
        fieldname = fin.readline().rstrip().split('\t')
        #
        # DEFINE CONTROLS
        #
        columns = columns_to_test.split(',')
        try:
            try:
                column_list = list(map(int, columns))
                column_labels = [fieldname[x + 1] for x in column_list]
            except ValueError:
                column_labels = columns
                column_list = [
                    x for x in range(len(fieldname) - 1)
                    if fieldname[x + 1] in column_labels
                ]  # +1 because field 0 is the sgRNA tag; user columns count GENE as column 0
            print("Using column:  " + ", ".join(column_labels))
        # print "Using column:  " + ", ".join(map(str,column_list))

        except:
            print("Invalid columns")
            sys.exit(1)

        for line in fin:
            fields = line.rstrip().split('\t')
            rnatag = fields[0]
            if filter_multi_target is True:  # multitargeting sgrna filtering
                if rnatag in multi_targeting_sgrnas:
                    continue  # skip multitargeting sgrna.
            if rnatag in rnatagset:
                print("Error! sgRNA tag duplicates")
                sys.exit(1)
            rnatagset.add(rnatag)
            gsym = fields[1]

            genes[gsym] = 1
            if gsym not in gene2rna:
                gene2rna[gsym] = []
            gene2rna[gsym].append(rnatag)
            rna2gene[rnatag] = gsym
            fc[rnatag] = {}
            for i in column_list:
                fc[rnatag][i] = float(
                    fields[i + 1]
                )  # per user docs, GENE is column 0, first data column is col 1.

    genes_array = np.array(list(genes.keys()))
    gene_idx = np.arange(len(genes))
    print("Number of unique genes:  " + str(len(genes)))

    #
    # DEFINE REFERENCE SETS
    #
    coreEss = []

    with open(essential_genes) as fin:
        skip_header = fin.readline()
        for line in fin:
            coreEss.append(line.rstrip().split('\t')[0])
    coreEss = np.array(coreEss)
    print("Number of reference essentials: " + str(len(coreEss)))

    nonEss = []
    with open(non_essential_genes) as fin:
        skip_header = fin.readline()
        for line in fin:
            nonEss.append(line.rstrip().split('\t')[0])

    nonEss = np.array(nonEss)
    print("Number of reference nonessentials: " + str(len(nonEss)))

    #
    # LOAD NETWORK
    #

    if network_boost is True:
        network = {}
        edgecount = 0
        with open(network_file) as fin:
            for line in fin:
                linearray = line.rstrip().split('\t')  # GeneA \t GeneB format
                if linearray[0] in genes_array and linearray[1] in genes_array:
                    for i in [0, 1]:
                        if linearray[i] not in network:
                            network[linearray[i]] = {}
                        network[linearray[i]][linearray[
                            -1 * (i - 1)]] = 1  # save edge information
                    edgecount += 1

        print("Number of network edges: " + str(edgecount))

    #
    # INITIALIZE BFS
    #

    # Define foldchange dynamic threshold. logarithm decay.
    # Parameters are defined by regression (achilles data)  2**-7 was used in previous version.

    FC_THRESH = 2**(
        -1.1535 * np.log(len(np.intersect1d(genes_array, nonEss)) + 13.324) +
        0.7728)
    bf = {}
    boostedbf = {}
    for g in genes_array:
        for rnatag in gene2rna[g]:
            bf[rnatag] = []

        boostedbf[g] = []  # boosted bf at gene level

    #
    # TRAINING
    #
    if use_small_sample:
        # declare training class
        # training_data = Training(setdiff1d(gene_idx,np.where(in1d(genes_array,coreEss))),cvnum=NUMCV)
        # declare training class (only for Gold-standard gene set)
        training_data = Training(np.where(
            np.in1d(genes_array, np.union1d(coreEss, nonEss)))[0],
                                 cvnum=no_of_cross_validations)
        # all non-goldstandards
        all_non_gs = np.where(
            np.logical_not(np.in1d(genes_array, np.union1d(coreEss,
                                                           nonEss))))[0]
    else:
        training_data = Training(
            gene_idx, cvnum=no_of_cross_validations)  # declare training class

    if train_method == 0:
        LOOPCOUNT = bootstrap_iterations
    elif train_method == 1:
        LOOPCOUNT = no_of_cross_validations  # 10-folds

    if run_test_mode == True:
        fp = open(output_file + ".traininfo", "w")
        fp.write("#1: Loopcount\n#2: Training set\n#3: Testset\n")

    print("Iter TrainEss TrainNon TestSet")
    sys.stdout.flush()
    for loop in range(LOOPCOUNT):
        currentbf = {}
        printstr = ""
        printstr += str(loop)

        #
        # bootstrap resample (10-folds cross-validation) from gene list to get the training set
        # test set for this iteration is everything not selected in bootstrap resampled (10-folds cross-validation)
        # training set
        # define essential and nonessential training sets:  arrays of indexes
        #

        gene_train_idx, gene_test_idx = training_data.get_data(train_method)
        if use_small_sample:
            # test set is union of rest of training set (gold-standard) and the other genes (all of non-gold-standard)
            gene_test_idx = np.union1d(gene_test_idx, all_non_gs)

        if run_test_mode:
            fp.write("%d\n%s\n%s\n" %
                     (loop, ",".join(genes_array[gene_train_idx]), ",".join(
                         genes_array[gene_test_idx])))

        train_ess = np.where(np.in1d(genes_array[gene_train_idx], coreEss))[0]
        train_non = np.where(np.in1d(genes_array[gene_train_idx], nonEss))[0]
        printstr += " " + str(len(train_ess))
        printstr += " " + str(len(train_non))
        printstr += " " + str(len(gene_test_idx))
        print(printstr)
        sys.stdout.flush()
        #
        # define ess_train: vector of observed fold changes of essential genes in training set
        #
        ess_train_fc_list_of_lists = [
            fc[rnatag] for g in genes_array[gene_train_idx[train_ess]]
            for rnatag in gene2rna[g]
        ]
        ess_train_fc_flat_list = [
            obs for sublist in ess_train_fc_list_of_lists
            for obs in list(sublist.values())
        ]
        #
        # define non_train vector of observed fold changes of nonessential genes in training set
        #
        non_train_fc_list_of_lists = [
            fc[rnatag] for g in genes_array[gene_train_idx[train_non]]
            for rnatag in gene2rna[g]
        ]
        non_train_fc_flat_list = [
            obs for sublist in non_train_fc_list_of_lists
            for obs in list(sublist.values())
        ]
        #
        # calculate empirical fold change distributions for both
        #
        kess = stats.gaussian_kde(ess_train_fc_flat_list)
        knon = stats.gaussian_kde(non_train_fc_flat_list)
        #
        # define empirical upper and lower bounds within which to calculate BF = f(fold change)
        #
        x = np.arange(-10, 2, 0.01)
        nonfitx = knon.evaluate(x)
        # define lower bound empirical fold change threshold:  minimum FC np.where knon is above threshold
        f = np.where(nonfitx > FC_THRESH)
        xmin = round_to_hundredth(min(x[f]))
        # define upper bound empirical fold change threshold:  minimum value of log2(ess/non)
        subx = np.arange(xmin, max(x[f]), 0.01)
        logratio_sample = np.log2(kess.evaluate(subx) / knon.evaluate(subx))
        f = np.where(logratio_sample == logratio_sample.min())
        xmax = round_to_hundredth(subx[f])
        #
        # round foldchanges to nearest 0.01
        # precalculate logratios and build lookup table (for speed)
        #
        logratio_lookup = {}
        for i in np.arange(xmin, xmax + 0.01, 0.01):
            logratio_lookup[np.around(i * 100)] = np.log2(
                kess.evaluate(i) / knon.evaluate(i))
        #
        # calculate BFs from lookup table for withheld test set
        #

        # linear interpolation (regression of BF vs. fold change)
        testx = list()
        testy = list()

        for g in genes_array[gene_train_idx]:
            for rnatag in gene2rna[g]:
                for foldchange in list(fc[rnatag].values()):
                    if foldchange >= xmin and foldchange <= xmax:
                        testx.append(np.around(foldchange * 100) / 100)
                        testy.append(logratio_lookup[np.around(foldchange *
                                                               100)][0])
        try:
            slope, intercept, r_value, p_value, std_err = stats.linregress(
                np.array(testx), np.array(testy))
        except:
            print("Regression failed. Check quality of the screen")
            sys.exit(1)
        #
        # BF calculation
        #

        for g in genes_array[gene_test_idx]:
            for rnatag in gene2rna[g]:
                bayes_factor = []
                for rep in column_list:
                    bayes_factor.append(slope * fc[rnatag][rep] + intercept)
                bf[rnatag].append(bayes_factor)

    if run_test_mode == True:
        fp.close()

    num_obs = dict()
    if rna_level is False:
        bf_mean = dict()
        bf_std = dict()
        bf_norm = dict()  # sgRNA number complement
    if rna_level or filter_multi_target:
        bf_mean_rna_rep = dict()
        bf_std_rna_rep = dict()
    # bf_norm_rna_rep = dict()

    for g in gene2rna:
        num_obs[g] = len(bf[gene2rna[g][0]])
        if rna_level or filter_multi_target:
            for rnatag in gene2rna[g]:
                bf_mean_rna_rep[rnatag] = dict()
                bf_std_rna_rep[rnatag] = dict()
                t = list(zip(*bf[rnatag]))
                for rep in range(len(column_list)):
                    bf_mean_rna_rep[rnatag][column_list[rep]] = np.mean(t[rep])
                    bf_std_rna_rep[rnatag][column_list[rep]] = np.std(t[rep])

        if rna_level == False:
            sumofbf_list = list()
            for i in range(num_obs[g]):
                sumofbf = 0.0
                for rnatag in gene2rna[g]:
                    sumofbf += sum(bf[rnatag][i])
                sumofbf_list.append(sumofbf)  # append each iter
            bf_mean[g] = np.mean(sumofbf_list)
            bf_std[g] = np.std(sumofbf_list)

    #
    # BUILD MULTIPLE REGRESSION MODEL FOR MULTI TARGETING GUIDE RNAs
    #
    if filter_multi_target:
        count = 0
        trainset = dict()
        bf_multi_corrected_gene = dict()
        bf_multi_corrected_rna = dict()
        for gene in gene2rna:
            # multi_targeting_sgrnas_info[seqid] = (perfectmatch, mismatch_1bp, perfectmatch_gene, mismatch_1bp_gene)
            multitarget = list()
            onlytarget = list()
            for seqid in gene2rna[gene]:
                if seqid not in aligninfo.index:
                    continue
                if seqid in multi_targeting_sgrnas_info:
                    multitarget.append(seqid)
                else:
                    onlytarget.append(seqid)

            if len(
                    onlytarget
            ) > 0:  # comparison between sgRNAs targeting one locus and multiple loci
                if len(multitarget) > 0:

                    bf_only = np.mean([
                        sum(list(bf_mean_rna_rep[seqid].values()))
                        for seqid in onlytarget
                    ])
                    for seqid in onlytarget:
                        trainset[seqid] = [1, 0, 0]

                    for seqid in multitarget:
                        if multi_targeting_sgrnas_info[seqid][
                                2] > 1 or multi_targeting_sgrnas_info[seqid][
                                    3] > 0:  # train model using multi-targeting only targeting one protein coding gene
                            continue

                        count += 1
                        increment = sum(list(
                            bf_mean_rna_rep[seqid].values())) - bf_only

                        trainset[seqid] = [
                            multi_targeting_sgrnas_info[seqid][0],
                            multi_targeting_sgrnas_info[seqid][1], increment
                        ]

        if count < 10:
            print(
                "Not enough training data for calculating the multi-targeting effect.\n"
            )
            print(
                "This may be caused by unmatched gRNA names between the fold change file and the align-info file.\n"
            )
            print("Filtering is not finished\n")
            filter_multi_target = False

        else:

            trainset = pd.DataFrame().from_dict(trainset).T
            X = trainset[[0, 1]]
            y = trainset[2]

            regressor = LinearRegression()
            regressor.fit(X, y)
            coeff_df = pd.DataFrame(regressor.coef_,
                                    X.columns,
                                    columns=['Coefficient'])
            for i in [0, 1]:
                if coeff_df['Coefficient'][i] < 0:
                    print(
                        "Regression coefficient is below zero. Substituted with zero\n"
                    )
                    coeff_df['Coefficient'][i] = 0.0
            print(
                "Multi-targeting effects from perfectly matched loci = %.3f and 1-bp mismatched loci = %.3f"
                % (coeff_df['Coefficient'][0], coeff_df['Coefficient'][1]))

            if rna_level == False:
                for g in gene2rna:
                    penalty = 0.0
                    for seqid in gene2rna[g]:
                        if seqid in multi_targeting_sgrnas_info:
                            penalty += float(
                                multi_targeting_sgrnas_info[seqid][0] -
                                1) * coeff_df['Coefficient'][0] + float(
                                    multi_targeting_sgrnas_info[seqid]
                                    [1]) * coeff_df['Coefficient'][1]
                    bf_multi_corrected_gene[g] = bf_mean[g] - penalty
            else:
                for g in gene2rna:
                    for seqid in gene2rna[g]:
                        if seqid in multi_targeting_sgrnas_info:
                            penalty = float(
                                multi_targeting_sgrnas_info[seqid][0] -
                                1) * coeff_df['Coefficient'][0] + float(
                                    multi_targeting_sgrnas_info[seqid]
                                    [1]) * coeff_df['Coefficient'][1]
                        else:
                            penalty = 0.0
                        bf_multi_corrected_rna[seqid] = sum(
                            list(bf_mean_rna_rep[seqid].values())) - penalty

    #
    #  NORMALIZE sgRNA COUNT
    #
    if rna_level is False and flat_sgrna == True:
        if filter_multi_target == True:
            targetbf = bf_multi_corrected_gene
        else:
            targetbf = bf_mean

        for g in gene2rna:
            multiple_factor = equalise_sgrna_no / float(len(gene2rna[g]))
            bf_norm[g] = targetbf[g] * multiple_factor
    '''			
    if bf_std[rnatag] == 0.0:
        bf_norm[rnatag] = float('inf')
    else:
        bf_norm[g] = ( bf[rnatag] - bf_mean[rnatag] ) / bf_std[rnatag]
    '''
    training_data = Training(gene_idx)  # set training class reset

    #
    # calculate network scores
    #

    if network_boost == True and rna_level == False:  # Network boost is only working for gene level
        if run_test_mode == True:  # TEST MODE
            fp = open(output_file + ".netscore", "w")
        print("\nNetwork score calculation start\n")

        networkscores = {}
        for g in genes_array[gene_idx]:
            if g in network:
                templist = list()
                for neighbor in network[g]:
                    if neighbor in bf_mean:
                        templist.append(bf_mean[neighbor])

                templist.sort(reverse=True)

                networkscores[g] = fibo_weighted_sum(templist)
        #
        # start training
        #

        for loop in range(LOOPCOUNT):
            currentnbf = {}
            printstr = ""
            printstr += str(loop)

            #
            # draw train, test sets
            #
            gene_train_idx, gene_test_idx = training_data.get_data(
                train_method)
            #
            # define essential and nonessential training sets:  arrays of indexes
            #
            train_ess = np.where(np.in1d(genes_array[gene_train_idx],
                                         coreEss))[0]
            train_non = np.where(np.in1d(genes_array[gene_train_idx],
                                         nonEss))[0]
            printstr += " " + str(len(train_ess))
            printstr += " " + str(len(train_non))
            printstr += " " + str(len(gene_test_idx))

            sys.stdout.flush()
            #
            # calculate Network BF for test set
            #
            ess_ns_list = [
                networkscores[x]
                for x in genes_array[gene_train_idx[train_ess]]
                if x in networkscores
            ]
            non_ns_list = [
                networkscores[x]
                for x in genes_array[gene_train_idx[train_non]]
                if x in networkscores
            ]

            kess = stats.gaussian_kde(ess_ns_list)
            knon = stats.gaussian_kde(non_ns_list)
            #
            # set x boundary for linear regression
            #
            testx = list()
            testy = list()
            xmin = float(np.inf)
            xmax = float(-np.inf)

            for networkscore in np.arange(max(ess_ns_list), min(ess_ns_list),
                                          -0.01):
                density_ess = kess.evaluate(networkscore)[0]
                density_non = knon.evaluate(networkscore)[0]
                if density_ess == 0.0 or density_non == 0.0:
                    continue

                if np.log2(density_ess /
                           density_non) > -5 and networkscore < np.array(
                               ess_ns_list).mean():  # reverse
                    xmin = min(xmin, networkscore)

            for networkscore in np.arange(min(non_ns_list), max(non_ns_list),
                                          0.01):
                density_ess = kess.evaluate(networkscore)[0]
                density_non = knon.evaluate(networkscore)[0]
                if density_ess == 0.0 or density_non == 0.0:
                    continue
                if np.log2(density_ess /
                           density_non) < 5 and networkscore > np.array(
                               non_ns_list).mean():  # reverse
                    xmax = max(xmax, networkscore)
            #
            # linear regression
            #
            testx = list()
            testy = list()
            for g in genes_array[gene_train_idx]:
                if g in networkscores:
                    if networkscores[g] >= xmin and networkscores[g] <= xmax:
                        testx.append(np.around(networkscores[g] * 100) / 100)
                        testy.append(
                            np.log2(
                                kess.evaluate(networkscores[g])[0] /
                                knon.evaluate(networkscores[g])[0]))

            slope, intercept, r_value, p_value, std_err = stats.linregress(
                np.array(testx), np.array(testy))

            for g in genes_array[gene_test_idx]:
                if g in networkscores:
                    if run_test_mode == True:
                        fp.write("%s\t%f\t%f\n" %
                                 (g, networkscores[g],
                                  slope * networkscores[g] + intercept))
                    nbf = slope * networkscores[g] + intercept
                else:
                    nbf = 0.0

                boostedbf[g].append(bf_mean[g] + nbf)
                if flat_sgrna == True:
                    boostedbf[g].append(bf_norm[g] + nbf)

        if run_test_mode == True:
            fp.close()

    #
    # print out results
    #

    # Equalizing factor (Replicates)
    if flat_rep == True:
        eqf = equalise_rep_no / float(len(column_labels))
    else:
        eqf = 1

    # print out
    with open(output_file, 'w') as fout:

        if rna_level == True:
            fout.write('RNA\tGENE')
            for i in range(len(column_list)):
                fout.write('\t{0:s}'.format(column_labels[i]))
                if train_method == 0:
                    fout.write('\t{0:s}'.format(column_labels[i] + "_STD"))
            fout.write('\tBF')
            if train_method == 0:
                fout.write('\tNumObs')
            fout.write('\n')

            for rnatag in sorted(bf.keys()):
                # RNA tag
                fout.write('{0:s}\t'.format(rnatag))
                # Gene
                gene = rna2gene[rnatag]
                fout.write('{0:s}\t'.format(gene))

                # BF of replicates
                for rep in column_list:
                    fout.write('{0:4.3f}\t'.format(
                        bf_mean_rna_rep[rnatag][rep]))
                    if train_method == 0:
                        fout.write('{0:4.3f}\t'.format(
                            bf_std_rna_rep[rnatag][rep]))

                # Sum BF of replicates
                if filter_multi_target == True:
                    fout.write('{0:4.3f}'.format(
                        float(bf_multi_corrected_rna[rnatag]) * eqf
                    ))  # eqf = equalizing factor for the number of replicates
                else:
                    fout.write('{0:4.3f}'.format(
                        float(sum(list(bf_mean_rna_rep[rnatag].values()))) *
                        eqf))

                # Num obs
                if train_method == 0:
                    fout.write('\t{0:d}'.format(num_obs[gene]))
                fout.write('\n')
        else:
            fout.write('GENE')
            if network_boost == True:
                fout.write('\tBoostedBF')
                if train_method == 0:
                    fout.write('\tSTD_BoostedBF')
            fout.write('\tBF')
            if train_method == 0:
                fout.write('\tSTD\tNumObs')
            if flat_sgrna == True:
                fout.write('\tNormBF')
            fout.write('\n')

            for g in sorted(genes.keys()):
                # Gene
                fout.write('{0:s}'.format(g))
                if network_boost == True:
                    boostedbf_mean = np.mean(boostedbf[g])
                    boostedbf_std = np.std(boostedbf[g])
                    fout.write('\t{0:4.3f}'.format(
                        float(boostedbf_mean) * eqf))
                    if train_method == 0:
                        fout.write('\t{0:4.3f}'.format(
                            float(boostedbf_std) * eqf))

                # BF
                if filter_multi_target == True:
                    fout.write('\t{0:4.3f}'.format(
                        float(bf_multi_corrected_gene[g]) * eqf
                    ))  # eqf = equalizing factor for the number of replicates
                else:
                    fout.write('\t{0:4.3f}'.format(float(bf_mean[g]) * eqf))
                # STD, Count
                if train_method == 0:
                    fout.write('\t{0:4.3f}\t{1:d}'.format(
                        float(bf_std[g]), num_obs[g]))
                # Normalized BF
                if flat_sgrna == True:
                    fout.write('\t{0:4.3f}'.format(float(bf_norm[g])))

                fout.write('\n')
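
Stripped of the bookkeeping, the core of the BF calculation above is: fit kernel densities to the fold changes of essential and non-essential training genes, take the log2 density ratio over an empirically bounded range, and fit a line so the ratio can be evaluated for any held-out fold change. A toy, self-contained sketch of that idea with synthetic fold changes (not BAGEL's thresholds or data):

import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
ess_fc = rng.normal(-3.0, 1.0, 500)     # training essentials deplete
non_fc = rng.normal(0.0, 0.7, 500)      # training non-essentials do not

kess = stats.gaussian_kde(ess_fc)
knon = stats.gaussian_kde(non_fc)

# log2 ratio of the two densities over a grid, then a linear fit so the
# BF can be evaluated cheaply for any test fold change in that range.
x = np.arange(-6, 1, 0.01)
logratio = np.log2(kess.evaluate(x) / knon.evaluate(x))
slope, intercept, *_ = stats.linregress(x, logratio)

test_fc = np.array([-4.0, -1.5, 0.5])
print(slope * test_fc + intercept)      # positive -> evidence of essentiality
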
Ejemplo n.º 41
0
def _make_bibc_bcbv(bus, branch, graph):
    """
    performs breadth-first-search bus ordering and creates the Direct Load Flow (DLF) matrix,
    which establishes a direct relation between bus current injections and the voltage drops from each bus to the root bus

    :param bus: ppc-style bus array
    :param branch: ppc-style branch array
    :param graph: sparse adjacency matrix of the network
    :return: DLF matrix DLF = BCBV * BIBC where
                    BIBC - Bus Injection to Branch-Current
                    BCBV - Branch-Current to Bus-Voltage
            buses ordered according to bfs, one array per connected network
            (used to convert the voltage array back to the original bus order)
    """

    nobus = bus.shape[0]
    nobranch = branch.shape[0]

    # reference bus is assumed as root bus for a radial network
    refs = bus[bus[:, BUS_TYPE] == 3, BUS_I]
    norefs = len(refs)

    G = graph.copy()  # network graph

    # dictionary with impedance values keyed by branch tuple (frombus, tobus)
    # TODO use list or array, not both
    branches_lst = list(
        zip(branch[:, F_BUS].real.astype(int), branch[:,
                                                      T_BUS].real.astype(int)))
    branches_arr = branch[:, F_BUS:T_BUS + 1].real.astype(int)
    branches_ind_dict = dict(
        zip(zip(branches_arr[:, 0], branches_arr[:, 1]), range(0, nobranch)))
    branches_ind_dict.update(
        dict(
            zip(zip(branches_arr[:, 1], branches_arr[:, 0]),
                range(0, nobranch))))

    tap = branch[:, TAP]  # * np.exp(1j * np.pi / 180 * branch[:, SHIFT])
    z_ser = (branch[:, BR_R].real +
             1j * branch[:, BR_X].real) * tap  # series impedance
    z_brch_dict = dict(zip(branches_lst, z_ser))

    # initialization of lists for building sparse BIBC and BCBV matrices
    rowi_BIBC = []
    coli_BIBC = []
    data_BIBC = []
    data_BCBV = []

    buses_ordered_bfs_nets = []
    for ref in refs:
        # ordering buses according to breadth-first-search (bfs)
        buses_ordered_bfs, predecs_bfs = csgraph.breadth_first_order(
            G, ref, directed=False, return_predecessors=True)
        buses_ordered_bfs_nets.append(buses_ordered_bfs)
        branches_ordered_bfs = list(
            zip(predecs_bfs[buses_ordered_bfs[1:]], buses_ordered_bfs[1:]))
        G_tree = csgraph.breadth_first_tree(G, ref, directed=False)

        # if multiple networks get subnetwork branches
        if norefs > 1:
            branches_sub_mask = (
                np.in1d(branches_arr[:, 0], buses_ordered_bfs)
                & np.in1d(branches_arr[:, 1], buses_ordered_bfs))
            branches = np.sort(branches_arr[branches_sub_mask, :], axis=1)
        else:
            branches = np.sort(branches_arr, axis=1)

        # identify loops if graph is not a tree
        branches_loops = []
        if G_tree.nnz < branches.shape[0]:
            G_tree_nnzs = G_tree.nonzero()
            branches_tree = np.sort(np.array([G_tree_nnzs[0],
                                              G_tree_nnzs[1]]).T,
                                    axis=1)
            branches_loops = (
                set(zip(branches[:, 0], branches[:, 1])) -
                set(zip(branches_tree[:, 0], branches_tree[:, 1])))

        # ------ building BIBC and BCBV matrices ------
        # branches in trees
        brchi = 0
        for brch in branches_ordered_bfs:
            tree_down, predecs = csgraph.breadth_first_order(
                G_tree, brch[1], directed=True, return_predecessors=True)
            if len(tree_down) == 1:  # If at leaf
                pass
            if brch in z_brch_dict:
                z_br = z_brch_dict[brch]
            else:
                z_br = z_brch_dict[brch[::-1]]
            rowi_BIBC += [branches_ind_dict[brch]] * len(tree_down)
            coli_BIBC += list(tree_down)
            data_BCBV += [z_br] * len(tree_down)
            data_BIBC += [1] * len(tree_down)

        # branches from loops
        for loop_i, brch_loop in enumerate(branches_loops):
            path_lens, path_preds = csgraph.shortest_path(
                G_tree,
                directed=False,
                indices=brch_loop,
                return_predecessors=True)
            init, end = brch_loop
            loop = [end]
            while init != end:
                end = path_preds[0, end]
                loop.append(end)

            loop_size = len(loop)
            coli_BIBC += [nobus + loop_i] * loop_size
            for i in range(len(loop)):
                brch = (loop[i - 1], loop[i])
                if np.argwhere(buses_ordered_bfs == brch[0]) < np.argwhere(
                        buses_ordered_bfs == brch[1]):
                    brch_direct = 1
                else:
                    brch_direct = -1
                data_BIBC.append(brch_direct)

                if brch in branches_ind_dict:
                    rowi_BIBC.append(branches_ind_dict[brch])
                else:
                    rowi_BIBC.append(branches_ind_dict[brch[::-1]])

                if brch in z_brch_dict:
                    data_BCBV.append(z_brch_dict[brch] * brch_direct)
                else:
                    data_BCBV.append(z_brch_dict[brch[::-1]] * brch_direct)

                brchi += 1

    # construction of the BIBC matrix
    # column indices correspond to buses: the reference buses come first after BFS
    # ordering, so bus indices are shifted down by the number of reference buses (norefs)
    BIBC = csr_matrix((data_BIBC, (rowi_BIBC, np.array(coli_BIBC) - norefs)),
                      shape=(nobranch, nobranch))
    BCBV = csr_matrix((data_BCBV, (rowi_BIBC, np.array(coli_BIBC) - norefs)),
                      shape=(nobranch, nobranch)).transpose()

    if BCBV.shape[0] > nobus - 1:  # if nbrch > nobus - 1 -> network has loops
        DLF_loop = BCBV * BIBC
        # DLF = [A  M.T ]
        #       [M  N   ]
        A = DLF_loop[0:nobus - 1, 0:nobus - 1]
        M = DLF_loop[nobus - 1:, 0:nobus - 1]
        N = DLF_loop[nobus - 1:, nobus - 1:].A
        # since the number of loops is relatively small, N is expected to be small and dense;
        # in that case the dense version is more efficient, i.e. N is converted to dense and
        # inverted with sp.linalg.inv(N)
        DLF = A - M.T * csr_matrix(sp.linalg.inv(N)) * M  # Kron's Reduction
    else:  # no loops -> radial network
        DLF = BCBV * BIBC

    return DLF, buses_ordered_bfs_nets
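A minimal sketch (toy, hypothetical data) of the subnetwork branch selection used above: np.in1d keeps only the branches whose two terminal buses both belong to the bus set reached by the breadth-first search from one reference bus.

import numpy as np

# hypothetical branch list given as (from_bus, to_bus) pairs
branches_arr = np.array([[0, 1], [1, 2], [2, 3], [4, 5], [5, 6]])
buses_ordered_bfs = np.array([0, 1, 2, 3])  # buses reached from one reference bus

# keep branches with both terminals inside this subnetwork, sorted per row
branches_sub_mask = (np.in1d(branches_arr[:, 0], buses_ordered_bfs)
                     & np.in1d(branches_arr[:, 1], buses_ordered_bfs))
branches = np.sort(branches_arr[branches_sub_mask, :], axis=1)
print(branches)  # rows: [0 1], [1 2], [2 3]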
def own_def(roilist,
            sub,
            nClusters,
            scan,
            scan_type,
            savepng=0,
            session=1,
            algo=0,
            type_cor=0):
    p_dir = '/home/ajoshi/data/HCP_data'
    r_factor = 3
    ref_dir = os.path.join(p_dir, 'reference')
    ref = '100307'
    fn1 = ref + '.reduce' + str(r_factor) + '.LR_mask.mat'
    fname1 = os.path.join(ref_dir, fn1)
    msk = scipy.io.loadmat(fname1)  # h5py.File(fname1);

    #dfs_left = readdfs(os.path.join(p_dir, 'reference', ref + '.aparc.a2009s.32k_fs.reduce3.' + 'left' + '.dfs'))
    #dfs_left_sm = readdfs(os.path.join(p_dir, 'reference', ref + '.aparc.\
    #a2009s.32k_fs.reduce3.very_smooth.' + 'left' + '.dfs'))

    dfs_left_sm = readdfs(
        os.path.join('/home/ajoshi/for_gaurav',
                     '100307.BCI2reduce3.very_smooth.' + scan_type + '.dfs'))
    dfs_left = readdfs(
        os.path.join('/home/ajoshi/for_gaurav',
                     '100307.BCI2reduce3.very_smooth.' + scan_type + '.dfs'))

    data = scipy.io.loadmat(
        os.path.join(
            p_dir, 'data', sub, sub + '.rfMRI_REST' + str(session) + scan +
            '.reduce3.ftdata.NLM_11N_hvar_25.mat'))

    LR_flag = msk['LR_flag']
    # 0= right hemisphere && 1== left hemisphere
    if scan_type == 'right':
        LR_flag = np.squeeze(LR_flag) == 0
    else:
        LR_flag = np.squeeze(LR_flag) == 1
    data = data['ftdata_NLM']
    temp = data[LR_flag, :]
    m = np.mean(temp, 1)
    temp = temp - m[:, None]
    s = np.std(temp, 1) + 1e-16
    temp = temp / s[:, None]
    msk_small_region = np.in1d(dfs_left.labels, roilist)
    d = temp[msk_small_region, :]
    rho = np.corrcoef(d)
    rho[~np.isfinite(rho)] = 0
    if algo == 0:
        SC = SpectralClustering(n_clusters=nClusters, affinity='precomputed')
        labels = SC.fit_predict(np.abs(rho))

    if savepng > 0:
        r = dfs_left_sm
        r.labels = np.zeros([r.vertices.shape[0]])
        r.labels[msk_small_region] = labels + 1

        mlab.triangular_mesh(r.vertices[:, 0],
                             r.vertices[:, 1],
                             r.vertices[:, 2],
                             r.faces,
                             representation='surface',
                             opacity=1,
                             scalars=np.float64(r.labels))

        mlab.gcf().scene.parallel_projection = True
        mlab.view(azimuth=0, elevation=-90)
        mlab.colorbar(orientation='horizontal')
        #mlab.show()
        mlab.savefig(filename='clusters_' + str(nClusters) + '_rois_' +
                     str(roilist) + 'subject_' + sub + 'session' +
                     str(session) + '_labels.png')
        mlab.show()
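The ROI selection above reduces to a boolean membership test of the per-vertex label array against the requested ROI ids. A small sketch with made-up labels:

import numpy as np

labels = np.array([10, 11, 12, 10, 13, 11])  # hypothetical per-vertex labels
roilist = [10, 11]                           # ROI ids of interest

msk_small_region = np.in1d(labels, roilist)  # True where the vertex lies in the ROI
print(msk_small_region)                      # [ True  True False  True False  True]
print(np.where(msk_small_region)[0])         # vertex indices inside the ROI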
def biot_convergence_in_space(N):
    # coding: utf-8

    # ### Source terms and analytical solutions

    # In[330]:

    def source_flow(g, tau):

        x1 = g.cell_centers[0]
        x2 = g.cell_centers[1]

        f_flow = tau*(2*np.sin(2*np.pi*x2) - \
                 4*x1*np.pi**2*np.sin(2*np.pi*x2)*(x1 - 1)) - \
                 x1*np.sin(2*np.pi*x2) - \
                 np.sin(2*np.pi*x2)*(x1 - 1) + \
                 2*np.pi*np.cos(2*np.pi*x2)*np.sin(2*np.pi*x1)

        return f_flow

    def source_mechanics(g):

        x1 = g.cell_centers[0]
        x2 = g.cell_centers[1]

        f_mech = np.zeros(g.num_cells * g.dim)

        f_mech[::2] = 6*np.sin(2*np.pi*x2) - \
                      x1*np.sin(2*np.pi*x2) -  \
                      np.sin(2*np.pi*x2)*(x1 - 1) - \
                      8*np.pi**2*np.cos(2*np.pi*x1)*np.cos(2*np.pi*x2) - \
                      4*x1*np.pi**2*np.sin(2*np.pi*x2)*(x1 - 1)

        f_mech[1::2] = 4*np.pi*np.cos(2*np.pi*x2)*(x1 - 1) + \
                       16*np.pi**2*np.sin(2*np.pi*x1)*np.sin(2*np.pi*x2) + \
                       4*x1*np.pi*np.cos(2*np.pi*x2) - \
                       2*x1*np.pi*np.cos(2*np.pi*x2)*(x1 - 1)

        return f_mech

    def analytical(g):

        sol = dict()
        x1 = g.cell_centers[0]
        x2 = g.cell_centers[1]

        sol['u'] = np.zeros(g.num_cells * g.dim)
        sol['u'][::2] = x1 * (1 - x1) * np.sin(2 * np.pi * x2)
        sol['u'][1::2] = np.sin(2 * np.pi * x1) * np.sin(2 * np.pi * x2)

        sol['p'] = sol['u'][::2]

        return sol

    # ### Getting mechanics boundary conditions

    # In[331]:

    def get_bc_mechanics(g, b_faces, x_min, x_max, west, east, y_min, y_max,
                         south, north):

        # Setting the tags at each boundary side for the mechanics problem
        labels_mech = np.array([None] * b_faces.size)
        labels_mech[west] = 'dir'
        labels_mech[east] = 'dir'
        labels_mech[south] = 'dir'
        labels_mech[north] = 'dir'

        # Constructing the bc object for the mechanics problem
        bc_mech = pp.BoundaryConditionVectorial(g, b_faces, labels_mech)

        # Constructing the boundary values array for the mechanics problem
        bc_val_mech = np.zeros(g.num_faces * g.dim)

        return bc_mech, bc_val_mech

    # ### Getting flow boundary conditions

    # In[332]:

    def get_bc_flow(g, b_faces, x_min, x_max, west, east, y_min, y_max, south,
                    north):

        # Setting the tags at each boundary side for the mechanics problem
        labels_flow = np.array([None] * b_faces.size)
        labels_flow[west] = 'dir'
        labels_flow[east] = 'dir'
        labels_flow[south] = 'dir'
        labels_flow[north] = 'dir'

        # Constructing the bc object for the flow problem
        bc_flow = pp.BoundaryCondition(g, b_faces, labels_flow)

        # Constructing the boundary values array for the flow problem
        bc_val_flow = np.zeros(g.num_faces)

        return bc_flow, bc_val_flow

    # ### Setting up the grid

    # In[333]:

    Nx = Ny = N
    Lx = 1
    Ly = 1
    g = pp.CartGrid([Nx, Ny], [Lx, Ly])
    g.compute_geometry()
    V = g.cell_volumes

    # ### Physical parameters

    # In[334]:

    # Skeleton parameters
    mu_s = 1  # [Pa] Shear modulus
    lambda_s = 1  # [Pa] Lame parameter
    K_s = (2 / 3) * mu_s + lambda_s  # [Pa] Bulk modulus
    E_s = mu_s * ((9 * K_s) / (3 * K_s + mu_s))  # [Pa] Young's modulus
    nu_s = (3 * K_s - 2 * mu_s) / (2 * (3 * K_s + mu_s)
                                   )  # [-] Poisson's coefficient
    k_s = 1  # [m^2] Permeability

    # Fluid parameters
    mu_f = 1  # [Pa s] Dynamic viscosity

    # Porous medium parameters
    alpha_biot = 1.  # [-] Biot's coefficient
    S_m = 0  # [1/Pa] Specific Storage

    # ### Creating second and fourth order tensors

    # In[335]:

    # Permeability tensor
    perm = pp.SecondOrderTensor(g.dim, k_s * np.ones(g.num_cells))

    # Stiffness matrix
    constit = pp.FourthOrderTensor(g.dim, mu_s * np.ones(g.num_cells),
                                   lambda_s * np.ones(g.num_cells))

    # ### Time parameters

    # In[336]:

    t0 = 0  # [s] Initial time
    tf = 1  # [s] Final simulation time
    tLevels = 1  # [-] Time levels
    times = np.linspace(t0, tf, tLevels + 1)  # [s] Vector of time evaluations
    dt = np.diff(times)  # [s] Vector of time steps

    # ### Boundary conditions pre-processing

    # In[337]:

    b_faces = g.tags['domain_boundary_faces'].nonzero()[0]

    # Extracting indices of boundary faces w.r.t g
    x_min = b_faces[g.face_centers[0, b_faces] < 0.0001]
    x_max = b_faces[g.face_centers[0, b_faces] > 0.9999 * Lx]
    y_min = b_faces[g.face_centers[1, b_faces] < 0.0001]
    y_max = b_faces[g.face_centers[1, b_faces] > 0.9999 * Ly]

    # Extracting indices of boundary faces w.r.t b_faces
    west = np.in1d(b_faces, x_min).nonzero()
    east = np.in1d(b_faces, x_max).nonzero()
    south = np.in1d(b_faces, y_min).nonzero()
    north = np.in1d(b_faces, y_max).nonzero()

    # Mechanics boundary conditions
    bc_mech, bc_val_mech = get_bc_mechanics(g, b_faces, x_min, x_max, west,
                                            east, y_min, y_max, south, north)
    # FLOW BOUNDARY CONDITIONS
    bc_flow, bc_val_flow = get_bc_flow(g, b_faces, x_min, x_max, west, east,
                                       y_min, y_max, south, north)

    # ### Initializing solution and solver dictionaries

    # In[338]:

    # Solution dictionary
    sol = dict()
    sol['time'] = np.zeros(tLevels + 1, dtype=float)
    sol['displacement'] = np.zeros((tLevels + 1, g.num_cells * g.dim),
                                   dtype=float)
    sol['displacement_faces'] = np.zeros(
        (tLevels + 1, g.num_faces * g.dim * 2), dtype=float)
    sol['pressure'] = np.zeros((tLevels + 1, g.num_cells), dtype=float)
    sol['traction'] = np.zeros((tLevels + 1, g.num_faces * g.dim), dtype=float)
    sol['flux'] = np.zeros((tLevels + 1, g.num_faces), dtype=float)
    sol['iter'] = np.array([], dtype=int)
    sol['time_step'] = np.array([], dtype=float)
    sol['residual'] = np.array([], dtype=float)

    # Solver dictionary
    newton_param = dict()
    newton_param['tol'] = 1E-8  # maximum tolerance
    newton_param['max_iter'] = 20  # maximum number of iterations
    newton_param['res_norm'] = 1000  # initializing residual
    newton_param['iter'] = 1  # iteration

    # ### Discrete operators and discrete equations

    # ### Flow operators

    # In[339]:

    F = lambda x: biot_F * x  # Flux operator
    boundF = lambda x: biot_boundF * x  # Bound Flux operator
    compat = lambda x: biot_compat * x  # Compatibility operator (Stabilization term)
    divF = lambda x: biot_divF * x  # Scalar divergence operator

    # ### Mechanics operators

    # In[340]:

    S = lambda x: biot_S * x  # Stress operator
    boundS = lambda x: biot_boundS * x  # Bound Stress operator
    divU = lambda x: biot_divU * x  # Divergence of displacement field
    divS = lambda x: biot_divS * x  # Vector divergence operator
    gradP = lambda x: biot_divS * biot_gradP * x  # Pressure gradient operator
    boundDivU = lambda x: biot_boundDivU * x  # Bound Divergence of displacement operator
    boundUCell = lambda x: biot_boundUCell * x  # Contribution of displacement at cells -> Face displacement
    boundUFace = lambda x: biot_boundUFace * x  # Contribution of bc_mech at the boundaries -> Face displacement
    boundUPressure = lambda x: biot_boundUPressure * x  # Contribution of pressure at cells -> Face displacement

    # ### Discrete equations

    # In[341]:

    # Source terms
    f_mech = source_mechanics(g)
    f_flow = source_flow(g, dt[0])

    # Generalized Hooke's law
    T = lambda u: S(u) + boundS(bc_val_mech)

    # Momentum conservation equation (I)
    u_eq1 = lambda u: divS(T(u))

    # Momentum conservation equation (II)
    u_eq2 = lambda p: -gradP(p) + f_mech * V[0]

    # Darcy's law
    Q = lambda p: (1. / mu_f) * (F(p) + boundF(bc_val_flow))

    # Mass conservation equation (I)
    p_eq1 = lambda u, u_n: alpha_biot * divU(u - u_n)

    # Mass conservation equation (II)
    p_eq2 = lambda p, p_n, dt: (p - p_n) * S_m * V + divF(Q(
        p)) * dt + alpha_biot * compat(p - p_n) * V[0] - (f_flow / dt) * V[0]

    # ## Creating AD variables

    # In[343]:

    # Create displacement AD-variable
    u_ad = Ad_array(np.zeros(g.num_cells * 2),
                    sps.diags(np.ones(g.num_cells * g.dim)))

    # Create pressure AD-variable
    p_ad = Ad_array(np.zeros(g.num_cells), sps.diags(np.ones(g.num_cells)))

    # ## Performing discretization

    # In[344]:

    d = dict()  # initialize dictionary to store data

    # Mechanics data object
    specified_parameters_mech = {
        "fourth_order_tensor": constit,
        "bc": bc_mech,
        "biot_alpha": 1.,
        "bc_values": bc_val_mech
    }
    pp.initialize_default_data(g, d, "mechanics", specified_parameters_mech)

    # Flow data object
    specified_parameters_flow = {
        "second_order_tensor": perm,
        "bc": bc_flow,
        "biot_alpha": 1.,
        "bc_values": bc_val_flow
    }
    pp.initialize_default_data(g, d, "flow", specified_parameters_flow)

    # Biot discretization
    solver_biot = pp.Biot("mechanics", "flow")
    solver_biot.discretize(g, d)

    # Mechanics discretization matrices
    biot_S = d['discretization_matrices']['mechanics']['stress']
    biot_boundS = d['discretization_matrices']['mechanics']['bound_stress']
    biot_divU = d['discretization_matrices']['mechanics']['div_d']
    biot_gradP = d['discretization_matrices']['mechanics']['grad_p']
    biot_boundDivU = d['discretization_matrices']['mechanics']['bound_div_d']
    biot_boundUCell = d['discretization_matrices']['mechanics'][
        'bound_displacement_cell']
    biot_boundUFace = d['discretization_matrices']['mechanics'][
        'bound_displacement_face']
    biot_boundUPressure = d['discretization_matrices']['mechanics'][
        'bound_displacement_pressure']
    biot_divS = pp.fvutils.vector_divergence(g)

    # Flow discretization matrices
    biot_F = d['discretization_matrices']['flow']['flux']
    biot_boundF = d['discretization_matrices']['flow']['bound_flux']
    biot_compat = d['discretization_matrices']['flow']['biot_stabilization']
    biot_divF = pp.fvutils.scalar_divergence(g)

    # Saving initial condition
    sol['pressure'][0] = p_ad.val
    sol['displacement'][0] = u_ad.val
    sol['displacement_faces'][0] = (boundUCell(sol['displacement'][0]) +
                                    boundUFace(bc_val_mech) +
                                    boundUPressure(sol['pressure'][0]))
    sol['time'][0] = times[0]
    sol['traction'][0] = T(u_ad.val)
    sol['flux'][0] = Q(p_ad.val)

    # ## The time loop

    # In[345]:

    tt = 0  # time counter

    while times[tt] < times[-1]:

        tt += 1  # increasing time counter

        # Displacement and pressure at the previous time step
        u_n = u_ad.val.copy()
        p_n = p_ad.val.copy()

        # Updating residual and iteration at each time step
        newton_param.update({'res_norm': 1000, 'iter': 1})

        # Newton loop
        while newton_param['res_norm'] > newton_param['tol'] and newton_param[
                'iter'] <= newton_param['max_iter']:

            # Calling equations
            eq1 = u_eq1(u_ad)
            eq2 = u_eq2(p_ad)
            eq3 = p_eq1(u_ad, u_n)
            eq4 = p_eq2(p_ad, p_n, dt[tt - 1])

            # Assembling Jacobian of the coupled system
            J_mech = np.hstack(
                (eq1.jac, eq2.jac))  # Jacobian blocks (mechanics)
            J_flow = np.hstack((eq3.jac, eq4.jac))  # Jacobian blocks (flow)
            J = sps.bmat(np.vstack((J_mech, J_flow)),
                         format='csc')  # Jacobian (coupled)

            # Determining residual of the coupled system
            R_mech = eq1.val + eq2.val  # Residual (mechanics)
            R_flow = eq3.val + eq4.val  # Residual (flow)
            R = np.hstack((R_mech, R_flow))  # Residual (coupled)

            y = sps.linalg.spsolve(J, -R)  #
            u_ad.val = u_ad.val + y[:g.dim * g.num_cells]  # Newton update
            p_ad.val = p_ad.val + y[g.dim * g.num_cells:]  #

            newton_param['res_norm'] = np.linalg.norm(R)  # Updating residual

            if newton_param['res_norm'] <= newton_param[
                    'tol'] and newton_param['iter'] <= newton_param['max_iter']:
                print('Iter: {} \t Error: {:.8f} [m]'.format(
                    newton_param['iter'], newton_param['res_norm']))
            elif newton_param['iter'] > newton_param['max_iter']:
                print('Error: Newton method did not converge!')
            else:
                newton_param['iter'] += 1

        # Saving variables
        sol['iter'] = np.concatenate(
            (sol['iter'], np.array([newton_param['iter']])))
        sol['residual'] = np.concatenate(
            (sol['residual'], np.array([newton_param['res_norm']])))
        sol['time_step'] = np.concatenate((sol['time_step'], dt))
        sol['pressure'][tt] = p_ad.val
        sol['displacement'][tt] = u_ad.val
        sol['displacement_faces'][tt] = (boundUCell(sol['displacement'][tt]) +
                                         boundUFace(bc_val_mech) +
                                         boundUPressure(sol['pressure'][tt]))
        sol['time'][tt] = times[tt]
        sol['traction'][tt] = T(u_ad.val)
        sol['flux'][tt] = Q(p_ad.val)

        # Determining analytical solution
        sol_anal = analytical(g)

        # Determining norms
        p_norm = np.linalg.norm(sol_anal['p'] - sol['pressure'][-1]) / (
            np.linalg.norm(sol['pressure'][-1]))
        u_mag_num = np.sqrt(sol['displacement'][-1][::2]**2 +
                            sol['displacement'][-1][1::2]**2)
        u_mag_ana = np.sqrt(sol_anal['u'][::2]**2 + sol_anal['u'][1::2]**2)
        u_norm = np.linalg.norm(u_mag_ana -
                                u_mag_num) / np.linalg.norm(u_mag_num)

        return p_norm, u_norm
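The west/east/south/north sets above are positions of the side faces within b_faces, obtained by a membership test. A minimal sketch with hypothetical face ids:

import numpy as np

b_faces = np.array([3, 7, 11, 15, 19])    # hypothetical boundary face ids
x_min = np.array([3, 15])                 # faces lying on the western side

west = np.in1d(b_faces, x_min).nonzero()  # positions of those faces inside b_faces
print(west[0])                            # [0 3]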
    def _bravyi_kitaev_mode(self, n):
        """
        Bravyi-Kitaev mode.

        Args:
            n (int): number of modes
        Returns:
            numpy.ndarray: Array of mode indexes
        """

        def parity_set(j, n):
            """Computes the parity set of the j-th orbital in n modes.

            Args:
                j (int) : the orbital index
                n (int) : the total number of modes

            Returns:
                numpy.ndarray: Array of mode indexes
            """
            indexes = np.array([])
            if n % 2 != 0:
                return indexes

            if j < n / 2:
                indexes = np.append(indexes, parity_set(j, n / 2))
            else:
                indexes = np.append(indexes, np.append(
                    parity_set(j - n / 2, n / 2) + n / 2, n / 2 - 1))
            return indexes

        def update_set(j, n):
            """Computes the update set of the j-th orbital in n modes.

            Args:
                j (int) : the orbital index
                n (int) : the total number of modes

            Returns:
                numpy.ndarray: Array of mode indexes
            """
            indexes = np.array([])
            if n % 2 != 0:
                return indexes
            if j < n / 2:
                indexes = np.append(indexes, np.append(
                    n - 1, update_set(j, n / 2)))
            else:
                indexes = np.append(indexes, update_set(j - n / 2, n / 2) + n / 2)
            return indexes

        def flip_set(j, n):
            """Computes the flip set of the j-th orbital in n modes.

            Args:
                j (int) : the orbital index
                n (int) : the total number of modes

            Returns:
                numpy.ndarray: Array of mode indexes
            """
            indexes = np.array([])
            if n % 2 != 0:
                return indexes
            if j < n / 2:
                indexes = np.append(indexes, flip_set(j, n / 2))
            elif j >= n / 2 and j < n - 1:  # pylint: disable=chained-comparison
                indexes = np.append(indexes, flip_set(j - n / 2, n / 2) + n / 2)
            else:
                indexes = np.append(np.append(indexes, flip_set(
                    j - n / 2, n / 2) + n / 2), n / 2 - 1)
            return indexes

        a_list = []
        # FIND BINARY SUPERSET SIZE
        bin_sup = 1
        # pylint: disable=comparison-with-callable
        while n > np.power(2, bin_sup):
            bin_sup += 1
        # DEFINE INDEX SETS FOR EVERY FERMIONIC MODE
        update_sets = []
        update_pauli = []

        parity_sets = []
        parity_pauli = []

        flip_sets = []

        remainder_sets = []
        remainder_pauli = []
        for j in range(n):

            update_sets.append(update_set(j, np.power(2, bin_sup)))
            update_sets[j] = update_sets[j][update_sets[j] < n]

            parity_sets.append(parity_set(j, np.power(2, bin_sup)))
            parity_sets[j] = parity_sets[j][parity_sets[j] < n]

            flip_sets.append(flip_set(j, np.power(2, bin_sup)))
            flip_sets[j] = flip_sets[j][flip_sets[j] < n]

            remainder_sets.append(np.setdiff1d(parity_sets[j], flip_sets[j]))

            update_pauli.append(Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool)))
            parity_pauli.append(Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool)))
            remainder_pauli.append(Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool)))
            for k in range(n):
                if np.in1d(k, update_sets[j]):
                    update_pauli[j].update_x(True, k)
                if np.in1d(k, parity_sets[j]):
                    parity_pauli[j].update_z(True, k)
                if np.in1d(k, remainder_sets[j]):
                    remainder_pauli[j].update_z(True, k)

            x_j = Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool))
            x_j.update_x(True, j)
            y_j = Pauli(np.zeros(n, dtype=np.bool), np.zeros(n, dtype=np.bool))
            y_j.update_z(True, j)
            y_j.update_x(True, j)
            a_list.append((update_pauli[j] * x_j * parity_pauli[j],
                           update_pauli[j] * y_j * remainder_pauli[j]))
        return a_list
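Inside the loop above, np.in1d acts as a scalar membership test: it returns a one-element boolean array that is truthy when k belongs to the given index set. A toy sketch with a made-up update set:

import numpy as np

update_set_j = np.array([1., 3., 7.])  # hypothetical update set of one mode
for k in range(8):
    if np.in1d(k, update_set_j):       # same effect as: k in set(update_set_j)
        print("apply X on qubit", k)   # prints for k = 1, 3, 7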
def detect_peaks(x,
                 mph=None,
                 mpd=1,
                 threshold=0,
                 edge='rising',
                 kpsh=False,
                 valley=False,
                 show=False,
                 ax=None):
    """
    Returns
    -------
    ind : 1D array_like
        indices of the peaks in `x`.

    Notes
    -----
    The detection of valleys instead of peaks is performed internally by simply
    negating the data: `ind_valleys = detect_peaks(-x)`
    
    The function can handle NaN's.

    Copyright 2015. Marcos Duarte. MIT license.

    """

    x = np.atleast_1d(x).astype('float64')
    if x.size < 3:
        return np.array([], dtype=int)
    if valley:
        x = -x
    # find indices of all peaks
    dx = x[1:] - x[:-1]
    # handle NaN's
    indnan = np.where(np.isnan(x))[0]
    if indnan.size:
        x[indnan] = np.inf
        dx[np.where(np.isnan(dx))[0]] = np.inf
    ine, ire, ife = np.array([[], [], []], dtype=int)
    if not edge:
        ine = np.where((np.hstack((dx, 0)) < 0) & (np.hstack((0, dx)) > 0))[0]
    else:
        if edge.lower() in ['rising', 'both']:
            ire = np.where((np.hstack((dx, 0)) <= 0)
                           & (np.hstack((0, dx)) > 0))[0]
        if edge.lower() in ['falling', 'both']:
            ife = np.where((np.hstack((dx, 0)) < 0)
                           & (np.hstack((0, dx)) >= 0))[0]
    ind = np.unique(np.hstack((ine, ire, ife)))
    # handle NaN's
    if ind.size and indnan.size:
        # NaN's and values close to NaN's cannot be peaks
        ind = ind[np.in1d(ind,
                          np.unique(np.hstack(
                              (indnan, indnan - 1, indnan + 1))),
                          invert=True)]
    # first and last values of x cannot be peaks
    if ind.size and ind[0] == 0:
        ind = ind[1:]
    if ind.size and ind[-1] == x.size - 1:
        ind = ind[:-1]
    # remove peaks < minimum peak height
    if ind.size and mph is not None:
        ind = ind[x[ind] >= mph]
    # remove peaks - neighbors < threshold
    if ind.size and threshold > 0:
        dx = np.min(np.vstack([x[ind] - x[ind - 1], x[ind] - x[ind + 1]]),
                    axis=0)
        ind = np.delete(ind, np.where(dx < threshold)[0])
    # detect small peaks closer than minimum peak distance
    if ind.size and mpd > 1:
        ind = ind[np.argsort(x[ind])][::-1]  # sort ind by peak height
        idel = np.zeros(ind.size, dtype=bool)
        for i in range(ind.size):
            if not idel[i]:
                # keep peaks with the same height if kpsh is True
                idel = idel | (ind >= ind[i] - mpd) & (ind <= ind[i] + mpd) \
                    & (x[ind[i]] > x[ind] if kpsh else True)
                idel[i] = 0  # Keep current peak
        # remove the small peaks and sort back the indices by their occurrence
        ind = np.sort(ind[~idel])
    return ind
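The NaN handling above relies on invert=True: candidate indices that coincide with a NaN, or sit right next to one, are dropped. A small sketch with toy indices:

import numpy as np

ind = np.array([2, 5, 9])                  # hypothetical candidate peak indices
indnan = np.array([4])                     # positions of NaN samples
bad = np.unique(np.hstack((indnan, indnan - 1, indnan + 1)))  # NaNs and neighbours
ind = ind[np.in1d(ind, bad, invert=True)]  # keep only indices away from NaNs
print(ind)                                 # [2 9]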
Ejemplo n.º 46
0
def _bfswpf(DLF, bus, gen, branch, baseMVA, Ybus, Sbus, V0, ref, pv, pq,
            buses_ordered_bfs_nets, options, **kwargs):
    """
    distribution power flow solution according to [1]
    :param DLF: direct-Load-Flow matrix which relates bus current injections to voltage drops from the root bus

    :param bus: buses matrix
    :param gen: generators matrix
    :param branch: branches matrix
    :param baseMVA:
    :param Ybus: bus admittance matrix
    :param Sbus: vector of power injections
    :param V0: initial voltage state vector
    :param ref: reference bus index
    :param pv: PV buses indices
    :param pq: PQ buses indices
    :param buses_ordered_bfs_nets: buses ordered according to breadth-first search

    :return: power flow result
    """
    enforce_q_lims = options["enforce_q_lims"]
    tolerance_mva = options["tolerance_mva"]
    max_iteration = options["max_iteration"]
    voltage_depend_loads = options["voltage_depend_loads"]
    # setting options
    max_it = max_iteration  # maximum iterations
    verbose = kwargs["VERBOSE"]  # verbose is set in run._runpppf() #

    # tolerance for the inner loop for PV nodes
    if 'tolerance_mva_pv' in kwargs:
        tol_mva_inner = kwargs['tolerance_mva_pv']
    else:
        tol_mva_inner = 1.e-2

    if 'max_iter_pv' in kwargs:
        max_iter_pv = kwargs['max_iter_pv']
    else:
        max_iter_pv = 20

    nobus = bus.shape[0]
    ngen = gen.shape[0]

    mask_root = ~(bus[:, BUS_TYPE] == 3)  # mask for eliminating root bus
    norefs = len(ref)

    Ysh = _makeYsh_bfsw(bus, branch, baseMVA)

    # detect generators on PV buses which have status ON
    gen_pv = np.in1d(gen[:, GEN_BUS], pv) & (gen[:, GEN_STATUS] > 0)
    qg_lim = np.zeros(
        ngen, dtype=bool)  # initialize generators which violated Q limits

    Iinj = np.conj(Sbus / V0) - Ysh * V0  # Initial current injections

    # initiate reference voltage vector
    V_ref = np.ones(nobus, dtype=complex)
    for neti, buses_ordered_bfs in enumerate(buses_ordered_bfs_nets):
        V_ref[buses_ordered_bfs] *= V0[ref[neti]]
    V = V0.copy()

    n_iter = 0
    converged = 0
    if verbose:
        print(' -- AC Power Flow (Backward/Forward sweep)\n')

    while not converged and n_iter < max_it:
        n_iter_inner = 0
        n_iter += 1

        deltaV = DLF * Iinj[mask_root]
        V[mask_root] = V_ref[mask_root] + deltaV

        # ##
        # inner loop for considering PV buses
        # TODO improve PV buses inner loop
        inner_loop_converged = False
        while not inner_loop_converged and len(pv) > 0:

            pvi = pv - norefs  # internal PV buses indices, assuming reference node is always 0

            Vmis = (np.abs(gen[gen_pv, VG]))**2 - (np.abs(V[pv]))**2
            # TODO improve getting values from sparse DLF matrix - DLF[pvi, pvi] is inefficient
            dQ = (Vmis / (2 * DLF[pvi, pvi].A1.imag)).flatten()

            gen[gen_pv, QG] += dQ

            if enforce_q_lims:  # check Q violation limits
                ## find gens with violated Q constraints
                qg_max_lim = (gen[:, QG] > gen[:, QMAX]) & gen_pv
                qg_min_lim = (gen[:, QG] < gen[:, QMIN]) & gen_pv

                if qg_min_lim.any():
                    gen[qg_min_lim, QG] = gen[qg_min_lim, QMIN]
                    bus[gen[qg_min_lim, GEN_BUS].astype(int),
                        BUS_TYPE] = 1  # convert to PQ bus

                if qg_max_lim.any():
                    gen[qg_max_lim, QG] = gen[qg_max_lim, QMAX]
                    bus[gen[qg_max_lim, GEN_BUS].astype(int),
                        BUS_TYPE] = 1  # convert to PQ bus

                # TODO: correct: once all the PV buses are converted to PQ buses, conversion back to PV is not possible
                qg_lim_new = qg_min_lim | qg_max_lim
                if qg_lim_new.any():
                    pq2pv = (qg_lim != qg_lim_new) & qg_lim
                    # convert PQ to PV bus
                    if pq2pv.any():
                        bus[gen[qg_max_lim, GEN_BUS].astype(int),
                            BUS_TYPE] = 2  # convert to PV bus

                    qg_lim = qg_lim_new.copy()
                    ref, pv, pq = bustypes(bus, gen)

            # avoid calling makeSbus, update only Sbus for pv nodes
            Sbus = (makeSbus(baseMVA, bus, gen, vm=abs(V))
                    if voltage_depend_loads else makeSbus(baseMVA, bus, gen))
            Iinj = np.conj(Sbus / V) - Ysh * V
            deltaV = DLF * Iinj[mask_root]
            V[mask_root] = V_ref[mask_root] + deltaV

            if n_iter_inner > max_iter_pv:
                raise LoadflowNotConverged(
                    " FBSW Power Flow did not converge - inner iterations for PV nodes "
                    "reached maximum value of {0}!".format(max_iter_pv))

            n_iter_inner += 1

            if np.all(np.abs(dQ) < tol_mva_inner
                      ):  # inner loop termination criterion
                inner_loop_converged = True

        # testing termination criterion -
        if voltage_depend_loads:
            Sbus = makeSbus(baseMVA, bus, gen, vm=abs(V))
        F = _evaluate_Fx(Ybus, V, Sbus, ref, pv, pq)
        # check tolerance
        converged = _check_for_convergence(F, tolerance_mva)

        if converged and verbose:
            print("\nFwd-back sweep power flow converged in "
                  "{0} iterations.\n".format(n_iter))

        # updating injected currents
        Iinj = np.conj(Sbus / V) - Ysh * V

    return V, converged
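The gen_pv mask above combines a membership test with a status check: a generator is treated as a PV generator only if its bus is in the pv list and it is in service. A minimal sketch with hypothetical columns:

import numpy as np

gen_bus = np.array([1, 2, 5, 7])     # bus index of each generator (hypothetical)
gen_status = np.array([1, 0, 1, 1])  # 1 = in service
pv = np.array([2, 5])                # PV bus indices

gen_pv = np.in1d(gen_bus, pv) & (gen_status > 0)
print(gen_pv)                        # [False False  True False] -> unit at bus 2 is off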
Ejemplo n.º 47
0

#####################################

terms = getTerms.get()  # get dictionary of terms for each subject

tmp = np.recfromcsv(
    'filename_subject_list.csv')  # read in list of file vs subj
docSubj = dict(zip(tmp['filename'], tmp['subject']))  # dict of name and subj

tmp = np.recfromtxt('./outFiles/part-r-00000',
                    delimiter='\t')  # output and topN
files = tmp[:, 0]
topN = tmp[:, 1:]

val = np.empty(topN.shape, dtype=bool)
for n, (f, t) in enumerate(zip(files,
                               topN)):  # see if topN term within subject
    val[n] = np.in1d(t, terms[docSubj[f]])

head = np.char.add('term', np.char.mod('%d',
                                       range(1, val.shape[1] + 1)))  # header
valString = np.char.mod('%i', val)  # convert True/False into '1'/'0'

np.savetxt('./outFiles/validation.csv',
           np.hstack((files[:, None], valString)),
           fmt='%s',
           delimiter=',',
           header='fileName,' + ','.join(head),
           comments='')
    def run(self, workspace):
        x_name = self.x_name.value
        y_name = self.y_name.value
        object_set = workspace.object_set

        x = object_set.get_objects(x_name)
        x_data = x.segmented

        dimensions = x.dimensions
        y_data = x.segmented.copy()

        reference_name = self.reference_name.value
        reference = object_set.get_objects(reference_name)
        reference_data = reference.segmented

        # Get the parent object labels
        outer_labels = numpy.unique(reference_data)

        if self.remove_orphans.value:
            # Get the child object labels
            inner_labels = numpy.unique(x_data)
            # Find the discrepancies between child and parent
            orphans = numpy.setdiff1d(inner_labels, outer_labels)
            # Remove them from the original array
            orphan_mask = numpy.in1d(x_data, orphans)
            # orphan_mask here is a 1D array, but it has the same number of elements
            # as y_data. Since we know that, we can reshape it to the original array
            # shape and use it as a boolean mask to take out the orphaned objects
            y_data[orphan_mask.reshape(x_data.shape)] = 0

        for obj in outer_labels:
            # Ignore the background
            if obj == 0:
                continue

            # Find where in the array the child object is outside of the
            # parent object (i.e. where the original array is *not* that
            # object *and* where the child array is that object)
            constrain_mask = (reference_data != obj) & (x_data == obj)
            # Remove those parts outside the parent
            y_data[constrain_mask] = 0

            # Only remove intruding pieces if the user has requested it
            if self.coersion_method.value == METHOD_REMOVE:
                intrude_mask = (reference_data
                                == obj) & (x_data != obj) & (x_data != 0)
                y_data[intrude_mask] = 0

        objects = cellprofiler_core.object.Objects()

        objects.segmented = y_data
        objects.parent_image = x.parent_image

        workspace.object_set.add_objects(objects, y_name)

        self.add_measurements(workspace)

        if self.show_window:
            workspace.display_data.x_data = x_data

            workspace.display_data.y_data = y_data

            workspace.display_data.reference = reference_data

            workspace.display_data.dimensions = dimensions
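The orphan removal above works on the flattened label image: np.in1d returns a 1-D mask over all pixels, which is reshaped back to the image shape before zeroing. A small sketch with a hypothetical 2-D label array:

import numpy

x_data = numpy.array([[1, 1, 0],
                      [2, 2, 3],
                      [0, 3, 3]])       # child labels (hypothetical)
outer_labels = numpy.array([0, 1, 3])   # labels present in the parent objects

orphans = numpy.setdiff1d(numpy.unique(x_data), outer_labels)  # -> [2]
orphan_mask = numpy.in1d(x_data, orphans)                      # flat boolean mask
y_data = x_data.copy()
y_data[orphan_mask.reshape(x_data.shape)] = 0                  # erase orphaned object 2
print(y_data)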
Ejemplo n.º 49
0
print("found ", nr.nodeIndex, " nodes")
print("found ", wr.edgeIndex, " edges")
# sort edges by source node index

print("sorting nodes by id...")
nodes = nodes[nodes[:, 0].argsort()]
print("done...")

print("sorting edges by source node...")
edgesOut = edgesOut[edgesOut[:, 0].argsort()]
edgesIn = edgesIn[edgesIn[:, 0].argsort()]
print("done...")

print("remove edges with no node in sources")
se1 = len(edgesOut)
edgesOut = edgesOut[numpy.in1d(edgesOut[:, 0], nodes[:, 0])]
edgesIn = edgesIn[numpy.in1d(edgesIn[:, 0], nodes[:, 0])]
se2 = len(edgesOut)
print("removed ", (se1 - se2), " edges because of missing source node")

print("count number of edges per source node")
offsetsOut[0] = 0
offsetsIn[0] = 0


for i in range(0, len(nodes) - 1):
    if i % 100000 == 0:
        print(round(float(i) / len(nodes) * 100.0), "% done")
        #print(i+offsets[i])

    startIndex = numpy.searchsorted(edgesOut[:, 0], nodes[i][0])
for index, center in enumerate(centers):
	centers[index] = np.random.uniform(minValue, maxValue, 5)

for iteration in range(iterations):
	#	Set pixels to their cluster
	for idx, data in enumerate(pixel_matrix_scaled):
		distanceToCenters = np.ndarray(shape=(K))
		for index, center in enumerate(centers):
			distanceToCenters[index] = euclidean_distances(data.reshape(1, -1), center.reshape(1, -1))
		pixel_cluster_vector[idx] = np.argmin(distanceToCenters)

	##################################################################################################
	#	Check if a cluster is ever empty, if so append a random datapoint to it
	clusterToCheck = np.arange(K)		#contains an array with all clusters
										#e.g for K=10, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
	clustersEmpty = np.in1d(clusterToCheck, pixel_cluster_vector)
										#^ [True True False True * n of clusters] False means empty
	for index, item in enumerate(clustersEmpty):
		if item == False:
			pixel_cluster_vector[np.random.randint(len(pixel_cluster_vector))] = index
			# ^ sets a random pixel to that cluster as mentioned in the homework writeup
	##################################################################################################

	#	Move centers to the centroid of their cluster
	for i in range(K):
		dataInCenter = []

		for index, item in enumerate(pixel_cluster_vector):
			if item == i:
				dataInCenter.append(pixel_matrix_scaled[index])
		dataInCenter = np.array(dataInCenter)
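The empty-cluster check above is the key use of np.in1d here: tested against the assignment vector, it tells which cluster ids currently own at least one pixel. A hedged toy sketch:

import numpy as np

K = 4
pixel_cluster_vector = np.array([0, 2, 2, 0, 3])  # hypothetical assignments
clustersEmpty = np.in1d(np.arange(K), pixel_cluster_vector)
print(clustersEmpty)                              # [ True False  True  True] -> cluster 1 is empty
for index, occupied in enumerate(clustersEmpty):
    if not occupied:
        # re-seed the empty cluster with a randomly chosen pixel
        pixel_cluster_vector[np.random.randint(len(pixel_cluster_vector))] = index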
Ejemplo n.º 51
0
import time

import EmergeIterate

t0=time.time()

iterate = EmergeIterate.EmergeIterate(22, 'MD10')
iterate.open_snapshots()
iterate.init_new_quantities()
#iterate.map_halos_between_snapshots()

import pandas as pd

print('N f1 halos', len(iterate.f1['/halo_properties/id'].value))
print('N f0 halos', len(iterate.f0['/halo_properties/id'].value))
f1_new_halos = (n.in1d(iterate.f1['/halo_properties/id'].value, iterate.f0['/halo_properties/desc_id'].value)==False)
# new halos [f1_new_halos] to be fed to EmergeIterate.compute_qtys_new_halos_pk
 
f1_evolved_halos = (f1_new_halos==False)

f0_propagated_halos = n.in1d(iterate.f0['/halo_properties/desc_id'].value, iterate.f1['/halo_properties/id'].value)
f0_lost_halos = (f0_propagated_halos==False)

print('lost halos', len(iterate.f0['/halo_properties/desc_id'].value[f0_lost_halos]))
print('propagated halos', len(iterate.f0['/halo_properties/desc_id'].value[f0_propagated_halos]))

print('new halos', len(iterate.f1['/halo_properties/desc_id'].value[f1_new_halos]))
print('evolved halos', len(iterate.f1['/halo_properties/id'].value[f1_evolved_halos]))


# parmi f0_propagated_halos
Ejemplo n.º 52
0
                     label='Supplied Data')
        ax0.plot(qdata, Idata, 'bo', alpha=0.5, label='Interpolated Data')
        ax0.plot(qbinsc, Imean, 'r.', label='Scattering from Density')
        handles, labels = ax0.get_legend_handles_labels()
        handles = [handles[2], handles[0], handles[1]]
        labels = [labels[2], labels[0], labels[1]]
        ymin = np.min(np.hstack((I, Idata, Imean)))
        ymax = np.max(np.hstack((I, Idata, Imean)))
        ax0.set_ylim([0.5 * ymin, 1.5 * ymax])
        ax0.legend(handles, labels)
        ax0.semilogy()
        ax0.set_ylabel('I(q)')

        ax1 = plt.subplot(gs[1])
        ax1.plot(qdata, qdata * 0, 'k--')
        residuals = np.log10(Imean[np.in1d(qbinsc, qdata)]) - np.log10(Idata)
        ax1.plot(qdata, residuals, 'ro-')
        ylim = ax1.get_ylim()
        ymax = np.max(np.abs(ylim))
        n = int(.9 * len(residuals))
        ymax = np.max(np.abs(residuals[:-n]))
        ax1.set_ylim([-ymax, ymax])
        ax1.yaxis.major.locator.set_params(nbins=5)
        xlim = ax0.get_xlim()
        ax1.set_xlim(xlim)
        ax1.set_ylabel('Residuals')
        ax1.set_xlabel(r'q ($\mathrm{\AA^{-1}}$)')
        #plt.setp(ax0.get_xticklabels(), visible=False)
        plt.tight_layout()
        plt.savefig(args.output + '_fit.png', dpi=150)
        plt.close()
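The residuals line above aligns two q-grids: np.in1d picks the entries of the model curve whose bin centres coincide (by exact float equality, as in the code above) with the experimental q values. A minimal sketch with made-up arrays:

import numpy as np

qbinsc = np.array([0.01, 0.02, 0.03, 0.04])  # model bin centres (hypothetical)
qdata = np.array([0.02, 0.04])               # experimental q values
Imean = np.array([10., 8., 6., 4.])          # model intensities on qbinsc
Idata = np.array([7.5, 4.5])                 # measured intensities on qdata

residuals = np.log10(Imean[np.in1d(qbinsc, qdata)]) - np.log10(Idata)
print(residuals)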
Ejemplo n.º 53
0
    def evaluateRecommender(self, recommender_object, n_processes=None):
        """
        :param recommender_object: the trained recommender object, a Recommender subclass
        :param URM_test_list: list of URMs to test the recommender against, or a single URM object
        :param cutoff_list: list of cutoffs to be use to report the scores, or a single cutoff
        """

        if n_processes is None:
            n_processes = int(multiprocessing.cpu_count() / 2)

        start_time = time.time()

        # Split the users to evaluate
        n_processes = min(n_processes, len(self.usersToEvaluate))
        batch_len = int(len(self.usersToEvaluate) / n_processes)
        batch_len = max(batch_len, 1)

        sequential_evaluators_list = []
        sequential_evaluators_n_users_list = []

        for n_evaluator in range(n_processes):

            stat_user = n_evaluator * batch_len
            end_user = min((n_evaluator + 1) * batch_len,
                           len(self.usersToEvaluate))

            if n_evaluator == n_processes - 1:
                end_user = len(self.usersToEvaluate)

            batch_users = self.usersToEvaluate[stat_user:end_user]
            sequential_evaluators_n_users_list.append(len(batch_users))

            not_in_batch_users = np.in1d(self.usersToEvaluate,
                                         batch_users,
                                         invert=True)
            not_in_batch_users = np.array(
                self.usersToEvaluate)[not_in_batch_users]

            new_evaluator = _ParallelEvaluator_batch(
                self.URM_test,
                self.cutoff_list,
                ignore_users=not_in_batch_users)

            sequential_evaluators_list.append(new_evaluator)

        if self.ignore_items_flag:
            recommender_object.set_items_to_ignore(self.ignore_items_ID)

        run_parallel_evaluator_partial = partial(
            _run_parallel_evaluator, recommender_object=recommender_object)

        pool = multiprocessing.Pool(processes=n_processes, maxtasksperchild=1)
        resultList = pool.map(run_parallel_evaluator_partial,
                              sequential_evaluators_list)

        print(
            "ParallelEvaluator: Processed {} ( {:.2f}% ) in {:.2f} seconds. Users per second: {:.0f}"
            .format(
                len(self.usersToEvaluate), 100.0 *
                float(len(self.usersToEvaluate)) / len(self.usersToEvaluate),
                time.time() - start_time,
                float(len(self.usersToEvaluate)) / (time.time() - start_time)))

        sys.stdout.flush()
        sys.stderr.flush()

        results_dict = {}
        n_users_evaluated = 0

        for cutoff in self.cutoff_list:
            results_dict[cutoff] = create_empty_metrics_dict(
                self.n_items, self.n_users, recommender_object.URM_train,
                self.ignore_items_ID, self.ignore_users_ID, cutoff,
                self.diversity_object)

        for new_result_index in range(len(resultList)):

            new_result, n_users_evaluated_batch = resultList[new_result_index]
            n_users_evaluated += n_users_evaluated_batch

            results_dict = _merge_results_dict(results_dict, new_result,
                                               n_users_evaluated_batch)

        for cutoff in self.cutoff_list:
            for key in results_dict[cutoff].keys():
                results_dict[cutoff][key] /= len(self.usersToEvaluate)

        if n_users_evaluated > 0:

            for cutoff in self.cutoff_list:

                results_current_cutoff = results_dict[cutoff]

                for key in results_current_cutoff.keys():

                    value = results_current_cutoff[key]

                    if isinstance(value, Metrics_Object):
                        results_current_cutoff[key] = value.get_metric_value()
                    else:
                        results_current_cutoff[key] = value / n_users_evaluated

                precision_ = results_current_cutoff[
                    EvaluatorMetrics.PRECISION.value]
                recall_ = results_current_cutoff[EvaluatorMetrics.RECALL.value]

                if precision_ + recall_ != 0:
                    results_current_cutoff[EvaluatorMetrics.F1.value] = 2 * (
                        precision_ * recall_) / (precision_ + recall_)

        else:
            print(
                "WARNING: No users had a sufficient number of relevant items")

        sequential_evaluators_list = None
        sequential_evaluators_n_users_list = None

        if self.ignore_items_flag:
            recommender_object.reset_items_to_ignore()

        results_run_string = self.get_result_string(results_dict)

        return (results_dict, results_run_string)
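The batching above uses invert=True to build, for each worker, the complement of its user batch; that complement is then passed to the worker as the set of users to ignore. A small hedged sketch:

import numpy as np

usersToEvaluate = np.array([10, 11, 12, 13, 14, 15])  # hypothetical user ids
batch_users = usersToEvaluate[2:4]                    # users handled by this worker

not_in_batch = np.in1d(usersToEvaluate, batch_users, invert=True)
ignore_users = usersToEvaluate[not_in_batch]          # everyone except the batch
print(ignore_users)                                   # [10 11 14 15]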
Ejemplo n.º 54
0
    def sampling(self,
                 data=None,
                 pars=None,
                 chains=4,
                 iter=2000,
                 warmup=None,
                 thin=1,
                 seed=None,
                 init='random',
                 sample_file=None,
                 diagnostic_file=None,
                 verbose=False,
                 **kwargs):
        """Draw samples from the model.

        Parameters
        ----------
        data : dict
            A Python dictionary providing the data for the model. Variables
            for Stan are stored in the dictionary as expected. Variable
            names are the keys and the values are their associated values.
            Stan only accepts certain kinds of values; see Notes.

        pars : list of string, optional
            A list of strings indicating parameters of interest. By default
            all parameters specified in the model will be stored.

        chains : int, optional
            Positive integer specifying number of chains. 4 by default.

        iter : int, 2000 by default
            Positive integer specifying how many iterations for each chain
            including warmup.

        warmup : int, iter//2 by default
            Positive integer specifying number of warmup (aka burn-in) iterations.
            As `warmup` also specifies the number of iterations used for step-size
            adaption, warmup samples should not be used for inference.

        thin : int, 1 by default
            Positive integer specifying the period for saving samples.

        seed : int, optional
            The seed, a positive integer for random number generation. Only
            one seed is needed when multiple chains are used, as the other
            chain's seeds are generated from the first chain's to prevent
            dependency among random number streams. By default, seed is
            ``random.randint(0, MAX_UINT)``.

        init : {0, '0', 'random', function returning dict, list of dict}, optional
            Specifies how initial parameter values are chosen: 0 or '0'
            initializes all to be zero on the unconstrained support; 'random'
            generates random initial values; list of size equal to the number
            of chains (`chains`), where the list contains a dict with initial
            parameter values; function returning a dict with initial parameter
            values. The function may take an optional argument `chain_id`.

        sample_file : string, optional
            File name specifying where samples for *all* parameters and other
            saved quantities will be written. If not provided, no samples
            will be written. If the folder given is not writable, a temporary
            directory will be used. When there are multiple chains, an underscore
            and chain number are appended to the file name. By default do not
            write samples to file.

        diagnostic_file : str, optional
            File name indicating where diagnostic data for all parameters
            should be written. If not writable, a temporary directory is used.

        verbose : boolean, False by default
            Indicates whether intermediate output should be piped to the
            console. This output may be useful for debugging.

        Returns
        -------
        fit : StanFit4<model_name>
            Instance containing the fitted results.

        Other parameters
        ----------------
        chain_id : int, optional
            Iterable of unique ints naming chains or int with which to start.
        leapfrog_steps : int, optional
        epsilon : float, optional
        gamma : float, optional
        delta : float, optional
        equal_step_sizes : bool, optional
        max_treedepth : int, optional
        nondiag_mass : bool, optional
        test_grad : bool
            If True, Stan will not perform any sampling. Instead the gradient
            calculation is tested and printed out and the fitted stanfit4model
            object will be in test gradient mode. False is the default.
        refresh : int, optional
            Controls how to indicate progress during sampling. By default,
            `refresh` = max(iter//10, 1).

        Notes
        -----

        More details can be found in Stan's manual. The default sampler is
        NUTS2, where `leapfrog_steps` is ``-1`` and `equal_step_sizes` is
        False. To use NUTS with full mass matrix, set `nondiag_mass` to True.

        Examples
        --------
        >>> from pystan import StanModel
        >>> m = StanModel(model_code='parameters {real y;} model {y ~ normal(0,1);}')
        >>> m.sampling(iter=100)

        """
        # NOTE: in this function, iter masks iter() the python function.
        # If this ever turns out to be a problem just add:
        # iter_ = iter
        # del iter  # now builtins.iter is available
        if sample_file is not None:
            raise NotImplementedError("sample_file not supported yet")
        if diagnostic_file is not None:
            raise NotImplementedError("diagnostic_file not supported yet")
        if data is None:
            data = {}
        if warmup is None:
            warmup = int(iter // 2)

        data_r, data_i = pystan.misc._split_data(data)
        fit = self.fit_class(data_r, data_i)
        # store a copy of the data passed to fit in the class
        fit.data = {}
        fit.data.update(data_i)
        fit.data.update(data_r)

        m_pars = fit._get_param_names()
        p_dims = fit._get_param_dims()
        if pars is not None and len(pars) > 0:
            if not all(p in m_pars for p in pars):
                pars = np.asarray(pars)
                unmatched = pars[np.invert(np.in1d(pars, m_pars))]
                msg = "No parameter(s): {}; sampling not done."
                raise ValueError(msg.format(', '.join(unmatched)))

        if chains < 1:
            raise ValueError("The number of chains is less than one; "
                             "sampling not done.")

        if seed is None:
            seed = random.randint(0, MAX_UINT)
        seed = int(seed)

        args_list = pystan.misc._config_argss(chains=chains,
                                              iter=iter,
                                              warmup=warmup,
                                              thin=thin,
                                              init=init,
                                              seed=seed,
                                              sample_file=sample_file,
                                              diagnostic_file=diagnostic_file,
                                              **kwargs)

        # number of samples saved after thinning
        warmup2 = 1 + (warmup - 1) // thin
        n_kept = 1 + (iter - warmup - 1) // thin
        n_save = n_kept + warmup2

        samples, rets = [], []  # samples and return values
        if kwargs.get('test_grad') is None:
            mode = "SAMPLING"
        else:
            mode = "TESTING GRADIENT"
        # FIXME: use concurrent.futures to parallelize this
        for i in range(chains):
            if kwargs.get('refresh') is None or kwargs.get('refresh') > 0:
                chain_num = i + 1
                msg = "{} FOR MODEL {} NOW (CHAIN {})."
                logging.info(msg.format(mode, self.model_name, chain_num))
            ret, samples_i = fit._call_sampler(args_list[i])
            samples.append(samples_i)
            # call_sampler in stan_fit.hpp will raise a std::runtime_error
            # if the return value is non-zero. Cython will generate a
            # RuntimeError from this.
            # FIXME: should one mimic rstan and "return" an empty StanFit?
            # That is, should I wipe fit's attributes and return that?

        inits_used = pystan.misc._organize_inits([s['inits'] for s in samples],
                                                 m_pars, p_dims)

        # test_gradient mode: don't sample
        if samples[0]['test_grad']:
            fit.sim = {'num_failed': [s['num_failed'] for s in samples]}
            return fit

        perm_lst = [np.random.permutation(n_kept) for _ in range(chains)]
        fnames_oi = fit._get_param_fnames_oi()
        n_flatnames = len(fnames_oi)
        fit.sim = {
            'samples': samples,
            # rstan has this; name clashes with 'chains' in samples[0]['chains']
            'chains': len(samples),
            'iter': iter,
            'warmup': warmup,
            'thin': thin,
            'n_save': [n_save] * chains,
            'warmup2': [warmup2] * chains,
            'permutation': perm_lst,
            'pars_oi': fit._get_param_names_oi(),
            'dims_oi': fit._get_param_dims(),
            'fnames_oi': fnames_oi,
            'n_flatnames': n_flatnames
        }
        fit.model_name = self.model_name
        fit.model_pars = m_pars
        fit.par_dims = p_dims
        fit.mode = 0
        fit.inits = inits_used
        fit.stan_args = args_list
        fit.stanmodel = self
        fit.date = datetime.datetime.now()
        return fit
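The parameter check above isolates the requested names that the model does not define. A minimal sketch of that validation with hypothetical parameter names:

import numpy as np

m_pars = ['alpha', 'beta', 'sigma']   # parameters defined by the model
pars = np.asarray(['beta', 'gamma'])  # parameters requested by the user

unmatched = pars[np.invert(np.in1d(pars, m_pars))]
if unmatched.size:
    raise ValueError("No parameter(s): {}; sampling not done.".format(', '.join(unmatched)))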
Ejemplo n.º 55
0
    def _run_evaluation_on_selected_users(self, recommender_object,
                                          usersToEvaluate):

        start_time = time.time()
        start_time_print = time.time()

        results_dict = {}

        for cutoff in self.cutoff_list:
            results_dict[cutoff] = create_empty_metrics_dict(
                self.n_items, self.n_users, recommender_object.URM_train,
                self.ignore_items_ID, self.ignore_users_ID, cutoff,
                self.diversity_object)

        n_users_evaluated = 0

        for test_user in usersToEvaluate:

            # Being the URM CSR, the indices are the non-zero column indexes
            relevant_items = self.get_user_relevant_items(test_user)

            n_users_evaluated += 1

            recommended_items = recommender_object.recommend(
                test_user,
                remove_seen_flag=self.exclude_seen,
                cutoff=self.max_cutoff,
                remove_top_pop_flag=False,
                remove_CustomItems_flag=self.ignore_items_flag)

            is_relevant = np.in1d(recommended_items,
                                  relevant_items,
                                  assume_unique=True)

            for cutoff in self.cutoff_list:

                results_current_cutoff = results_dict[cutoff]

                is_relevant_current_cutoff = is_relevant[0:cutoff]
                recommended_items_current_cutoff = recommended_items[0:cutoff]

                results_current_cutoff[
                    EvaluatorMetrics.ROC_AUC.value] += roc_auc(
                        is_relevant_current_cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.PRECISION.value] += precision(
                        is_relevant_current_cutoff, len(relevant_items))
                results_current_cutoff[
                    EvaluatorMetrics.RECALL.value] += recall(
                        is_relevant_current_cutoff, relevant_items)
                results_current_cutoff[EvaluatorMetrics.RECALL_TEST_LEN.
                                       value] += recall_min_test_len(
                                           is_relevant_current_cutoff,
                                           relevant_items)
                results_current_cutoff[EvaluatorMetrics.MAP.value] += map(
                    is_relevant_current_cutoff, relevant_items)
                results_current_cutoff[EvaluatorMetrics.MRR.value] += rr(
                    is_relevant_current_cutoff)
                results_current_cutoff[EvaluatorMetrics.NDCG.value] += ndcg(
                    recommended_items_current_cutoff,
                    relevant_items,
                    relevance=self.get_user_test_ratings(test_user),
                    at=cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.HIT_RATE.
                    value] += is_relevant_current_cutoff.sum()
                results_current_cutoff[EvaluatorMetrics.ARHR.value] += arhr(
                    is_relevant_current_cutoff)

                results_current_cutoff[
                    EvaluatorMetrics.NOVELTY.value].add_recommendations(
                        recommended_items_current_cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.DIVERSITY_GINI.value].add_recommendations(
                        recommended_items_current_cutoff)
                results_current_cutoff[EvaluatorMetrics.SHANNON_ENTROPY.
                                       value].add_recommendations(
                                           recommended_items_current_cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.COVERAGE_ITEM.value].add_recommendations(
                        recommended_items_current_cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.COVERAGE_USER.value].add_recommendations(
                        recommended_items_current_cutoff, test_user)
                results_current_cutoff[
                    EvaluatorMetrics.DIVERSITY_MEAN_INTER_LIST.
                    value].add_recommendations(
                        recommended_items_current_cutoff)
                results_current_cutoff[EvaluatorMetrics.DIVERSITY_HERFINDAHL.
                                       value].add_recommendations(
                                           recommended_items_current_cutoff)

                if EvaluatorMetrics.DIVERSITY_SIMILARITY.value in results_current_cutoff:
                    results_current_cutoff[
                        EvaluatorMetrics.DIVERSITY_SIMILARITY.
                        value].add_recommendations(
                            recommended_items_current_cutoff)

            if time.time() - start_time_print > 30 or n_users_evaluated == len(
                    self.usersToEvaluate):
                print(
                    "SequentialEvaluator: Processed {} ( {:.2f}% ) in {:.2f} seconds. Users per second: {:.0f}"
                    .format(
                        n_users_evaluated, 100.0 * float(n_users_evaluated) /
                        len(self.usersToEvaluate),
                        time.time() - start_time,
                        float(n_users_evaluated) / (time.time() - start_time)))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_print = time.time()

        return results_dict, n_users_evaluated
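The evaluator above reduces, for each user, to marking which recommended items appear in the test set and accumulating ranking metrics at each cutoff. A minimal, self-contained sketch of that core step (the item IDs and cutoff below are made up for illustration and are not taken from the library):

import numpy as np

recommended_items = np.array([10, 4, 7, 99, 3])   # hypothetical ranked recommendations
relevant_items = np.array([7, 3, 42])             # hypothetical test interactions
cutoff = 3

# mark which recommended items are relevant (both arrays are duplicate-free)
is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True)
is_relevant_at_cutoff = is_relevant[:cutoff]

precision_at_k = is_relevant_at_cutoff.sum() / cutoff            # 1/3
recall_at_k = is_relevant_at_cutoff.sum() / len(relevant_items)  # 1/3
print(precision_at_k, recall_at_k)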
Ejemplo n.º 56
0
    def evaluateRecommender(self, recommender_object):
        """
        :param recommender_object: the trained recommender object, a Recommender subclass
        :param URM_test_list: list of URMs to test the recommender against, or a single URM object
        :param cutoff_list: list of cutoffs to be use to report the scores, or a single cutoff
        """

        results_dict = {}

        for cutoff in self.cutoff_list:
            results_dict[cutoff] = create_empty_metrics_dict(
                self.n_items, self.n_users, recommender_object.URM_train,
                self.ignore_items_ID, self.ignore_users_ID, cutoff,
                self.diversity_object)

        start_time = time.time()
        start_time_print = time.time()

        n_eval = 0

        self.__all_items = np.arange(0, self.n_items, dtype=int)
        self.__all_items = set(self.__all_items)

        if self.ignore_items_flag:
            recommender_object.set_items_to_ignore(self.ignore_items_ID)

        for test_user in self.usersToEvaluate:

            # Since the URM is in CSR format, the indices are the non-zero column indexes
            relevant_items = self.get_user_relevant_items(test_user)

            n_eval += 1

            self.user_specific_remove_items(recommender_object, test_user)

            # recommended_items = recommender_object.recommend(np.array(test_user), remove_seen_flag=self.exclude_seen,
            #                                                  cutoff = self.max_cutoff, remove_top_pop_flag=False, remove_CustomItems_flag=self.ignore_items_flag)
            recommended_items = recommender_object.recommend(
                np.atleast_1d(test_user),
                remove_seen_flag=self.exclude_seen,
                cutoff=self.max_cutoff,
                remove_top_pop_flag=False,
                remove_CustomItems_flag=self.ignore_items_flag)

            recommended_items = np.array(recommended_items[0])

            recommender_object.reset_items_to_ignore()

            is_relevant = np.in1d(recommended_items,
                                  relevant_items,
                                  assume_unique=True)

            for cutoff in self.cutoff_list:

                results_current_cutoff = results_dict[cutoff]

                is_relevant_current_cutoff = is_relevant[0:cutoff]
                recommended_items_current_cutoff = recommended_items[0:cutoff]

                results_current_cutoff[
                    EvaluatorMetrics.ROC_AUC.value] += roc_auc(
                        is_relevant_current_cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.PRECISION.value] += precision(
                        is_relevant_current_cutoff, len(relevant_items))
                results_current_cutoff[
                    EvaluatorMetrics.RECALL.value] += recall(
                        is_relevant_current_cutoff, relevant_items)
                results_current_cutoff[EvaluatorMetrics.RECALL_TEST_LEN.
                                       value] += recall_min_test_len(
                                           is_relevant_current_cutoff,
                                           relevant_items)
                results_current_cutoff[EvaluatorMetrics.MAP.value] += map(
                    is_relevant_current_cutoff, relevant_items)
                results_current_cutoff[EvaluatorMetrics.MRR.value] += rr(
                    is_relevant_current_cutoff)
                results_current_cutoff[EvaluatorMetrics.NDCG.value] += ndcg(
                    recommended_items_current_cutoff,
                    relevant_items,
                    relevance=self.get_user_test_ratings(test_user),
                    at=cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.HIT_RATE.
                    value] += is_relevant_current_cutoff.sum()
                results_current_cutoff[EvaluatorMetrics.ARHR.value] += arhr(
                    is_relevant_current_cutoff)

                results_current_cutoff[
                    EvaluatorMetrics.NOVELTY.value].add_recommendations(
                        recommended_items_current_cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.DIVERSITY_GINI.value].add_recommendations(
                        recommended_items_current_cutoff)
                results_current_cutoff[EvaluatorMetrics.SHANNON_ENTROPY.
                                       value].add_recommendations(
                                           recommended_items_current_cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.COVERAGE_ITEM.value].add_recommendations(
                        recommended_items_current_cutoff)
                results_current_cutoff[
                    EvaluatorMetrics.COVERAGE_USER.value].add_recommendations(
                        recommended_items_current_cutoff, test_user)
                results_current_cutoff[
                    EvaluatorMetrics.DIVERSITY_MEAN_INTER_LIST.
                    value].add_recommendations(
                        recommended_items_current_cutoff)
                results_current_cutoff[EvaluatorMetrics.DIVERSITY_HERFINDAHL.
                                       value].add_recommendations(
                                           recommended_items_current_cutoff)

                if EvaluatorMetrics.DIVERSITY_SIMILARITY.value in results_current_cutoff:
                    results_current_cutoff[
                        EvaluatorMetrics.DIVERSITY_SIMILARITY.
                        value].add_recommendations(
                            recommended_items_current_cutoff)

            if time.time() - start_time_print > 30 or n_eval == len(
                    self.usersToEvaluate):
                print(
                    "SequentialEvaluator: Processed {} ( {:.2f}% ) in {:.2f} seconds. Users per second: {:.0f}"
                    .format(n_eval,
                            100.0 * float(n_eval) / len(self.usersToEvaluate),
                            time.time() - start_time,
                            float(n_eval) / (time.time() - start_time)))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_print = time.time()

        if (n_eval > 0):

            for cutoff in self.cutoff_list:

                results_current_cutoff = results_dict[cutoff]

                for key in results_current_cutoff.keys():

                    value = results_current_cutoff[key]

                    if isinstance(value, Metrics_Object):
                        results_current_cutoff[key] = value.get_metric_value()
                    else:
                        results_current_cutoff[key] = value / n_eval

                precision_ = results_current_cutoff[
                    EvaluatorMetrics.PRECISION.value]
                recall_ = results_current_cutoff[EvaluatorMetrics.RECALL.value]

                if precision_ + recall_ != 0:
                    results_current_cutoff[EvaluatorMetrics.F1.value] = 2 * (
                        precision_ * recall_) / (precision_ + recall_)

        else:
            print(
                "WARNING: No users had a sufficient number of relevant items")

        if self.ignore_items_flag:
            recommender_object.reset_items_to_ignore()

        results_run_string = self.get_result_string(results_dict)

        return (results_dict, results_run_string)
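After the per-user accumulation, evaluateRecommender averages each metric over the evaluated users and derives F1 from the averaged precision and recall. A short worked example of that final step (the averaged values are assumed for illustration):

precision_avg = 0.25   # assumed averaged precision at some cutoff
recall_avg = 0.10      # assumed averaged recall at the same cutoff

f1 = 0.0
if precision_avg + recall_avg != 0:
    f1 = 2 * precision_avg * recall_avg / (precision_avg + recall_avg)

print(round(f1, 4))    # 0.1429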
Ejemplo n.º 57
0
def main(subject,
         session,
         bids_folder,
         modalities=None,
         registration_scheme='linear_precise'):

    if modalities is None:
        modalities = ['T2starw', 'MTw', 'TSE']

    curdir = op.dirname(op.realpath(__file__))
    registration_scheme = op.join(curdir, f'{registration_scheme}.json')

    anat_dir = op.join(bids_folder, f'sub-{subject}', f'ses-{session}', 'anat')

    target = op.join(bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}',
                     'anat', f'sub-{subject}_desc-preproc_T1w.nii.gz')
    target_mask = op.join(bids_folder, 'derivatives', 'fmriprep',
                          f'sub-{subject}', 'anat',
                          f'sub-{subject}_desc-brain_mask.nii.gz')

    init_regs = glob.glob(
        op.join(bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}',
                f'ses-{session}', 'anat', '*from-orig_to-T1w_*.txt'))

    t1w_to_mni_transform = op.join(
        bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}', 'anat',
        f'sub-{subject}_from-T1w_to-MNI152NLin2009cAsym_mode-image_xfm.h5')

    t1w_in_mni = op.join(
        bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}', 'anat',
        f'sub-{subject}_space-MNI152NLin2009cAsym_desc-preproc_T1w.nii.gz')

    mni_brain_mask = op.join(
        bids_folder, 'derivatives', 'fmriprep', f'sub-{subject}', 'anat',
        f'sub-{subject}_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz')

    if len(init_regs) > 0:
        init_reg = init_regs[0]
    else:
        init_reg = None

    print(f'INITIAL TRANSFORM: {init_reg}')

    def make_registration_wf(input_file,
                             name,
                             subject=subject,
                             target=target,
                             target_mask=target_mask,
                             init_reg=init_reg,
                             t1w_to_mni_transform=t1w_to_mni_transform,
                             t1w_in_mni=t1w_in_mni,
                             mni_brain_mask=mni_brain_mask,
                             ants_numthreads=8):

        workflow = pe.Workflow(base_dir='/tmp/workflow_folders', name=name)

        input_node = pe.Node(niu.IdentityInterface(fields=[
            'input_file', 'target', 'target_mask', 't1w_to_mni_transform',
            't1w_in_mni', 'mni_brain_mask'
        ]),
                             name='inputspec')
        input_node.inputs.input_file = input_file
        input_node.inputs.target = target
        input_node.inputs.target_mask = target_mask
        input_node.inputs.init_reg = init_reg
        input_node.inputs.t1w_to_mni_transform = t1w_to_mni_transform
        input_node.inputs.t1w_in_mni = t1w_in_mni
        input_node.inputs.mni_brain_mask = mni_brain_mask

        convert_dtype = pe.Node(fsl.maths.MathsCommand(), name='convert_dtype')
        convert_dtype.inputs.output_datatype = 'double'

        workflow.connect(input_node, 'input_file', convert_dtype, 'in_file')

        inu_n4 = pe.Node(
            N4BiasFieldCorrection(
                dimension=3,
                save_bias=True,
                num_threads=ants_numthreads,
                rescale_intensities=True,
                copy_header=True,
            ),
            n_procs=ants_numthreads,
            name="inu_n4",
        )

        workflow.connect(convert_dtype, 'out_file', inu_n4, 'input_image')

        register = pe.Node(Registration(from_file=registration_scheme,
                                        num_threads=ants_numthreads,
                                        verbose=True),
                           name='registration')

        workflow.connect(inu_n4, 'output_image', register, 'moving_image')

        if init_reg:
            workflow.connect(input_node, 'init_reg', register,
                             'initial_moving_transform')

        workflow.connect(input_node, 'target', register, 'fixed_image')
        workflow.connect(input_node, 'target_mask', register,
                         'fixed_image_masks')

        def get_mask(input_image):
            from nilearn import image
            from nipype.utils.filemanip import split_filename
            import os.path as op

            _, fn, _ = split_filename(input_image)
            mask = image.math_img('im != 0', im=input_image)
            new_fn = op.abspath(fn + '_mask.nii.gz')
            mask.to_filename(new_fn)

            return new_fn

        mask_node = pe.Node(niu.Function(function=get_mask,
                                         input_names=['input_image'],
                                         output_names=['mask']),
                            name='mask_node')

        workflow.connect(register, 'warped_image', mask_node, 'input_image')

        gen_grid_node = pe.Node(GenerateSamplingReference(),
                                name='gen_grid_node')

        workflow.connect(mask_node, 'mask', gen_grid_node, 'fov_mask')
        workflow.connect(inu_n4, 'output_image', gen_grid_node, 'moving_image')
        workflow.connect(input_node, 'target', gen_grid_node, 'fixed_image')

        datasink_image_t1w = pe.Node(DerivativesDataSink(
            out_path_base='registration',
            compress=True,
            base_directory=op.join(bids_folder, 'derivatives')),
                                     name='datasink_image_t1w')
        workflow.connect(input_node, 'input_file', datasink_image_t1w,
                         'source_file')
        datasink_image_t1w.inputs.space = 'T1w'
        datasink_image_t1w.inputs.desc = 'registered'

        datasink_report_t1w = pe.Node(DerivativesDataSink(
            out_path_base='registration',
            space='T1w',
            base_directory=op.join(bids_folder, 'derivatives'),
            datatype='figures'),
                                      name='datasink_report_t1w')

        workflow.connect(input_node, 'input_file', datasink_report_t1w,
                         'source_file')
        datasink_report_t1w.inputs.space = 'T1w'

        transformer = pe.Node(ApplyTransforms(
            interpolation='LanczosWindowedSinc',
            generate_report=True,
            num_threads=ants_numthreads),
                              n_procs=ants_numthreads,
                              name='transformer')
        workflow.connect(transformer, 'output_image', datasink_image_t1w,
                         'in_file')
        workflow.connect(transformer, 'out_report', datasink_report_t1w,
                         'in_file')
        workflow.connect(inu_n4, 'output_image', transformer, 'input_image')
        workflow.connect(gen_grid_node, 'out_file', transformer,
                         'reference_image')
        workflow.connect(register, 'composite_transform', transformer,
                         'transforms')

        concat_transforms = pe.Node(niu.Merge(2), name='concat_transforms')

        workflow.connect(register, 'composite_transform', concat_transforms,
                         'in2')
        workflow.connect(input_node, 't1w_to_mni_transform', concat_transforms,
                         'in1')

        transformer_to_mni1 = pe.Node(ApplyTransforms(
            interpolation='LanczosWindowedSinc',
            generate_report=False,
            num_threads=ants_numthreads),
                                      n_procs=ants_numthreads,
                                      name='transformer_to_mni1')
        workflow.connect(inu_n4, 'output_image', transformer_to_mni1,
                         'input_image')
        workflow.connect(input_node, 't1w_in_mni', transformer_to_mni1,
                         'reference_image')
        workflow.connect(concat_transforms, 'out', transformer_to_mni1,
                         'transforms')

        mask_node_mni = pe.Node(niu.Function(function=get_mask,
                                             input_names=['input_image'],
                                             output_names=['mask']),
                                name='mask_node_mni')
        workflow.connect(transformer_to_mni1, 'output_image', mask_node_mni,
                         'input_image')

        def join_masks(mask1, mask2):
            from nilearn import image
            from nipype.utils.filemanip import split_filename
            import os.path as op

            _, fn, _ = split_filename(mask1)

            new_mask = image.math_img('(im1 > 0) & (im2 > 0)',
                                      im1=mask1,
                                      im2=mask2)

            new_fn = op.abspath(fn + '_jointmask' + '.nii.gz')

            new_mask.to_filename(new_fn)

            return new_fn

        combine_masks_node = pe.Node(niu.Function(
            function=join_masks,
            input_names=['mask1', 'mask2'],
            output_names=['combined_mask']),
                                     name='combine_mask_node')

        workflow.connect(mask_node_mni, 'mask', combine_masks_node, 'mask1')
        workflow.connect(input_node, 'mni_brain_mask', combine_masks_node,
                         'mask2')

        gen_grid_node_mni = pe.Node(GenerateSamplingReference(),
                                    name='gen_grid_node_mni')
        workflow.connect(combine_masks_node, 'combined_mask',
                         gen_grid_node_mni, 'fov_mask')
        workflow.connect(inu_n4, 'output_image', gen_grid_node_mni,
                         'moving_image')
        workflow.connect(input_node, 't1w_in_mni', gen_grid_node_mni,
                         'fixed_image')

        transformer_to_mni2 = pe.Node(ApplyTransforms(
            interpolation='LanczosWindowedSinc',
            generate_report=False,
            num_threads=ants_numthreads),
                                      n_procs=ants_numthreads,
                                      name='transformer_to_mni2')
        workflow.connect(inu_n4, 'output_image', transformer_to_mni2,
                         'input_image')
        workflow.connect(gen_grid_node_mni, 'out_file', transformer_to_mni2,
                         'reference_image')
        workflow.connect(concat_transforms, 'out', transformer_to_mni2,
                         'transforms')

        datasink_image_mni = pe.Node(DerivativesDataSink(
            out_path_base='registration',
            compress=True,
            base_directory=op.join(bids_folder, 'derivatives')),
                                     name='datasink_mni')
        datasink_image_mni.inputs.source_file = input_file
        datasink_image_mni.inputs.space = 'MNI152NLin2009cAsym'
        datasink_image_mni.inputs.desc = 'registered'

        workflow.connect(input_node, 'input_file', datasink_image_mni,
                         'source_file')
        workflow.connect(transformer_to_mni2, 'output_image',
                         datasink_image_mni, 'in_file')

        return workflow

    df = BIDSLayout(anat_dir, validate=False).to_df()
    print(df['extension'])
    df = df[np.in1d(df.extension, ['.nii', '.nii.gz'])]

    if 'acquisition' in df.columns:
        df = df[~((df.suffix == 'T2starw') & (df.acquisition != 'average'))]

    print(df)
    df = df[np.in1d(df['suffix'], modalities)]

    for ix, row in df.iterrows():
        logging.info(f'Registering {row.path}')
        wf_name = f'register_{subject}_{session}_{row.suffix}'

        if ('run' in row) and row.run:
            wf_name += f'_{row.run}'

        wf = make_registration_wf(row.path, wf_name)

        wf.run()
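In the workflow driver above, np.in1d filters the BIDSLayout dataframe by extension and by suffix. The same filtering can be expressed with pandas' isin; a sketch with a toy dataframe standing in for BIDSLayout(anat_dir).to_df():

import pandas as pd

# toy stand-in for the BIDS index dataframe
df = pd.DataFrame({
    'suffix': ['T1w', 'T2starw', 'MTw', 'TSE', 'bold'],
    'extension': ['.nii.gz', '.nii.gz', '.json', '.nii', '.nii.gz'],
})
modalities = ['T2starw', 'MTw', 'TSE']

df = df[df['extension'].isin(['.nii', '.nii.gz'])]
df = df[df['suffix'].isin(modalities)]
print(df)   # keeps the T2starw and TSE rows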
Ejemplo n.º 58
0
    def _run_evaluation_on_selected_users(self,
                                          recommender_object,
                                          usersToEvaluate,
                                          block_size=1000):

        start_time = time.time()
        start_time_print = time.time()

        results_dict = {}

        for cutoff in self.cutoff_list:
            results_dict[cutoff] = create_empty_metrics_dict(
                self.n_items, self.n_users, recommender_object.get_URM_train(),
                self.ignore_items_ID, self.ignore_users_ID, cutoff,
                self.diversity_object)

        n_users_evaluated = 0

        # Process the users to evaluate in blocks of block_size users each
        user_batch_start = 0
        user_batch_end = 0

        while user_batch_start < len(self.usersToEvaluate):

            user_batch_end = user_batch_start + block_size
            user_batch_end = min(user_batch_end, len(usersToEvaluate))

            test_user_batch_array = np.array(
                usersToEvaluate[user_batch_start:user_batch_end])
            user_batch_start = user_batch_end

            # Compute predictions for a batch of users using vectorization, much more efficient than computing it one at a time
            recommended_items_batch_list = recommender_object.recommend(
                test_user_batch_array,
                remove_seen_flag=self.exclude_seen,
                cutoff=self.max_cutoff,
                remove_top_pop_flag=False,
                remove_CustomItems_flag=self.ignore_items_flag)

            # Compute recommendation quality for each user in batch
            for batch_user_index in range(len(recommended_items_batch_list)):

                user_id = test_user_batch_array[batch_user_index]
                recommended_items = recommended_items_batch_list[
                    batch_user_index]

                # Since the URM is in CSR format, the indices are the non-zero column indexes
                relevant_items = self.get_user_relevant_items(user_id)
                is_relevant = np.in1d(recommended_items,
                                      relevant_items,
                                      assume_unique=True)

                n_users_evaluated += 1

                for cutoff in self.cutoff_list:

                    results_current_cutoff = results_dict[cutoff]

                    is_relevant_current_cutoff = is_relevant[0:cutoff]
                    recommended_items_current_cutoff = recommended_items[
                        0:cutoff]

                    results_current_cutoff[
                        EvaluatorMetrics.ROC_AUC.value] += roc_auc(
                            is_relevant_current_cutoff)
                    results_current_cutoff[
                        EvaluatorMetrics.PRECISION.value] += precision(
                            is_relevant_current_cutoff, len(relevant_items))
                    results_current_cutoff[
                        EvaluatorMetrics.RECALL.value] += recall(
                            is_relevant_current_cutoff, relevant_items)
                    results_current_cutoff[EvaluatorMetrics.RECALL_TEST_LEN.
                                           value] += recall_min_test_len(
                                               is_relevant_current_cutoff,
                                               relevant_items)
                    results_current_cutoff[EvaluatorMetrics.MAP.value] += map(
                        is_relevant_current_cutoff, relevant_items)
                    results_current_cutoff[EvaluatorMetrics.MRR.value] += rr(
                        is_relevant_current_cutoff)
                    results_current_cutoff[
                        EvaluatorMetrics.NDCG.value] += ndcg(
                            recommended_items_current_cutoff,
                            relevant_items,
                            relevance=self.get_user_test_ratings(user_id),
                            at=cutoff)
                    results_current_cutoff[
                        EvaluatorMetrics.HIT_RATE.
                        value] += is_relevant_current_cutoff.sum()
                    results_current_cutoff[
                        EvaluatorMetrics.ARHR.value] += arhr(
                            is_relevant_current_cutoff)

                    results_current_cutoff[
                        EvaluatorMetrics.NOVELTY.value].add_recommendations(
                            recommended_items_current_cutoff)
                    results_current_cutoff[
                        EvaluatorMetrics.DIVERSITY_GINI.
                        value].add_recommendations(
                            recommended_items_current_cutoff)
                    results_current_cutoff[
                        EvaluatorMetrics.SHANNON_ENTROPY.
                        value].add_recommendations(
                            recommended_items_current_cutoff)
                    results_current_cutoff[
                        EvaluatorMetrics.COVERAGE_ITEM.
                        value].add_recommendations(
                            recommended_items_current_cutoff)
                    results_current_cutoff[
                        EvaluatorMetrics.COVERAGE_USER.
                        value].add_recommendations(
                            recommended_items_current_cutoff, user_id)
                    results_current_cutoff[
                        EvaluatorMetrics.DIVERSITY_MEAN_INTER_LIST.
                        value].add_recommendations(
                            recommended_items_current_cutoff)
                    results_current_cutoff[
                        EvaluatorMetrics.DIVERSITY_HERFINDAHL.
                        value].add_recommendations(
                            recommended_items_current_cutoff)

                    if EvaluatorMetrics.DIVERSITY_SIMILARITY.value in results_current_cutoff:
                        results_current_cutoff[
                            EvaluatorMetrics.DIVERSITY_SIMILARITY.
                            value].add_recommendations(
                                recommended_items_current_cutoff)

                if time.time(
                ) - start_time_print > 30 or n_users_evaluated == len(
                        self.usersToEvaluate):
                    print(
                        "SequentialEvaluator: Processed {} ( {:.2f}% ) in {:.2f} seconds. Users per second: {:.0f}"
                        .format(
                            n_users_evaluated,
                            100.0 * float(n_users_evaluated) /
                            len(self.usersToEvaluate),
                            time.time() - start_time,
                            float(n_users_evaluated) /
                            (time.time() - start_time)))

                    sys.stdout.flush()
                    sys.stderr.flush()

                    start_time_print = time.time()

        return results_dict, n_users_evaluated
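Each of the evaluators above calls np.in1d with assume_unique=True. That flag is only safe because a ranked recommendation list and a user's set of test items contain no duplicates; with duplicated inputs the fast path may return incorrect results, so the flag should be dropped in that case. A small illustration of the intended usage:

import numpy as np

ranked_list = np.array([12, 5, 40, 7])   # a ranked list has no repeated items
relevant = np.array([7, 12])             # test interactions are unique per user

# safe: both inputs are duplicate-free
print(np.in1d(ranked_list, relevant, assume_unique=True))   # [ True False False  True]

# with possible duplicates, omit assume_unique
print(np.in1d([1, 1, 2], [2]))                              # [False False  True]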
Ejemplo n.º 59
0
import numpy as np

# NOTE: the start of this snippet was truncated; `x` is assumed to be a 1-D NumPy array.
x = np.array([1.0, 2.0, 3.0, 4.0])

# mean
print('mean')
print(x.mean())
# standard deviation
print('std deviation')
print(x.std())
# variance
print('variance')
print(x.var())

# logical operations -and / or-
condition2 = np.array([True, False, True])
print('or operator')
print(condition2.any())  # or operator

print('and operator')
print(condition2.all())  # and operator

# sorting in numpy arrays
unsorted_array = np.array([1, 2, 8, 10, 7, 3])
unsorted_array.sort()
print('sorting')
print(unsorted_array)

# unique
arr2 = np.array(['solid', 'solid', 'liquid', 'liquid', 'gas', 'gas'])
print('unique')
print(np.unique(arr2))

# membership test in one dimension
print('1 dimension')
print(np.in1d(['solid', 'gas', 'plasma'], arr2))
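For reference, recent NumPy releases recommend np.isin over np.in1d (np.in1d is deprecated as of NumPy 2.0); np.isin preserves the shape of its first argument. A short sketch using the same arr2 array as above:

import numpy as np

arr2 = np.array(['solid', 'solid', 'liquid', 'liquid', 'gas', 'gas'])
print(np.isin(['solid', 'gas', 'plasma'], arr2))   # [ True  True False]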
Ejemplo n.º 60
0
    def load_km(self, as_sparse=True, sort=False):
        """Load and construct mass and stiffness matrices from an
        ANSYS full file.

        Parameters
        ----------
        as_sparse : bool, optional
            When True (the default), return the mass and stiffness
            matrices as scipy sparse csc arrays.

        sort : bool, optional
            Rearranges the k and m matrices such that the rows
            correspond to the sorted rows and columns in dof_ref.
            Also sorts dof_ref.

        Returns
        -------
        dof_ref : (n x 2) np.int32 array
            This array contains the node and degree corresponding to
            each row and column in the mass and stiffness matrices.
            In a 3 DOF analysis the dof integers will correspond to:
            0 - x
            1 - y
            2 - z
            Sort these values by node number and DOF by enabling the
            sort parameter.

        k : (n x n) np.float or scipy.csc array
            Stiffness array

        m : (n x n) np.float or scipy.csc array
            Mass array

        Examples
        --------
        >>> import pyansys
        >>> full = pyansys.read_binary('file.full')
        >>> dof_ref, k, m = full.load_km()
        >>> print(k)
        (0, 0)       163408119.6581276
        (0, 1)               0.0423270
        (1, 1)       163408119.6581276
        :	:
        (342, 344)     6590544.8717949
        (343, 344)    -6590544.8717950
        (344, 344)    20426014.9572689

        Notes
        -----
        Constrained entries are removed from the mass and stiffness
        matrices.

        Constrained DOF can be accessed from ``const``, which returns
        the node number and DOF constrained in ANSYS.
        """
        if not os.path.isfile(self.filename):
            raise Exception('%s not found' % self.filename)

        if as_sparse:
            try:
                from scipy.sparse import csc_matrix, coo_matrix
            except ImportError:
                raise ImportError('Unable to load scipy, use ``load_km`` with '
                                  '``as_sparse=False``')

        # number of terms in stiffness matrix
        ntermK = two_ints_to_long(self._header['ntermKl'], self._header['ntermKh'])

        ptrSTF = self._header['ptrSTF']  # Location of stiffness matrix
        ptrMAS = self._header['ptrMAS']  # Location in file to mass matrix

        # number of terms in mass matrix
        ntermM = two_ints_to_long(self._header['ntermMl'], self._header['ntermMh'])
        ptrDOF = self._header['ptrDOF']  # pointer to DOF info

        # DOF information
        with open(self.filename, 'rb') as f:
            read_table(f, skip=True)  # standard header
            read_table(f, skip=True)  # full header
            read_table(f, skip=True)  # number of degrees of freedom

            # Nodal equivalence table
            neqv = read_table(f, cython=True)

            # read number of degrees of freedom for each node and constant tables
            f.seek(ptrDOF*4)
            ndof = read_table(f, cython=True)
            const = read_table(f, cython=True)

        # degree of freedom reference and number of degrees of freedom per node
        dof_ref = [ndof, neqv]
        self.ndof = ndof

        # Read k and m blocks (see help(ReadArray) for block description)
        if ntermK:
            krow, kcol, kdata = _binary_reader.read_array(self.filename,
                                                          ptrSTF,
                                                          ntermK,
                                                          self.neqn,
                                                          const)
        else:
            warnings.warn('Missing stiffness matrix')
            kdata = None

        if ntermM:
            mrow, mcol, mdata = _binary_reader.read_array(self.filename,
                                                          ptrMAS,
                                                          ntermM,
                                                          self.neqn,
                                                          const)
        else:
            warnings.warn('Missing mass matrix')
            mdata = None

        # remove constrained entries
        if np.any(const < 0):
            remove = np.nonzero(const < 0)[0]

            if kdata is not None:
                mask = ~np.logical_or(np.in1d(krow, remove), np.in1d(kcol, remove))
                krow = krow[mask]
                kcol = kcol[mask]
                kdata = kdata[mask]

            if mdata is not None:
                mask = ~np.logical_or(np.in1d(mrow, remove), np.in1d(mcol, remove))
                mrow = mrow[mask]
                mcol = mcol[mask]
                mdata = mdata[mask]

        # sort nodal equivalence
        dof_ref, index, nref, dref = _binary_reader.sort_nodal_eqlv(self.neqn,
                                                                    neqv, ndof)

        # store constrained dof information
        unsort_dof_ref = np.vstack((nref, dref)).T
        self._const = unsort_dof_ref[const < 0]

        if sort:  # make sorting the same as ANSYS rdfull would output
            # resort to make in upper triangle
            krow = index[krow]
            kcol = index[kcol]
            krow, kcol = np.sort(np.vstack((krow, kcol)), 0)

            if mdata is not None:
                mrow = index[mrow]
                mcol = index[mcol]
                mrow, mcol = np.sort(np.vstack((mrow, mcol)), 0)

        else:
            dof_ref = unsort_dof_ref

        # store data for later reference
        if kdata is not None:
            self._krow = krow
            self._kcol = kcol
            self._kdata = kdata
        if mdata is not None:
            self._mrow = mrow
            self._mcol = mcol
            self._mdata = mdata

        # output as a sparse matrix
        if as_sparse:

            if kdata is not None:
                k = coo_matrix((self.neqn,) * 2)
                k.data = kdata  # data has to be set first
                k.row = krow
                k.col = kcol

                # convert to csc matrix (generally faster for sparse solvers)
                k = csc_matrix(k)
            else:
                k = None

            if mdata is not None:
                m = coo_matrix((self.neqn,) * 2)
                m.data = mdata
                m.row = mrow
                m.col = mcol

                # convert to csc matrix (generally faster for sparse solvers)
                m = csc_matrix(m)
            else:
                m = None

        else:
            if kdata is not None:
                k = np.zeros((self.neqn,) * 2)
                k[krow, kcol] = kdata
            else:
                k = None

            if mdata is not None:
                m = np.zeros((self.neqn,) * 2)
                m[mrow, mcol] = mdata
            else:
                m = None

        return dof_ref, k, m
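The constrained-entry removal in load_km is just a mask over COO triplets: drop every (row, col, value) whose row or column index appears in the removal list. A toy, self-contained sketch (indices and values are made up and unrelated to any real ANSYS file):

import numpy as np

krow = np.array([0, 1, 2, 3, 4])
kcol = np.array([0, 1, 2, 3, 4])
kdata = np.array([10.0, 20.0, 30.0, 40.0, 50.0])

remove = np.array([1, 3])   # illustrative constrained DOF indices

# keep entries whose row AND column are both outside the removal set
mask = ~np.logical_or(np.in1d(krow, remove), np.in1d(kcol, remove))
print(krow[mask], kcol[mask], kdata[mask])   # [0 2 4] [0 2 4] [10. 30. 50.]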