def test_permutation(): """Test permutation""" v = Var(np.arange(6)) res = np.empty((5, 6)) for i, y in enumerate(resample(v, samples=5)): res[i] = y.x # with unit s = Factor('abc', tile=2) for i, y in enumerate(resample(v, samples=5, unit=s)): res[i] = y.x # check we have only appropriate cells cols = [np.unique(res[:, i]) for i in range(res.shape[1])] for i in range(3): eq_(len(np.setdiff1d(cols[i], [i, i + 3])), 0) for i in range(3, 6): eq_(len(np.setdiff1d(cols[i], [i, i - 3])), 0) # check we have some variability eq_(max(map(len, cols)), 2) # make sure sequence is stable eq_(list(map(tuple, permute_order(4, 3))), [(2, 3, 1, 0), (2, 1, 3, 0), (0, 2, 3, 1)])
def compare_dfs(df_before, df_later): """Compare two dfs, which new columns appear and which new unique values appear per column. Cannot tell if new combinations of values appeared.""" all_cols = np.union1d(df_before.columns, df_later.columns) param_keys = set(all_cols) - set(('test', 'time', 'train', 'test_sample', 'train_sample', 'valid', 'valtest')) for col in param_keys: if col not in df_later.columns: print("Column only exists before: {:s}".format(col)) elif col not in df_before.columns: print("Column only exists after: {:s}".format(col)) else: unique_vals_1 = df_before[col].unique() unique_vals_2 = df_later[col].unique() try: only_in_1 = np.setdiff1d(unique_vals_1, unique_vals_2) if len(only_in_1) > 0: print("Only exist before for {:s}: {:s}".format(col, str(only_in_1))) except ValueError: log.warn("Could not compare before:\n{:s}\nwith after:\n{:s}".format( unique_vals_1, unique_vals_2)) try: only_in_2 = np.setdiff1d(unique_vals_2, unique_vals_1) if len(only_in_2) > 0: print("Only exist after for {:s}: {:s}".format(col, str(only_in_2))) except ValueError: log.warn("Could not compare after:\n{:s}\nwith before:\n{:s}".format( unique_vals_2, unique_vals_1))
def _sensoryComputeLearningMode(self, anchorInput): """ Associate this location with a sensory input. Subsequently, anchorInput will activate the current location during anchor(). @param anchorInput (numpy array) A sensory input. This will often come from a feature-location pair layer. """ overlaps = self.connections.computeActivity(anchorInput, self.connectedPermanence) activeSegments = np.where(overlaps >= self.activationThreshold)[0] potentialOverlaps = self.connections.computeActivity(anchorInput) matchingSegments = np.where(potentialOverlaps >= self.learningThreshold)[0] # Cells with a active segment: reinforce the segment cellsForActiveSegments = self.connections.mapSegmentsToCells( activeSegments) learningActiveSegments = activeSegments[ np.in1d(cellsForActiveSegments, self.activeCells)] remainingCells = np.setdiff1d(self.activeCells, cellsForActiveSegments) # Remaining cells with a matching segment: reinforce the best # matching segment. candidateSegments = self.connections.filterSegmentsByCell( matchingSegments, remainingCells) cellsForCandidateSegments = ( self.connections.mapSegmentsToCells(candidateSegments)) candidateSegments = candidateSegments[ np.in1d(cellsForCandidateSegments, remainingCells)] onePerCellFilter = np2.argmaxMulti(potentialOverlaps[candidateSegments], cellsForCandidateSegments) learningMatchingSegments = candidateSegments[onePerCellFilter] newSegmentCells = np.setdiff1d(remainingCells, cellsForCandidateSegments) for learningSegments in (learningActiveSegments, learningMatchingSegments): self._learn(self.connections, self.rng, learningSegments, anchorInput, potentialOverlaps, self.initialPermanence, self.sampleSize, self.permanenceIncrement, self.permanenceDecrement, self.maxSynapsesPerSegment) # Remaining cells without a matching segment: grow one. numNewSynapses = len(anchorInput) if self.sampleSize != -1: numNewSynapses = min(numNewSynapses, self.sampleSize) if self.maxSynapsesPerSegment != -1: numNewSynapses = min(numNewSynapses, self.maxSynapsesPerSegment) newSegments = self.connections.createSegments(newSegmentCells) self.connections.growSynapsesToSample( newSegments, anchorInput, numNewSynapses, self.initialPermanence, self.rng) self.activeSegments = activeSegments
def check_exp(manip_name): dir_names = glob(os.path.join(manip_name, '*[0-9]')) dir_names.sort() for name in dir_names: if not os.path.isdir(name): dir_names.remove(name) h5_pag_names = glob(os.path.join(manip_name, 'volfloat/*pag0001.h5')) h5_pag_names.sort() h5_pag_names = [os.path.basename(name) for name in h5_pag_names] h5_pag_list = np.array([int(name[-13:-10]) for name in h5_pag_names]) h5_names = glob(os.path.join(manip_name, 'volfloat/*[0-9]0001.h5')) h5_names.sort() h5_list = np.array([int(name[-10:-7]) for name in h5_names]) h5_names = [os.path.basename(name) for name in h5_names] vol_names = glob(os.path.join(manip_name, 'volfloat/*.vol')) vol_names.sort() vol_names = [os.path.basename(name) for name in vol_names] dir_names = [os.path.basename(name) for name in dir_names] dir_list = np.array([int(name[-3:]) for name in dir_names]) missing_h5 = np.setdiff1d(dir_list, h5_list) missing_h5_pag = np.setdiff1d(dir_list, h5_pag_list) print "dir: ", dir_list print "h5: ", h5_list print "h5_pag:", h5_pag_list print "vol: ", vol_names print "missing h5: ", missing_h5 print "missing h5 pag: ", missing_h5_pag
def move_S_E(self, g): ind = np.where( g < 10**(-6) )[0] idx = np.intersect1d( self.S, ind ).tolist() self.S = np.setdiff1d( self.S, idx ).tolist() self.E = np.setdiff1d( self.E, idx ).tolist() self.E.extend( idx )
def stabilize(self, A, t, node, selected): """ check that the given node can be solved with a stable linear system """ C = linalg.inv(np.dot(A.T, A)) best_idx = 0 c_min = self.theta+1 a_idx = -1 if node.dim == x_dim: for candidate in np.setdiff1d(np.nonzero(self.y_computed)[0], selected): (c, _tau) = self.compute_lcn_extend(A, t, C, self.y[candidate,:], node.idx, candidate) if c < c_min: a_star = self.y[candidate,:] a_idx = candidate c_min = c tau = _tau else: for candidate in np.setdiff1d(np.nonzero(self.x_computed)[0], selected): (c, _tau) = self.compute_lcn_extend(A, t, C, self.x[candidate,:], candidate, node.idx) if c < c_min: a_star = self.x[candidate,:] a_idx = candidate c_min = c tau = _tau if c_min < self.theta: log.debug('c_min : %f' % c_min) return a_star, a_idx return None
def prune_feature_to_category(self, min_features_per_cat): self.feature_to_category = self.feature_to_category.drop_duplicates() ftc_features = self.feature_to_category[self.feature_name].unique() features = self.feature_df[self.feature_name].unique() not_in_ftc = np.setdiff1d(features, ftc_features, assume_unique=True) not_in_feature_df = np.setdiff1d(ftc_features, features, assume_unique=True) if len(not_in_ftc) > 0: logging.warning( "{} features from the features file are not " "in the feature_to_category mapping.".format(len(not_in_ftc)) ) if len(not_in_feature_df) > 0: logging.warning( "{} features from the feature_to_category mapping are not " "in the features file. Removing them.".format(len(not_in_feature_df)) ) self.feature_to_category = ( self.feature_to_category.set_index(self.feature_name).drop(pd.Index(not_in_feature_df)).reset_index() ) if min_features_per_cat > 0: n_cats = len(self.feature_to_category[self.category_name].unique()) logging.info( "Removing categories for which there are less than {} " "features in the features file.".format(min_features_per_cat) ) self.feature_to_category = self.feature_to_category.groupby(self.category_name).filter( lambda x: len(x) >= min_features_per_cat ) n_removed = n_cats - len(self.feature_to_category[self.category_name].unique()) logging.info("{} categories removed.".format(n_removed)) self.feature_to_category.reset_index(inplace=True)
def performGreedyMIExperimentalDesign(costFuncMI, nPoints, start=0): """ Performs greedy experimental design by minimizing mutual information by choosing points among all available MC points obtained from kernel Parameters ---------- nPoints : int number of points to start with Returns ------- endVals : ndarray final point set """ indKeep = [start] indexOptions = np.setdiff1d(np.arange(costFuncMI.nMC), indKeep) for ii in xrange(len(indKeep),nPoints): out = np.zeros((len(indexOptions))) for jj, ind in enumerate(indexOptions): out[jj] = costFuncMI.evaluate(ind, indKeep) #Index to add indNew = indexOptions[np.argmax(out)] indKeep.append(indNew) #Remove index from options indexOptions = np.setdiff1d(indexOptions, indNew) #remove this index return costFuncMI.mcPoints[indKeep,:]
def assert_meg_snr(actual, desired, min_tol, med_tol=500., chpi_med_tol=500., msg=None): """Assert channel SNR of a certain level. Mostly useful for operations like Maxwell filtering that modify MEG channels while leaving EEG and others intact. """ picks = pick_types(desired.info, meg=True, exclude=[]) picks_desired = pick_types(desired.info, meg=True, exclude=[]) assert_array_equal(picks, picks_desired, err_msg='MEG pick mismatch') chpis = pick_types(actual.info, meg=False, chpi=True, exclude=[]) chpis_desired = pick_types(desired.info, meg=False, chpi=True, exclude=[]) if chpi_med_tol is not None: assert_array_equal(chpis, chpis_desired, err_msg='cHPI pick mismatch') others = np.setdiff1d(np.arange(len(actual.ch_names)), np.concatenate([picks, chpis])) others_desired = np.setdiff1d(np.arange(len(desired.ch_names)), np.concatenate([picks_desired, chpis_desired])) assert_array_equal(others, others_desired, err_msg='Other pick mismatch') if len(others) > 0: # if non-MEG channels present assert_allclose(_get_data(actual, others), _get_data(desired, others), atol=1e-11, rtol=1e-5, err_msg='non-MEG channel mismatch') _check_snr(actual, desired, picks, min_tol, med_tol, msg, kind='MEG') if chpi_med_tol is not None and len(chpis) > 0: _check_snr(actual, desired, chpis, 0., chpi_med_tol, msg, kind='cHPI')
def inverse_transform(self, yt): """Transform the given indicator matrix into label sets Parameters ---------- yt : array or sparse matrix of shape (n_samples, n_classes) A matrix containing only 1s ands 0s. Returns ------- y : list of tuples The set of labels for each sample such that `y[i]` consists of `classes_[j]` for each `yt[i, j] == 1`. """ check_is_fitted(self, 'classes_') if yt.shape[1] != len(self.classes_): raise ValueError('Expected indicator for {0} classes, but got {1}' .format(len(self.classes_), yt.shape[1])) if sp.issparse(yt): yt = yt.tocsr() if len(yt.data) != 0 and len(np.setdiff1d(yt.data, [0, 1])) > 0: raise ValueError('Expected only 0s and 1s in label indicator.') return [tuple(self.classes_.take(yt.indices[start:end])) for start, end in zip(yt.indptr[:-1], yt.indptr[1:])] else: unexpected = np.setdiff1d(yt, [0, 1]) if len(unexpected) > 0: raise ValueError('Expected only 0s and 1s in label indicator. ' 'Also got {0}'.format(unexpected)) return [tuple(self.classes_.compress(indicators)) for indicators in yt]
def evaluate(self, index, indexAdded): """ index: int index of MC points whihc one is considering to add indexAdded: list of indices indices already added """ point = self.mcPoints[index,:].reshape((1,self.space.dimension)) var = self.gaussianProcess.kernel.evaluate(point, point) pointsAdded = self.mcPoints[indexAdded,:].reshape((len(indexAdded), self.space.dimension)) #Numerator numpointVsAdded = self.gaussianProcess.kernel.evaluate(pointsAdded, point) covNum = gp_kernel_utilities.calculateCovarianceMatrix(self.gaussianProcess.kernel, pointsAdded, self.gaussianProcess.noise) invCovNum = np.linalg.pinv(covNum) numerator = var - np.dot(numpointVsAdded.T, np.dot(invCovNum, numpointVsAdded)) #Compute variance of all locations other than those provided in indexAdded and index leftIndices = np.setdiff1d(np.arange(self.nMC),indexAdded) leftIndices = np.setdiff1d(leftIndices, [index]) pointsLeft = self.mcPoints[leftIndices,:] #Denominator numpointVsNotAdded = self.gaussianProcess.kernel.evaluate(pointsLeft, point) covDen = gp_kernel_utilities.calculateCovarianceMatrix(self.gaussianProcess.kernel, pointsLeft, self.gaussianProcess.noise) invCovDen = np.linalg.pinv(covDen) denominator = var - np.dot(numpointVsNotAdded.T, np.dot(invCovDen, numpointVsNotAdded)) out = numerator/denominator return out
def Convert_Bin_To_Domain_TMP(n_bins, signal_idx, gap_idx, pvalues=None, pvalue_cut=None): bins = dict() rmv_idx = np.setdiff1d(np.arange(n_bins),gap_idx) proc_region = Which_process_region(rmv_idx, n_bins, min_size=0) for key in proc_region: bins[proc_region[key]['start']] = {'start': proc_region[key]['start'], 'end' : (proc_region[key]['end']+1), 'score': 10, 'tag' : 'gap'} rmv_idx = np.union1d(signal_idx, gap_idx) proc_region = Which_process_region(rmv_idx, n_bins, min_size=0) for key in proc_region: bins[proc_region[key]['start']] = {'start': proc_region[key]['start'], 'end' : (proc_region[key]['end']+1), 'score': 10, 'tag' : 'domain'} rmv_idx = np.setdiff1d(np.arange(n_bins),signal_idx) proc_region = Which_process_region(rmv_idx, n_bins, min_size=1) for key in proc_region: bins[proc_region[key]['start']] = {'start': proc_region[key]['start'], 'end' : (proc_region[key]['end']+1), 'score': 10, 'tag' : 'boundary'} if pvalues is not None and pvalue_cut is not None: for key in bins: if bins[key]['tag'] == 'domain': start_id = bins[key]['start'] end_id = bins[key]['end'] p_value_constr = pvalues[start_id:end_id] bins[key]['score'] = p_value_constr.mean() p_value_constr = p_value_constr[p_value_constr < pvalue_cut] if end_id - start_id == len(p_value_constr): bins[key]['tag'] = "boundary" return bins
def generate_ind_pairs(img_num_per_cam_person, person_inds, cam_inds): positive_pair_a = numpy.asarray([], dtype='int32') positive_pair_b = numpy.asarray([], dtype='int32') negative_pair_a = numpy.asarray([], dtype='int32') negative_pair_b = numpy.asarray([], dtype='int32') past_person_inds = numpy.asarray([], dtype='int32') for i in person_inds: past_person_inds = numpy.append(past_person_inds, i) past_cam_inds = numpy.asarray([], dtype='int32') for j in cam_inds: past_cam_inds = numpy.append(past_cam_inds, j) target_abs_inds = get_abs_ind_one(img_num_per_cam_person, i, j) pos_cand_abs_inds = get_abs_ind_per_cam_person(img_num_per_cam_person, numpy.asarray([i], dtype='int32'), numpy.setdiff1d(cam_inds, past_cam_inds)) neg_cand_abs_inds = get_abs_ind_per_cam_person(img_num_per_cam_person, numpy.setdiff1d(person_inds, past_person_inds), numpy.setdiff1d(cam_inds, j)) [tmp_a, tmp_b] = numpy.meshgrid(pos_cand_abs_inds, target_abs_inds) positive_pair_a = numpy.append(positive_pair_a, tmp_b.flatten()) positive_pair_b = numpy.append(positive_pair_b, tmp_a.flatten()) [tmp_a, tmp_b] = numpy.meshgrid(neg_cand_abs_inds, target_abs_inds) negative_pair_a = numpy.append(negative_pair_a, tmp_b.flatten()) negative_pair_b = numpy.append(negative_pair_b, tmp_a.flatten()) return [positive_pair_a, positive_pair_b, negative_pair_a, negative_pair_b]
def write_mountains_cities_plains(df): ''' INPUT: (1) Pandas DataFrame OUTPUT: None This function will look through the locations and write a new column called 'mcp' (mountains cities plains) that somewhat arbitrarily makes a split between locations in Colorado that fall into one of these categories. MCP categorizations not used in final model, but were helpful in the beginning stages of training the model. ''' cities = reduce(np.intersect1d, [np.where(df['elev'] > 1300), np.where(df['elev'] < 2400), np.where(df['lat'] > 38.6), np.where(df['lng'] > -105.25), np.where(df['lng'] < -104.2)]) not_cities = np.setdiff1d(np.arange(len(df)), cities) plains = reduce(np.intersect1d, [not_cities, np.where(df['lng'] > -105.25), np.where(df['elev'] < 1800)]) not_plains = np.setdiff1d(np.arange(len(df)), plains) mountains = reduce(np.intersect1d, [not_cities, not_plains, np.where(df['lng'] < -102)]) category = ['mtn' if i in mountains else 'city' if i in cities else 'plains' for i in range(len(df))] df['mcp'] = category
def _lower_values(self, df, index_dict): ''' lowers df items to the lowest entry in the field where the entry is larger than min_entries this is necessary because models should be trained on data with equal representation from all classes. parameters: df = the data frame with all the data index_dict = the dictionary with correct index values for a given field key returns: index_dict = modified index dict that only has good index values for each field key ''' for field in self.fields: if field == "age": continue value_counts = df[field].value_counts() while min(value_counts) < self.min_entries: #delete lowest index if it is too low index_dict[field] = np.setdiff1d( index_dict[field], df[df[field] == value_counts.idxmin()].index, assume_unique=True ) value_counts = value_counts.drop(value_counts.idxmin()) min_value = min(value_counts) good_items_grouped = df.ix[index_dict[field]].groupby(field) for value in value_counts.index: #lower all values to the lowest field count index_dict[field] = np.setdiff1d( index_dict[field], good_items_grouped.groups[value][min_value:], assume_unique=True ) return index_dict
def getSubjectList(GroupDF,RejectMotion=True,motionThresh=0.2,motionType='RMS',poor_performer=20): #Reject Depressed subjects depressed=['CCD072','CCD098','CCD083','CCD062','CCD061','CCD051','CCD087'] # poor_performers=['CCD094','CCD075','CCD086','CCD080','CCD076','CCD065','CCD034'] #reject large motion subjects allsubj=unique(GroupDF['Subject_ID']) #remove depressed allsubj = np.setdiff1d(allsubj,depressed) if motionType=='FD': motionReject=unique((GroupDF[GroupDF.meanFD>motionThresh]['Subject_ID'])) else: motionReject=unique((GroupDF[GroupDF.Max_Relative_RMS_Displacement>motionThresh]['Subject_ID'])) if RejectMotion: goodsubj=np.setdiff1d(allsubj,motionReject) else: goodsubj=allsubj # goodsubj=np.setdiff1d(goodsubj,np.array(depressed)) df=getSubjectButtonResponses() tmp=df.groupby('subject')['number'].sum() goodperformers=np.array(tmp[tmp>poor_performer].index[:]) poorperformers = np.setdiff1d(goodsubj,goodperformers) #remove poor performers goodsubj=np.intersect1d(goodsubj,goodperformers) return goodsubj,motionReject,poorperformers
def runTest(self): m=fmsh.MeshTri() m.refine(4) # split mesh into two sets of triangles I1=np.arange(m.t.shape[1]/2) I2=np.setdiff1d(np.arange(m.t.shape[1]),I1) bix=m.boundary_facets() bix1=bix[0:len(bix)/2] bix2=np.setdiff1d(bix,bix1) a=fasm.AssemblerElement(m,felem.ElementTriP1()) def dudv(du,dv): return du[0]*dv[0]+du[1]*dv[1] A=a.iasm(dudv) A1=a.iasm(dudv,tind=I1) A2=a.iasm(dudv,tind=I2) B=a.fasm(dudv) B1=a.fasm(dudv,find=bix1) B2=a.fasm(dudv,find=bix2) f=a.iasm(lambda v: 1*v) I=m.interior_nodes() x=np.zeros(A.shape[0]) x[I]=scipy.sparse.linalg.spsolve((A+B)[I].T[I].T,f[I]) X=np.zeros(A.shape[0]) X[I]=scipy.sparse.linalg.spsolve((A1+B1)[I].T[I].T+(A2+B2)[I].T[I].T,f[I]) self.assertAlmostEqual(np.linalg.norm(x-X),0.0,places=10)
def addGraph(self, simulatedGraph): """ Compute the objective between this graph and the realGraph at the time of the last event of this one. """ t = simulatedGraph.endTime() #Only select vertices added after startTime and before t inds = numpy.setdiff1d(simulatedGraph.removedIndsAt(t), simulatedGraph.removedIndsAt(self.startTime)) subgraph = simulatedGraph.subgraph(inds) inds = numpy.setdiff1d(self.realGraph.removedIndsAt(t), self.realGraph.removedIndsAt(self.startTime)) subTargetGraph = self.realGraph.subgraph(inds) #logging.debug("Simulated size " + str(subgraph.size) + " and real size " + str(subTargetGraph.size)) self.graphSizes.append(subgraph.size) #Only add objective if the real graph has nonzero size if subTargetGraph.size != 0 and subgraph.size <= self.maxSize: permutation, distance, time = self.matcher.match(subgraph, subTargetGraph) lastObj, lastGraphObj, lastLabelObj = self.matcher.distance(subgraph, subTargetGraph, permutation, True, False, True) self.computationalTimes.append(time) self.objectives.append(lastObj) self.graphObjs.append(lastGraphObj) self.labelObjs.append(lastLabelObj) self.times.append(t) else: logging.debug("Not adding objective at time " + str(t) + " with simulated size " + str(subgraph.size) + " and real size " + str(subTargetGraph.size))
def split_train_test(train,y,prop): m=np.shape(train)[0] #ordered list of classes classes=np.unique(y) #list of lists where in each sublist are the indexes of each class props=[np.where(y==i)[0] for i in classes] """create list of arrays where each array has the indexes of the test fold instances""" idxs_test=[random.sample(_class,len(_class)/prop) for _class in props] #list of lists removes the picked test indexes _props=[np.setdiff1d(props[i],idxs_test[i]) for i in range(len(props))] #well it worked this way props=_props #turn list of lists into a list idxs_test=np.array([item for array in idxs_test for item in array]) #select test subset _test=train.irow(idxs_test) """Removes the test indexes from the range of original train to form the train indexes""" idxs_train=np.setdiff1d(range(m),idxs_test) #selects train subset _train=train.irow(idxs_train) #train target variable y_train=y[idxs_train] #test target variable y_test=y[idxs_test] return _train,_test,y_train,y_test
def test_permutation(): """Test permutation""" v = Var(np.arange(6)) res = np.empty((5, 6)) for i, y in enumerate(resample(v, samples=5)): res[i] = y.x logging.info('Standard Permutation:\n%s' % res) # with unit s = Factor('abc', tile=2) for i, y in enumerate(resample(v, samples=5, unit=s)): res[i] = y.x logging.info('Permutation with Unit:\n%s' % res) # check we have only appropriate cells cols = [np.unique(res[:, i]) for i in xrange(res.shape[1])] for i in xrange(3): eq_(len(np.setdiff1d(cols[i], [i, i + 3])), 0) for i in xrange(3, 6): eq_(len(np.setdiff1d(cols[i], [i, i - 3])), 0) # check we have some variability eq_(max(map(len, cols)), 2) # with sign flip v = Var(np.arange(1, 7)) res = np.empty((2 ** 6 - 1, 6)) for i, y in enumerate(resample(v, samples=-1, sign_flip=True)): res[i] = y.x logging.info('Permutation with sign_flip:\n%s' % res) ok_(np.all(res.min(1) < 0), "Not all permutations have a sign flip")
def kfold(data, labels, k): try: import svmutil except: return 0 prabs = [] for xxx in range(0, 10): picks = np.random.choice(len(data), len(data) / k, replace=False) testLabel = labels[picks] testPoint = data[picks] trainPoint = data[np.setdiff1d(range(0, len(data)), picks)] trainLabel = labels[np.setdiff1d(range(0, len(data)), picks)] trainLabel = trainLabel.tolist() trainPoint = trainPoint.tolist() prob = svmutil.svm_problem(trainLabel, trainPoint) param = svmutil.svm_parameter('-t 3 -c 4 -b 1 -q') testLabel = testLabel.tolist() testPoint = testPoint.tolist() m = svmutil.svm_train(prob, param) svmutil.svm_save_model('n.model', m) p_label, p_acc, p_val = svmutil.svm_predict(testLabel, testPoint, m, '-b 1') prabs.append(p_acc[0]) print sum(prabs) / float(len(prabs)) print 'std' + str(np.std(prabs)) return sum(prabs) / float(len(prabs))
def stratified_group_kfold(y,group,K): testfold=np.zeros(y.shape[0]) zero_pool = np.asarray(np.where(y == 0)).flatten() one_pool = np.asarray(np.where(y == 1)).flatten() for kk in range(K): zero_target = zero_pool.shape[0]/(K-kk) one_target = one_pool.shape[0]/(K-kk) test_zero_pool = np.random.choice(zero_pool,size=zero_target) test_zero_index = [] test_one_pool = np.random.choice(one_pool,size=one_target) test_one_index = [] for i in test_zero_pool: if len(test_zero_index)<= zero_target: tmp = np.array(np.where(group==group[i])).ravel() for j in tmp: test_zero_index.append(j) for i in test_one_pool: if len(test_one_index)<= one_target: tmp = np.array(np.where(group==group[i])).ravel() for j in tmp: test_one_index.append(j) test_zero_index = np.unique(test_zero_index) test_one_index = np.unique(test_one_index) test_index = np.concatenate((test_one_index,test_zero_index)) zero_pool = np.setdiff1d(zero_pool, test_zero_index) one_pool = np.setdiff1d(one_pool, test_one_index) testfold[test_index]=kk return testfold
def dropDimensions(self, drop=None, keep=None): """ Drop some dimensions of the data given their indexes Parameters ---------- drop : list A list of integer indexes of the dimensions to drop keep : list A list of integer indexes of the dimensions to keep Examples -------- >>> data.dropDimensions([1, 24, 3]) >>> data.dropDimensions(keep=[2, 10]) """ if drop is not None and not isinstance(drop, np.ndarray): drop = np.array(drop) if keep is not None and not isinstance(keep, np.ndarray): keep = np.array(keep) if drop is not None and keep is not None: raise AttributeError('drop and keep arguments for dropDimensions are mutually exclusive. Pass only one.') if keep is not None: keepidx = keep dropidx = np.arange(self.numDimensions) dropidx = np.setdiff1d(dropidx, keepidx) else: dropidx = drop keepidx = np.arange(self.numDimensions) keepidx = np.setdiff1d(keepidx, dropidx) for i, d in enumerate(self.dat): self.dat[i] = self.dat[i][:, keepidx] self.map = self.map.drop(self.map.index[dropidx]) self.map = self.map.reset_index(drop=True)
def main(): size = 150000 #1000000 bits = 18 dt = np.dtype([('first','u8'),('mid','u4'),('last','u8')]) a = np.arange(size) dd = np.zeros(size, dtype='a20') ss = a.view(dtype='a%s'%(a.dtype.itemsize)) pos = 0 for s in ss: sha = hashlib.sha1() sha.update(s) dd[pos] = sha.digest() pos += 1 digs = dd.view(dtype=dt) msk = 2**bits-1 addresses = np.bitwise_and(digs['last'], msk) ua, uinds = np.unique(addresses, return_index=True) remains = np.setdiff1d(np.arange(size), uinds, assume_unique=True) uaddr = np.setdiff1d(ua, np.unique(addresses[remains]), assume_unique=True) ln = 2**bits fr = ln-len(ua) collides = size-len(uaddr) print " size: %d"%(ln) print " samples: %d"%(size) print " unique: %d"%(len(uaddr)) print " free: %d/%d = %.1f%%"%(fr, ln, 100.0*fr/ln) print "collisions: %d/%d = %.1f%%"%(collides, ln, 100.0*collides/ln)
def neurons_df(): """ Join all available neuron information into a single Pandas DF """ nodes = chen_neurons_df() pos = kaiser_positions_df() nodes_only = np.setdiff1d(nodes.index.values, pos.index.values) pos_only = np.setdiff1d(pos.index.values, nodes.index.values) print "Nodes but not pos: ", nodes_only print "Pos but not nodes: ", pos_only # Perform a left join with nodes on the left and positions on the right, # so we only use those neurons provided by Chen dfr = nodes.join(pos, how="left") # Now add missing positions from symmetric partners for node in nodes_only: mirror = sym_node_name(node) print "Copying position for ", node, " from ", mirror dfr.loc[node, 'kx'] = dfr.loc[mirror, 'kx'] dfr.loc[node, 'ky'] = dfr.loc[mirror, 'ky'] # Join with info about neuron class and type ww_nodes = ww_neurons_df() dfr = dfr.join(ww_nodes, how="left") # Join links from worm atlas website wa_links = wa_links_df() dfr = dfr.join(wa_links, how="left") print dfr.info() return dfr
def ancestors(x, adj): """Find the ancestors of a node x in a DAG adj. Parameters : x : int : The node. adj : np.ndarray[n_node, n_node] : Adjacency matrix. Returns : a : np.ndarray[n_a, ] : Ancestors of x. Raises : None Notes : a = maxpot(pot, invariables) """ if not isinstance(adj, np.ndarray): adj = np.array(adj) done = False a = parents(x, adj) while not done: aold = a a = np.union1d(a, parents(a, adj)) done = np.setdiff1d(a, aold).size == 0 if not isinstance(x, collections.Sequence): a = np.setdiff1d(a, np.array([x])) else: a = np.setdiff1d(a, x) return a
def random_pos_orth(shape): rows, cols = shape A = np.zeros((rows, cols), dtype=np.float32) if rows <= cols: v = np.arange(0, cols) ss = cols // rows for rid in range(rows): cid = np.random.choice(v, ss) v = np.setdiff1d(v, cid) Av = np.ones(len(cid)) A[rid, cid] = Av return A / (ss / np.sqrt(ss)) else: ss = rows // cols vr = np.arange(0, rows) vc = np.arange(0, cols) rids = np.random.choice(vr, rows) for rid in rids: if len(vc) == 0: break cid = np.random.choice(vc, 1) vc = np.setdiff1d(vc, cid) Av = np.ones(len(cid)) A[rid, cid] = Av return A
def rebuild_filter(m): global filt latest_indices = numpy.where(m>0)[0] new = numpy.setdiff1d(latest_indices, indices) dropped = numpy.setdiff1d(indices, latest_indices) # If our sat set hasn't changed, no need to rebuild if len(new) == 0 and len(dropped) == 0: return if filt is not None: last_parts = filt.posterior().particles init_mean # The initial PDF for corrections are Gaussian particles around the Klobuchar corrections mean = numpy.array(m) cov = numpy.diag([gps_cov] * 33) init_pdf = pybayes.pdfs.GaussPdf(mean, cov) # The state transition PDF is just Gaussian around the last state cov = [ state_cov ] * 33 A = numpy.identity(33) b = [0] * 33 p_xt_xtp = pybayes.pdfs.MLinGaussCPdf(cov, a, b) # The measurement probability PDF is an EVD, or more precisely a Gumbel Distribution # The implementation allows negative b to indicate Gumbel p_yt_xt = pybayes.pdfs.EVDCpdf([0] * 33, [gps_cov] * 33) filt = pybayes.filters.ParticleFilter(n, init_pdf, p_xt_xtp, p_yt_xt)
def createEvenlySpreadSDRs(numSDRs, n, w): """ Return a set of ~random SDRs that use every available bit an equal number of times, +- 1. """ assert w <= n available = np.arange(n) np.random.shuffle(available) SDRs = [] for _ in xrange(numSDRs): selected = available[:w] available = available[w:] if available.size == 0: remainderSelected = np.random.choice( np.setdiff1d(np.arange(n), selected), size=(w - selected.size), replace= False) selected = np.append(selected, remainderSelected) available = np.setdiff1d(np.arange(n), remainderSelected) np.random.shuffle(available) selected.sort() SDRs.append(selected) return SDRs
def test_bdf_utils_01(self): msg = '1:10 14:20:2 50:40:-1' output = parse_patran_syntax(msg, pound=None) expected = np.array( [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 16, 18, 20, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] ) error_msg = 'expected equal; A-B=%s; B-A=%s' % ( np.setdiff1d(output, expected), np.setdiff1d(expected, output)) assert np.array_equal(output, expected), error_msg msg = '1:#' output = parse_patran_syntax(msg, pound=5) assert np.array_equal(output, [1, 2, 3, 4, 5]) msg = '#:1' with self.assertRaises(ValueError): output = parse_patran_syntax(msg, pound=None) #assert array_equal(output, [1, 2, 3, 4, 5]) msg = '1:#' output = parse_patran_syntax(msg, pound='5') assert np.array_equal(output, [1, 2, 3, 4, 5]) # should this raise an error? msg = '#:1' #with self.assertRaises(ValueError): output = parse_patran_syntax(msg, pound='5')
def generateDscatter(dds, si=0, fi=1, lbls=None, ndata=3, nofig=False, fig=20, scatterarea=80): """ Generate scatter plot for D and Ds efficiencies """ data = dds.T pp = oahelper.createPareto(dds) paretoidx = np.array(pp.allindices()) nn = dds.shape[0] area = scatterarea * np.ones(nn, ) / 2 area[np.array(pp.allindices())] = scatterarea alpha = 1.0 if dds.shape[1] > ndata: colors = dds[:, ndata] else: colors = np.zeros((nn, 1)) idx = np.unique(colors).astype(int) try: mycmap = brewer2mpl.get_map('Set1', 'qualitative', idx.size).mpl_colors except: mycmap = [matplotlib.cm.jet(ii) for ii in range(4)] pass # For remaining spines, thin out their line and change the black to a # slightly off-black dark grey almost_black = '#202020' figh = plt.figure(fig) # ,facecolor='red') plt.clf() figh.set_facecolor('w') ax = plt.subplot(111) nonparetoidx = np.setdiff1d(range(nn), paretoidx) ax.scatter(data[fi, nonparetoidx], data[si, nonparetoidx], s=.33 * scatterarea, c=(.5, .5, .5), linewidths=0, alpha=alpha, label='Non-pareto design') # print(area) for jj, ii in enumerate(idx): gidx = (colors == ii).nonzero()[0] gp = np.intersect1d(paretoidx, gidx) color = mycmap[jj] cc = [color] * len(gp) print('index %d: %d points' % (ii, gidx.size)) ax.scatter(data[fi, gp], data[si, gp], s=scatterarea, c=cc, linewidths=0, alpha=alpha, label=lbls[jj]) # , zorder=4) plt.draw() if data[si, :].std() < 1e-3: y_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False) ax.yaxis.set_major_formatter(y_formatter) if 0: for xi, al in enumerate(sols): D, Ds, D1 = al.Defficiencies() print('D1 %f Ds %f D %f' % (D1, Ds, D)) tmp = plt.scatter(Ds, D, s=60, color='r') if xi == 0: tmp = plt.scatter(Ds, D, s=60, color='r', label='Strength 3') plt.draw() xlabelhandle = plt.xlabel('$D_s$-efficiency', fontsize=16) plt.ylabel('D-efficiency', fontsize=16) try: oahelper.setWindowRectangle(10, 10, 860, 600) except Exception as e: print('generateDscatter: setWindowRectangle failed') # print(e) pass plt.axis('image') pltlegend = ax.legend(loc=3, scatterpoints=1) # , fontcolor=almost_black) if not nofig: plt.show() # time.sleep(0.01) ax.grid(b=True, which='both', color='0.85', linestyle='-') ax.set_axisbelow(True) if not nofig: plt.draw() hh = dict({'ax': ax, 'xlabelhandle': xlabelhandle, 'pltlegend': pltlegend}) return hh
import numpy as np np.random.seed = 42 n_fraude = 5000 n_normal = 5000 renda_fraude = np.random.normal(1000, 1250, n_fraude * 2) renda_fraude_retirados = renda_fraude[renda_fraude < 700] renda_fraude = np.setdiff1d(renda_fraude, renda_fraude_retirados) valores_alem = np.random.choice(renda_fraude, renda_fraude.size - 5000, replace=False) renda_fraude = np.setdiff1d(renda_fraude, valores_alem) renda_normal = np.random.normal(1000, 1250, n_normal * 2) renda_normal_retirados = renda_normal[renda_normal < 700] renda_normal = np.setdiff1d(renda_normal, renda_normal_retirados) valores_alem = np.random.choice(renda_normal, renda_normal.size - 5000, replace=False) renda_normal = np.setdiff1d(renda_normal, valores_alem) valor_compra_fraude = np.random.normal(2000, 500, n_fraude) valor_compra_normal = np.random.normal(100, 250, n_normal) #https://www.terra.com.br/noticias/dino/a-cada-3-meses-o-brasileiro-gasta-cerca-de-r661-em-compras-pela-internet,70f7a51ef268f3648d98bec463372741o1lsr05w.html media_gastos_fraude = np.random.normal(720, 180, n_fraude) media_gastos_normal = np.random.normal(220, 250, n_normal * 2) media_gastos_normal_retirados = media_gastos_normal[media_gastos_normal <= 0] media_gastos_normal = np.setdiff1d(media_gastos_normal, media_gastos_normal_retirados)
def coherency(X, Y, tapers = None, sampling = 1, fk = None, pad = 2, pval = 0.05, flag = 11, contflag = 0,errorchk = True): # COHERENCY calculates the coherency between two time series, X and Y # # [COH, F, S_X, S_Y,, COH_ERR, SX_ERR, SY_ERR] = ... # COHERENCY(X, Y, TAPERS, SAMPLING, FK, PAD, PVAL, FLAG, CONTFLAG) # # Inputs: X = Time series array in [Space / Trials, Time] form. # Y = Time series array in [Space / Trials, Time] form. # TAPERS = Data tapers in [K, TIME], [N, P, K] or [N, W] form. # Defaults to[N, 5, 9] where N is the duration # of X and Y. # SAMPLING = Sampling rate of time series X, in Hz. # Defaults to 1. # FK = Frequency range to return in Hz in # either[F1, F2] or [F2] form. # In[F2] form, F1 is set to 0. # Defaults to[0, SAMPLING / 2] # PAD = Padding factor for the FFT. # i.e.For N = 500, if PAD = 2, we pad the FFT # to 1024 points; if PAD = 4, we pad the FFT # to 2048 points. # Defaults to 2. # PVAL = P - value to calculate error bars for . # Defaults to 0.05 i.e. 95 % confidence. # # FLAG = 0: calculate COH separately for each channel / trial. # FLAG = 1: calculate COH by pooling across channels / trials. # FLAG = 11 calculation is done as for FLAG = 1 # but the error bars cannot be calculated to save memory. # Defaults to FLAG = 11. # CONTFLAG = 1; There is only a single continuous signal coming in. # Defaults to 0. # # Outputs: COH = Coherency between X and Y in [Space / Trials, Freq]. # F = Units of Frequency axis for COH # S_X = Spectrum of X in [Space / Trials, Freq] form. # S_Y = Spectrum of Y in [Space / Trials, Freq] form. # COH_ERR = Error bars for COH in [Hi / Lo, Space, Freq] # form given by the Jacknife - t interval for PVAL. # SX_ERR = Error bars for S_X. # SY_ERR = Error bars for S_Y. # # Original Matlab code written by: Bijan Pesaran Caltech 1998 # Modified: September 2003. 
# Python translated by: Seth Richards # Version Date 2020/06/14 # LAG = 0 may not function as intended sX = X.shape nt1 = sX[1] nch1 = sX[0] sY = Y.shape nt2 = sY[1] nch2 = sY[0] if nt1 != nt2: raise Exception('Error: Time series are not the same length') if nch1 != nch2: raise Exception('Error: Time series are incompatible') nt = nt1 nch = nch1 nt = np.true_divide(nt,sampling) if tapers is None: tapers = np.array([nt, 5, 9]) if len(tapers[0]) == 2: n = tapers[0] w = tapers[1] p = n * w k = math.floor(2 * p - 1) tapers = [n, p, k] print(['Using ', tapers, ' tapers.']) if len(tapers[0]) == 3: tapers[0] = math.floor(np.multiply(tapers[0], sampling)) tapers, throwAway = dpsschk(tapers) if fk is None: fk = [0, np.true_divide(sampling, 2)] if np.size(fk) == 1: fk = [0, fk] K = tapers.shape[1] N = tapers.shape[0] if N != nt * sampling: raise Exception('Error: Tapers and time series are not the same length'); nf = np.maximum(256, pad * 2 ** nextPowerOfTwo(N + 1)) temp = np.multiply(sampling, nf) fk = np.true_divide(fk,sampling) nfk = np.floor(np.multiply(fk, temp)) Nf = int(np.diff(nfk)[0]) # Determine outputs f = np.linspace(fk[0], fk[1], Nf) # Default variables if not checking for error coh_err = None SX_err = None SY_err = None if flag == 0: coh = np.zeros([nch, Nf]) S_X = np.zeros([nch, Nf]) S_Y = np.zeros([nch, Nf]) if errorchk: coh_err = np.zeros([2, nch, Nf]) SX_err = np.zeros([2, nch, Nf]) SY_err = np.zeros([2, nch, Nf]) if contflag == 0: m1 = np.sum(X, axis=0) mX = np.transpose(np.true_divide(m1, nch)) mY = np.sum(Y, axis=0) for ch in range(nch): if contflag == 1: tmp1 = np.transpose(X[ch, :]) - np.true_divide(X[ch, :].sum(axis=0), N) tmp2 = np.transpose(Y[ch, :]) - np.true_divide(Y[ch, :].sum(axis=0), N) else: tmp1 = np.transpose(X[ch, :]) - mX tmp2 = np.transpose(Y[ch, :]) - mY extendedArrayX = extendArrayWithCurrentData(tmp1, ch, K) inputArrayX = np.multiply(tapers[:, 0:K], extendedArrayX) Xk = np.fft.fft(np.transpose(inputArrayX), int(nf)) lowerBoundX = int(nfk[0]) upperBoundX = int(nfk[1]) Xk = Xk[:,lowerBoundX:upperBoundX] extendedArrayY = extendArrayWithCurrentData(tmp2, ch, K) inputArrayY = np.multiply(tapers[:, 0:K], extendedArrayY) Yk = np.fft.fft(np.transpose(inputArrayY), int(nf)) lowerBoundY = int(nfk[0]) upperBoundY = int(nfk[1]) Yk = Yk[:, lowerBoundY:upperBoundY] SXk = (Xk * np.conj(Xk)).real SYk = (Yk * np.conj(Yk)).real SXTemp = np.sum(SXk, axis=0) S_X[ch, :] = np.transpose(np.true_divide(SXTemp, K)) SYTemp = np.sum(SYk, axis=0) S_Y[ch, :] = np.true_divide(SYTemp, K) cohTemp = np.sum(np.multiply(Xk, np.conj(Yk)), axis=0) cohTemp1 = np.sqrt(np.multiply(S_X[ch, :], S_Y[ch, :])) coh[ch, :] = np.true_divide(np.true_divide(cohTemp.real, K), cohTemp1.real) if errorchk: # Estimate error bars using Jacknife jcoh = np.zeros([K, Nf]) jXlsp = np.zeros([K, Nf]) jYlsp = np.zeros([K, Nf]) for ik in range(K): tempArray = range(0, K) indices = np.setdiff1d(tempArray, ik) Xj = Xk[indices, :] Yj = Yk[indices, :] tmpx = np.true_divide(np.sum(np.multiply(Xj,np.conj(Xj)),axis=0), K-1) tmpy = np.true_divide(np.sum(np.multiply(Yj,np.conj(Yj)),axis=0), K-1) jcohTemp = np.sum(np.multiply(Xj, np.conj(Yj)), axis=0) jcoh[ik, :] = np.arctanh(np.true_divide(np.abs(np.true_divide(jcohTemp,(K - 1))), np.sqrt(np.multiply(tmpx,tmpy)))).real jXlsp[ik, :] = np.log(tmpx.real) jYlsp[ik, :] = np.log(tmpy.real) lsigX = np.multiply(np.sqrt(K - 1), np.std(jXlsp, axis=0)) lsigY = np.multiply(np.sqrt(K - 1), np.std(jYlsp, axis=0)) lsigXY = np.multiply(np.sqrt(K - 1), np.std(jcoh, axis=0)) crit = t.ppf(1 - 
np.true_divide(pval,2), K - 1) # Determine the scaling factor coh_err[0, ch, :] = np.tanh(np.arctanh(np.abs(coh)) + np.multiply(crit, lsigXY)) coh_err[1, ch, :] = np.tanh(np.arctanh(np.abs(coh)) - np.multiply(crit, lsigXY)) SX_err[0, ch, :] = np.exp(np.log(S_X) + np.multiply(crit, lsigX)) SX_err[1, ch, :] = np.exp(np.log(S_X) - np.multiply(crit, lsigX)) SY_err[0, ch, :] = np.exp(np.log(S_Y) + np.multiply(crit, lsigY)) SY_err[1, ch, :] = np.exp(np.log(S_Y) - np.multiply(crit, lsigY)) if flag == 1: # Pooling across trials Xk = np.zeros([nch * K, Nf], dtype=np.complex) Yk = np.zeros([nch * K, Nf], dtype=np.complex) if not contflag: mX = np.transpose(np.true_divide(np.sum(X, axis=0), nch)) mY = np.transpose(np.true_divide(np.sum(Y, axis=0), nch)) for ch in range(nch): if contflag: tmp1 = np.transpose(X[ch, :]) - np.true_divide(np.sum(X[ch, :]), N) tmp2 = np.transpose(Y[ch, :]) - np.true_divide(np.sum(Y[ch, :]), N) else: tmp1 = np.transpose(X[ch, :]) - mX tmp2 = np.transpose(Y[ch, :]) - mY extendedArrayx = extendArrayWithCurrentData(tmp1, ch, K) inputArrayx = np.multiply(tapers[:, 0:K], extendedArrayx) xk = np.fft.fft(np.transpose(inputArrayx), int(nf)) Xk[int(ch * K):int((ch+1) * K), :] = xk[:, int(nfk[0]): int(nfk[1])] extendedArrayy = extendArrayWithCurrentData(tmp2, ch, K) inputArrayy = np.multiply(tapers[:, 0:K], extendedArrayy) yk = np.fft.fft(np.transpose(inputArrayy), int(nf)) Yk[int(ch * K): int((ch+1) * K), :] = yk[:, int(nfk[0]): int(nfk[1])] S_X = np.true_divide(np.sum(np.multiply(Xk,np.conj(Xk)), axis=0), K) S_Y = np.true_divide(np.sum(np.multiply(Yk,np.conj(Yk)), axis=0), K) cohTemp = np.sqrt(np.multiply(S_X, S_Y)) coh = np.true_divide(np.true_divide(np.sum(np.multiply(Xk, np.conj(Yk)), axis=0), K), cohTemp) if errorchk: # Estimate error bars using Jacknife jcoh = np.zeros([nch * K, Nf]) jXlsp = np.zeros([nch * K, Nf]) jYlsp = np.zeros([nch * K, Nf]) coh_err = np.zeros([2, Nf]) SX_err = np.zeros([2, Nf]) SY_err = np.zeros([2, Nf]) for ik in range(nch*K): indices = np.setdiff1d(np.multiply(range(0, nch), K), ik) Xj = Xk[indices, :] Yj = Yk[indices, :] tx = np.true_divide(np.sum(np.multiply(Xj, np.conj(Xj)), axis=0), (nch * K - 1)) ty = np.true_divide(np.sum(np.multiply(Yj, np.conj(Yj)), axis=0), (nch * K - 1)) # Use atanh variance stabilizing transformation for coherence jcohTemp = np.true_divide(np.sum(np.multiply(Xj,np.conj(Yj)), axis=0), (nch * K - 1)) jcohTemp1 = np.true_divide(jcohTemp, np.sqrt(np.multiply(tx, ty))) jcoh[ik, :] = np.arctanh(np.abs(jcohTemp1)) jXlsp[ik, :] = np.log(tx.real) jYlsp[ik, :] = np.log(ty.real) lsigX = np.multiply(np.sqrt(nch * K - 1), np.std(jXlsp, axis=0)) lsigY = np.multiply(np.sqrt(nch * K - 1), np.std(jYlsp, axis=0)) lsigXY = np.multiply(np.sqrt(nch * K - 1), np.std(jcoh, axis=0)) crit = t.ppf(1 - np.true_divide(pval, 2), K * nch - 1) # Determine the scaling factor coh_err[0, :] = np.tanh(np.arctanh(np.abs(coh)) + np.multiply(crit, lsigXY)) coh_err[1, :] = np.tanh(np.arctanh(np.abs(coh)) - np.multiply(crit, lsigXY)) SX_err[0, :] = np.exp(np.log(S_X.real) + np.multiply(crit, lsigX)) SX_err[1, :] = np.exp(np.log(S_X.real) - np.multiply(crit, lsigX)) SY_err[0, :] = np.exp(np.log(S_Y.real) + np.multiply(crit, lsigY)) SY_err[1, :] = np.exp(np.log(S_Y.real) - np.multiply(crit, lsigY)) if flag == 11: # Pooling across trials saving memory S_X = np.zeros([1, Nf]) S_Y = np.zeros([1, Nf]) coh = np.zeros([1, Nf]) if not contflag: mX = np.transpose(np.true_divide(np.sum(X, axis=0), nch)) mY = np.transpose(np.true_divide(np.sum(Y, axis=0), nch)) for ch in 
range(nch): if contflag: tmp1 = np.transpose(X[ch, :]) - np.true_divide(np.sum(X[ch, :], axis=0), N) tmp2 = np.transpose(Y[ch, :]) - np.true_divide(np.sum(Y[ch, :], axis=0), N) else: tmp1 = np.transpose(X[ch, :]) - mX tmp2 = np.transpose(Y[ch, :]) - mY extendedArrayx = extendArrayWithCurrentData(tmp1, ch, K) inputArrayx = np.multiply(tapers[:, 0:K], extendedArrayx) Xk = np.fft.fft(np.transpose(inputArrayx), int(nf)) extendedArrayy = extendArrayWithCurrentData(tmp2, ch, K) inputArrayy = np.multiply(tapers[:, 0:K], extendedArrayy) Yk = np.fft.fft(np.transpose(inputArrayy), int(nf)) S_XTemp = Xk[:,int(nfk[0]):int(nfk[1])] S_XTemp2 = np.sum(np.multiply(S_XTemp, np.conj(S_XTemp)), axis=0) S_X = S_X + np.true_divide(np.true_divide(S_XTemp2, K), nch) S_X = S_X.real S_YTemp = Yk[:, int(nfk[0]) : int(nfk[1])] S_Y = S_Y + np.true_divide(np.true_divide(np.sum(np.multiply(S_YTemp,np.conj(S_YTemp)), axis = 0), K), nch) S_Y = S_Y.real coh = coh + np.true_divide(np.true_divide(np.sum(np.multiply(S_XTemp,np.conj(S_YTemp)), axis = 0), K), nch) coh = np.true_divide(coh, (np.sqrt(np.multiply(S_X, S_Y)))) S_X = S_X.real S_Y = S_Y.real return coh, f, S_X, S_Y, coh_err, SX_err, SY_err
end = GenotypeData.chr_regions[i-1][1] chrpositions = GenotypeData.positions[start:end] matchedAccInd = numpy.where(numpy.in1d(chrpositions, perchrtarSNPpos))[0] + start matchedTarInd = numpy.where(numpy.in1d(perchrtarSNPpos, chrpositions))[0] matchedTarGTs = targetSNPs[2][perchrTarInd[matchedTarInd]].values TarGTs = numpy.zeros(len(matchedTarGTs), dtype="int8") TarGTs[numpy.where(matchedTarGTs == "1/1")[0]] = 1 TarGTs[numpy.where(matchedTarGTs == "0/1")[0]] = 2 AccTarSNPs = AccSNPs[matchedAccInd] ImpPos = numpy.where(AccTarSNPs < 0)[0] nmat_all = numpy.where(AccTarSNPs != TarGTs)[0] matInd = numpy.where(AccTarSNPs == TarGTs)[0] nmatInd = numpy.setdiff1d(nmat_all, ImpPos) nmatTarInd = numpy. # MatHetCount = MatHetCount + numpy.unique(numpy.array(matchedTarGTs[matInd]), return_counts=True)[1][1] # NmatHetCount = NmatHetCount + numpy.unique(numpy.array(matchedTarGTs[nmatInd]), return_counts=True)[1][1] nmat = GenotypeData.positions[matchedAccInd[nmatInd]] mat = GenotypeData.positions[matchedAccInd[matInd]] nmatGT = TarGTs[] matGT = AccTarSNPs[matInd] #TotMatPos = numpy.append(TotMatPos,matchedAccInd[matInd]) #TotNonMatPos = numpy.append(TotNonMatPos, matchedAccInd[nmatInd]) logging.info("%d NonMat, %d Mat, %s TotInfo", len(nmat), len(mat), len(AccTarSNPs)-len(ImpPos)) TotNonMatPos = numpy.append(TotNonMatPos, nmat)
X_u_meas = np.hstack((X2.flatten()[:, None], T2.flatten()[:, None])) u_meas = Exact2.flatten()[:, None] # Training measurements, which are randomly sampled spatio-temporally Split_TrainVal = 0.8 N_u_train = int(N_u_s * N_u_t * Split_TrainVal) idx_train = np.random.choice(X_u_meas.shape[0], N_u_train, replace=False) X_u_train = X_u_meas[idx_train, :] u_train = u_meas[idx_train, :] # Validation Measurements, which are the rest of measurements idx_val = np.setdiff1d(np.arange(X_u_meas.shape[0]), idx_train, assume_unique=True) X_u_val = X_u_meas[idx_val, :] u_val = u_meas[idx_val, :] # Dirichlet Boundary Conditions : u(0, t), u(l, t), u_x(0, t), u_x(l, t) # Note: Due to a lack of accurate values for derivatives, u_x(0, t), u_x(l, t) are intentionally left out. X_bc1 = np.hstack( (X[:, 0].flatten()[:, None], T[:, 0].flatten()[:, None])) # u(0, t) X_bc2 = np.hstack( (X[:, -1].flatten()[:, None], T[:, -1].flatten()[:, None])) # u(l, t) X_bc = np.vstack((X_bc1, X_bc2)) u_bc = np.vstack( (Exact[:, 0].flatten()[:, None], Exact[:, -1].flatten()[:, None]))
def load_protein_mrna(file_protein_quantification='',folder_data='', filter_genes="_genes_filtered",filter_lines="_lines_filtered_unique",field_data="Reporter intensity corrected_regbatch",\ only_protein=False,folder_data_rna='',file_rna_quantification=''): # folder_data_rna='/Users/mirauta/data/RNA/hipsci/' # file_rna_quantification='HipSci.featureCounts.genes.counts.stranded.tsv_counts.tsv' # folder_data='/Users/mirauta/data/MS/hipsci/TMT/phenotypes/' # file_protein_quantification='hipsci.proteomics.maxquant.uniprot.TMT_batch_14.20170517' # field_data='Reporter intensity corrected_regbatch' # filter_lines='_lines_filtered_unique' # filter_genes='_genes_filtered' data = {} data['protein_intensity'] = pandas.read_table( folder_data + file_protein_quantification + "_protein_" + field_data + filter_lines + filter_genes + ".txt", sep='\t', index_col=0).transpose() data['peptide_intensity'] = pandas.read_table( folder_data + file_protein_quantification + "_peptide_" + field_data + filter_lines + filter_genes + ".txt", sep='\t', index_col=0).transpose() data['peptide_meta'] = pandas.read_table( folder_data + file_protein_quantification + "_peptide_metadata" + filter_genes + ".txt", sep='\t').set_index('ensembl_gene_id', drop=False).transpose() data['peptide_protein'] = pandas.read_table( folder_data + file_protein_quantification + "_peptide_metadata" + filter_genes + ".txt", sep='\t').set_index('ensembl_gene_id', drop=False).transpose() data['protein_meta'] = pandas.read_table( folder_data + file_protein_quantification + "_protein_metadata" + filter_genes + ".txt", sep='\t').set_index('ensembl_gene_id', drop=False).transpose() data['line_meta'] = pandas.read_table( folder_data + file_protein_quantification + "_lines_metadata" + filter_lines + ".txt", sep='\t').set_index('lines', drop=False) data['batch_mat']=pandas.DataFrame(data=np.vstack([data['line_meta']['batch']==tmt for tmt in np.unique(data['line_meta']['batch'])]).astype(float).T,\ columns=np.unique(data['line_meta']['batch']), index=data['line_meta']['lines']) if only_protein: return data data['rna_counts'] = pandas.read_table(folder_data_rna + file_rna_quantification, sep='\t', index_col=0).transpose() data['rna_counts'].index = np.array( [l.split('/')[3].split('.')[0] for l in data['rna_counts'].index]) common_lines = np.sort( np.intersect1d(data['protein_intensity'].index, data['rna_counts'].index)) print(common_lines.shape) diff_lines = np.setdiff1d(data['protein_intensity'].index, data['rna_counts'].index) print(diff_lines.shape) common_genes=np.sort(np.intersect1d(data['peptide_meta'].transpose()['ensembl_gene_id'],\ np.intersect1d(data['protein_meta'].transpose()['ensembl_gene_id'],data['rna_counts'].columns.values))) print(common_genes.shape) ## select common lines for x in [ 'protein_intensity', 'peptide_intensity', 'line_meta', 'batch_mat', 'rna_counts' ]: data[x] = data[x].transpose()[common_lines].transpose() # # temp=data[x].transpose() # data[x]=pandas.DataFrame(data=np.array([temp[ll] for ll in common_lines]), index=common_lines, columns=data[x].columns.values) for x in ['protein_meta', 'peptide_meta', 'peptide_protein', 'rna_counts']: data[x] = data[x].transpose()[np.in1d(data[x].transpose().index, common_genes)].transpose() return data
xvalues = pd.read_csv(x_url, header=None).values.astype(float); yvalues = pd.read_csv(y_url, header=None).values.astype(int); pVal = xvalues.shape[1] / 2; X_origin = xvalues[:, 0:pVal]; X_knockoff = xvalues[:, pVal:]; print(xvalues.shape) print(yvalues.shape) allIndices = range(0, len(yvalues)); allRoc = np.empty((0, iterNum), float); for testIndices in testIndicesList: trainIndices = np.setdiff1d(allIndices, testIndices); print('trainIndices: '+str(len(trainIndices))+' '+str(trainIndices)) print('testIndices: ' + str(len(testIndices)) + ' ' + str(testIndices)) x3D_train = np.zeros((len(trainIndices), pVal, 2)); x3D_train[:, :, 0] = X_origin[trainIndices,:]; x3D_train[:, :, 1] = X_knockoff[trainIndices,:]; label_train = yvalues[trainIndices]; x3D_test = np.zeros((len(testIndices), pVal, 2)); x3D_test[:, :, 0] = X_origin[testIndices, :]; x3D_test[:, :, 1] = X_knockoff[testIndices, :]; label_test = yvalues[testIndices]; coeff = 0.05*np.sqrt(2.0 * np.log(pVal) / x3D_train.shape[0]); outputDir = os.path.join(dataDir, dataType, 'result_2layer_epoch' + str(num_epochs) + '_batch' + str(batch_size) + '_knockoff1');
except ImportError: # Python 2 import urllib urllib.urlretrieve(ftp_url, weights_filename(shape, n_samples)) # TO BE REMOVED END # Load true weights if has_data: WEIGHTS_TRUTH = np.load(weights_filename(shape, n_samples)) # Ensure that train dataset is balanced tr = np.hstack([ np.where(y.ravel() == 1)[0][:int(n_train / 2)], np.where(y.ravel() == 0)[0][:int(n_train / 2)] ]) te = np.setdiff1d(np.arange(y.shape[0]), tr) X = X3d.reshape((n_samples, np.prod(beta3d.shape))) Xtr = X[tr, :] ytr = y[tr] Xte = X[te, :] yte = y[te] beta_start = weights.RandomUniformWeights().get_weights(Xtr.shape[1]) # check that ytr is balanced #assert ytr.sum() / ytr.shape[0] == 0.5 #assert yte.sum() / yte.shape[0] == 0.53500000000000003 # Dataset with intercept Xtr_i = np.c_[np.ones((Xtr.shape[0], 1)), Xtr] Xte_i = np.c_[np.ones((Xte.shape[0], 1)), Xte]
def computePrior(s,G,messageBlocks,L,M,p0,K,tau,Phat,numBPiter,case): q = np.zeros(s.shape,dtype=float) p1 = p0*np.ones(s.shape,dtype=float) temp_beta = np.zeros((L*M, 1)) for iter in range(numBPiter): # Translate the effective observation into PME. For the first iteration of BP, use the uninformative prior p0 if case==1: for i in range(L): temp_beta[i*M:(i+1)*M] = (p1[i*M:(i+1)*M]*np.exp(-(s[i*M:(i+1)*M]-np.sqrt(Phat))**2/(2*tau[i]**2)))/ \ (p1[i*M:(i+1)*M]*np.exp(-(s[i*M:(i+1)*M]-np.sqrt(Phat))**2/(2*tau[i]**2)) + \ (1-p1[i*M:(i+1)*M])*np.exp(-s[i*M:(i+1)*M]**2/(2*tau[i]**2))).astype(float) \ .reshape(-1, 1) else: temp_beta = (p1*np.exp(-(s-np.sqrt(Phat))**2/(2*tau**2)))/ (p1*np.exp(-(s-np.sqrt(Phat))**2/(2*tau**2)) + (1-p1)*np.exp(-s**2/(2*tau**2))).astype(float).reshape(-1, 1) # Reshape PME into an LxM matrix Beta = temp_beta.reshape(L,-1) #print(Beta.shape,np.sum(Beta,axis=1)) Beta = Beta/(np.sum(Beta,axis=1).reshape(L,-1)) # Rotate PME 180deg about y-axis Betaflipped = np.hstack((Beta[:,0].reshape(-1,1),np.flip(Beta[:,1:],axis=1))) # Compute and store all FFTs BetaFFT = np.fft.fft(Beta) BetaflippedFFT = np.fft.fft(Betaflipped) for i in range(L): if messageBlocks[i]: # Parity sections connected to info section i parityIndices = np.where(G[i])[0] BetaIFFTprime = np.empty((0,0)).astype(float) for j in parityIndices: # Other info blocks connected to this parity block messageIndices = np.setdiff1d(np.where(G[j])[0],i) BetaFFTprime = np.vstack((BetaFFT[j],BetaflippedFFT[messageIndices,:])) # Multiply the relevant FFTs BetaFFTprime = np.prod(BetaFFTprime,axis=0) # IFFT BetaIFFTprime1 = np.fft.ifft(BetaFFTprime).real BetaIFFTprime = np.vstack((BetaIFFTprime,BetaIFFTprime1)) if BetaIFFTprime.size else BetaIFFTprime1 BetaIFFTprime = np.prod(BetaIFFTprime,axis=0) else: BetaIFFTprime = np.empty((0,0)).astype(float) # Information sections connected to this parity section (assuming no parity over parity sections) Indices = np.where(G[i])[0] # FFT BetaFFTprime = BetaFFT[Indices,:] # Multiply the relevant FFTs BetaFFTprime = np.prod(BetaFFTprime,axis=0) # IFFT BetaIFFTprime = np.fft.ifft(BetaFFTprime).real # Normalize to ensure it sums to one p1[i*M:(i+1)*M] = (BetaIFFTprime/np.sum(BetaIFFTprime)).reshape(-1,1) p1[i*M:(i+1)*M] = 1-(1-p1[i*M:(i+1)*M] )**K # Normalize to ensure sum of priors within a section is K (optional) #p1[i*M:(i+1)*M] = p1[i*M:(i+1)*M]*K/np.sum(p1[i*M:(i+1)*M]) q = np.minimum(p1,1) return q
def main(random_state=1, test_size=0.2, n_instances=1000000, out_dir='continuous'): # create logger logger = get_logger('log.txt') # columns to use cols = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] # data dtypes for each column dtypes = {c: np.float32 for c in cols} dtypes[0] = np.uint8 # retrieve dataset start = time.time() df = pd.read_csv('day_0', sep='\t', header=None, usecols=cols, dtype=dtypes, nrows=n_instances) logger.info('reading in dataset...{:.3f}s'.format(time.time() - start)) logger.info('{}'.format(df)) logger.info('Memory usage: {:,} bytes'.format( df.memory_usage(deep=True).sum())) # get numpy array X = df.values df = None # impute missing values with the mean logger.info('imputing missing values with the mean...') assert np.isnan(X[:, 0]).sum() == 0 col_mean = np.nanmean(X, axis=0) nan_indices = np.where(np.isnan(X)) X[nan_indices] = np.take(col_mean, nan_indices[1]) # move the label column in X to the last column logger.info('moving label column to the last column...') y = X[:, 0].copy().reshape(-1, 1) X = np.delete(X, 0, 1) X = np.hstack([X, y]) # split into train and test logger.info('splitting into train and test sets...') indices = np.arange(X.shape[0]) n_train_samples = int(len(indices) * (1 - test_size)) np.random.seed(random_state) train_indices = np.random.choice(indices, size=n_train_samples, replace=False) test_indices = np.setdiff1d(indices, train_indices) train = X[train_indices] test = X[test_indices] logger.info('train.shape: {}, label sum: {}'.format( train.shape, train[:, -1].sum())) logger.info('test.shape: {}, label sum: {}'.format(test.shape, test[:, -1].sum())) # save to numpy format logger.info('saving...') os.makedirs(out_dir, exist_ok=True) np.save(os.path.join(out_dir, 'train.npy'), train) np.save(os.path.join(out_dir, 'test.npy'), test)
def init(mol, prm): natoms = mol.numAtoms charge = mol.charge.astype(np.float64) impropers = mol.impropers angles = mol.angles dihedrals = mol.dihedrals # if len(impropers) == 0: # logger.warning('No impropers are defined in the input molecule. Check if this is correct. If not, use guessAnglesAndDihedrals.') # if len(angles) == 0: # logger.warning('No angles are defined in the input molecule. Check if this is correct. If not, use guessAnglesAndDihedrals.') # if len(dihedrals) == 0: # logger.warning('No dihedrals are defined in the input molecule. Check if this is correct. If not, use guessAnglesAndDihedrals.') if prm.urey_bradley_types: for type in prm.urey_bradley_types: if prm.urey_bradley_types[type].k != 0: logger.warning( 'Urey-Bradley types found in the parameters but are not implemented in FFEvaluate and will be ignored!' ) break uqtypes, typeint = np.unique(mol.atomtype, return_inverse=True) sigma = np.zeros(len(uqtypes), dtype=np.float32) sigma14 = np.zeros(len(uqtypes), dtype=np.float32) epsilon = np.zeros(len(uqtypes), dtype=np.float32) epsilon14 = np.zeros(len(uqtypes), dtype=np.float32) for i, type in enumerate(uqtypes): sigma[i] = prm.atom_types[type].sigma epsilon[i] = prm.atom_types[type].epsilon sigma14[i] = prm.atom_types[type].sigma_14 epsilon14[i] = prm.atom_types[type].epsilon_14 nbfix = np.ones((len(prm.nbfix_types), 6), dtype=np.float64) * -1 for i, nbf in enumerate(prm.nbfix_types): if nbf[0] in uqtypes and nbf[1] in uqtypes: idx1 = np.where(uqtypes == nbf[0])[0] idx2 = np.where(uqtypes == nbf[1])[0] rmin, eps, rmin14, eps14 = prm.atom_types[nbf[0]].nbfix[nbf[1]] sig = rmin * 2**(-1 / 6) # Convert rmin to sigma sig14 = rmin14 * 2**(-1 / 6) nbfix[i, :] = [idx1, idx2, eps, sig, eps14, sig14] # 1-2 and 1-3 exclusion matrix # TODO: Don't read bonds / angles / dihedrals from mol. 
Read from forcefield excl_list = [[] for _ in range(natoms)] bond_pairs = [[] for _ in range(natoms)] bond_params = [[] for _ in range(natoms)] for bond in mol.bonds: types = tuple(uqtypes[typeint[bond]]) bond = sorted(bond) excl_list[bond[0]].append(bond[1]) bond_pairs[bond[0]].append(bond[1]) bond_params[bond[0]].append(prm.bond_types[types].k) bond_params[bond[0]].append(prm.bond_types[types].req) angle_params = np.zeros((mol.angles.shape[0], 2), dtype=np.float32) for idx, angle in enumerate(mol.angles): first, second = sorted([angle[0], angle[2]]) excl_list[first].append(second) types = tuple(uqtypes[typeint[angle]]) angle_params[idx, :] = [ prm.angle_types[types].k, radians(prm.angle_types[types].theteq) ] excl_list = [list(np.unique(x)) for x in excl_list] # 1-4 van der Waals scaling matrix s14_atom_list = [[] for _ in range(natoms)] s14_value_list = [[] for _ in range(natoms)] # 1-4 electrostatic scaling matrix e14_atom_list = [[] for _ in range(natoms)] e14_value_list = [[] for _ in range(natoms)] dihedral_params = [[] for _ in range(mol.dihedrals.shape[0])] alreadyadded = {} for idx, dihed in enumerate(mol.dihedrals): # Avoid readding duplicate dihedrals stringrep = ' '.join(map(str, sorted(dihed))) if stringrep in alreadyadded: continue alreadyadded[stringrep] = True ty = tuple(uqtypes[typeint[dihed]]) if ty in prm.dihedral_types: dihparam = prm.dihedral_types[ty] elif ty[::-1] in prm.dihedral_types: dihparam = prm.dihedral_types[ty[::-1]] else: raise RuntimeError( 'Could not find type {} in dihedral_types'.format(ty)) i, j = sorted([dihed[0], dihed[3]]) s14_atom_list[i].append(j) s14_value_list[i].append(dihparam[0].scnb) e14_atom_list[i].append(j) e14_value_list[i].append(dihparam[0].scee) for dip in dihparam: dihedral_params[idx].append(dip.phi_k) dihedral_params[idx].append(radians(dip.phase)) dihedral_params[idx].append(dip.per) improper_params = np.zeros((mol.impropers.shape[0], 3), dtype=np.float32) graph = improperGraph(mol.impropers, mol.bonds) for idx, impr in enumerate(mol.impropers): ty = tuple(uqtypes[typeint[impr]]) try: imprparam, impr_type = getImproperParameter(ty, prm) except: try: # In some cases AMBER does not store the center as 3rd atom (i.e. if it's index is 0). Then you need to detect it center = detectImproperCenter(impr, graph) notcenter = np.setdiff1d(impr, center) notcenter = sorted(uqtypes[typeint[notcenter]]) ty = tuple(notcenter[:2] + [ uqtypes[typeint[center]], ] + notcenter[2:]) imprparam, impr_type = getImproperParameter(ty, prm) except: raise RuntimeError( 'Could not find improper parameters for atom types {}'. 
format(ty)) if impr_type == 'improper_periodic_types': improper_params[idx, :] = [ imprparam.phi_k, radians(imprparam.phase), imprparam.per ] elif impr_type == 'improper_types': improper_params[idx, :] = [ imprparam.psi_k, radians(imprparam.psi_eq), 0 ] excl = nestedListToArray(excl_list, dtype=np.int64, default=-1) s14a = nestedListToArray(s14_atom_list, dtype=np.int64, default=-1) e14a = nestedListToArray(e14_atom_list, dtype=np.int64, default=-1) s14v = nestedListToArray(s14_value_list, dtype=np.float32, default=np.nan) e14v = nestedListToArray(e14_value_list, dtype=np.float32, default=np.nan) bonda = nestedListToArray(bond_pairs, dtype=np.int64, default=-1) bondv = nestedListToArray(bond_params, dtype=np.float32, default=np.nan) dihedral_params = nestedListToArray(dihedral_params, dtype=np.float32, default=np.nan) ELEC_FACTOR = 1 / (4 * const.pi * const.epsilon_0) # Coulomb's constant ELEC_FACTOR *= const.elementary_charge**2 # Convert elementary charges to Coulombs ELEC_FACTOR /= const.angstrom # Convert Angstroms to meters ELEC_FACTOR *= const.Avogadro / (const.kilo * const.calorie ) # Convert J to kcal/mol return typeint, excl, nbfix, sigma, sigma14, epsilon, epsilon14, s14a, e14a, s14v, e14v, bonda, bondv, ELEC_FACTOR, \ charge, angles, angle_params, dihedrals, dihedral_params, impropers, improper_params
train_rate = 0.8  # assumed from the 80-10-10 split comment below; not defined in the original excerpt
valid_rate = 0.1
batch_size = 4

trans = transforms.Compose([
    transforms.Resize(size=(1500, 1500)),
    transforms.ToTensor()
])
dataset = ImageFolder(DATA_PATH, transform=trans)

train_size = int(train_rate * len(dataset))
valid_size = int(valid_rate * len(dataset))

# Split train-valid-test = 80-10-10
all_indexes = np.arange(len(dataset))
train_indexes = np.random.choice(all_indexes, size=train_size, replace=False)
all_indexes = np.setdiff1d(all_indexes, train_indexes)
valid_indexes = np.random.choice(all_indexes, size=valid_size, replace=False)
test_indexes = np.setdiff1d(all_indexes, valid_indexes)

np.random.shuffle(train_indexes)
np.random.shuffle(valid_indexes)
np.random.shuffle(test_indexes)

train_subset = Subset(dataset, train_indexes)
valid_subset = Subset(dataset, valid_indexes)
test_subset = Subset(dataset, test_indexes)

# The dataloaders
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=False)
valid_loader = DataLoader(valid_subset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=False)
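A compact illustration of the same three-way split feeding torch Subset/DataLoader objects; the TensorDataset and all sizes below are placeholders standing in for the ImageFolder dataset, not the original data.

import numpy as np
import torch
from torch.utils.data import TensorDataset, Subset, DataLoader

data = TensorDataset(torch.randn(100, 3, 8, 8), torch.randint(0, 2, (100,)))  # dummy dataset

all_idx = np.arange(len(data))
train_idx = np.random.choice(all_idx, size=80, replace=False)
rest = np.setdiff1d(all_idx, train_idx)            # indices not used for training
valid_idx = np.random.choice(rest, size=10, replace=False)
test_idx = np.setdiff1d(rest, valid_idx)           # whatever is left over

train_loader = DataLoader(Subset(data, train_idx), batch_size=4, shuffle=False)
valid_loader = DataLoader(Subset(data, valid_idx), batch_size=4, shuffle=False)
test_loader = DataLoader(Subset(data, test_idx), batch_size=4, shuffle=False)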
def cracks(boxes, classes, scores, marking_list): left_wheelpath, right_wheelpath, non_wheelpath, left_center_path, right_center_path, left_non_wheelpath, right_non_wheelpath = wheelpath( marking_list) right_center_list_b2 = [] left_center_list_b2 = [] left_center_list_b3 = [] right_center_list_b3 = [] right_list_b = [] left_list_b = [] center_b = list() right_list_l = [] left_list_l = [] box_list_right_load = [] box_list_left_load = [] b1_box_list = list() b1_box_list_longi = list() b1_class_list = list() box_list_block = list() rm_list = list() flag_LC3 = 0 flag_BC2 = 0 flag_BC3 = 0 box_left = np_box_list.BoxList(np.array(([left_wheelpath]))) box_right = np_box_list.BoxList(np.array(([right_wheelpath]))) box_non_wheel = np_box_list.BoxList(np.array(([non_wheelpath]))) box_left_center = np_box_list.BoxList(np.array(([left_center_path]))) box_right_center = np_box_list.BoxList(np.array(([right_center_path]))) box_non_wheel_left = np_box_list.BoxList(np.array(([left_non_wheelpath]))) box_non_wheel_right = np_box_list.BoxList(np.array( ([right_non_wheelpath]))) for i in range(boxes.shape[0]): label = classes[i] box = boxes[i] box2 = np_box_list.BoxList(np.array(([box]))) print('box2', box2) #Check for box overlaps with wheepaths and non-wheelpaths ioa_1 = np.transpose(np_box_list_ops.ioa(box_non_wheel, box2)) #non-wheelpath ioa_2 = np.transpose(np_box_list_ops.ioa(box_left, box2)) #Left wheelpath ioa_3 = np.transpose(np_box_list_ops.ioa(box_right, box2)) #Right wheelpath ioa_4 = np.transpose(np_box_list_ops.ioa(box_left_center, box2)) #Left center path ioa_5 = np.transpose(np_box_list_ops.ioa(box_right_center, box2)) #Right center path ioa_7 = np.transpose(np_box_list_ops.ioa(box_non_wheel_left, box2)) #Left nonwheelpath ioa_8 = np.transpose(np_box_list_ops.ioa(box_non_wheel_right, box2)) #Right nonwheelpath if classes[i] == 3: ioa_6 = np.transpose(np_box_list_ops.ioa(box2, box_non_wheel)) #LC3 if ioa_6 > 0.8: flag_LC3 = 1 if 3 not in classes: flag_LC3 = 1 #Check detected load cracks in the non wheelpath and change the label to BC1 if (ioa_1 > 0.8 or ioa_7 > 0.7) or ioa_8 > 0.7: if label in [1, 2, 3, 4]: classes[i] = 5 #creating a list of BC1 boxes and classes if (classes[i] == 6 or classes[i] == 5): if classes[i] == 5: b1_box_list.append(boxes[i]) b1_class_list.append(classes[i]) #Calculate the area of the bbox area = np_box_ops.area(np.array(([box]))) # appending the BC2/BC3 detections to left,right and center lists of the pavement dependig on their location and area. 
if classes[i] == 6: if ioa_4 > 0.7: if area > 190000: left_center_list_b2.append(classes[i]) #left_center_list_b2 box_list_block.append(boxes[i]) else: left_center_list_b3.append(classes[i]) #left_center_list_b3 box_list_block.append(boxes[i]) elif ioa_5 > 0.7: if area > 190000: right_center_list_b2.append(classes[i]) box_list_block.append(boxes[i]) #right_center_list_b2 else: right_center_list_b3.append(classes[i]) #right_center_list_b3 box_list_block.append(boxes[i]) #To check if the biggest box is in the center of the image if ((ioa_4 > 0.5 and ioa_5 > 0.5) or (area > 400000)): if area > 200000: if ioa_7 > 0.2 or ioa_8 > 0.2: center_b.append(classes[i]) #Check for load cracks else: #left_wheelpath if ioa_2 > 0.4: if classes[i] != 3: left_list_l.append(classes[i]) box_list_left_load.append(boxes[i]) #right_wheelpath elif ioa_3 > 0.4: if classes[i] != 3: right_list_l.append(classes[i]) box_list_right_load.append(boxes[i]) #special case LC3 with not much overlap because of wide box if classes[i] == 3: if ioa_2 > 0.3: left_list_l.append(classes[i]) box_list_left_load.append(boxes[i]) if ioa_3 > 0.3: right_list_l.append(classes[i]) box_list_right_load.append(boxes[i]) #Part of the code to post-process BC2/BC3 into BC2 and BC3 separately #check if LC4 is not present in detections if np.setdiff1d(classes, [1, 2, 3, 5, 6]).shape[0] == 0: if flag_LC3 == 1: #Checking if the nuumber of detected boxes is greater than 3 if (len(left_center_list_b2 + left_center_list_b3 + right_center_list_b2 + right_center_list_b3)) >= 4: if (len(left_center_list_b2 + left_center_list_b3) >= 1 and len(right_center_list_b2 + right_center_list_b3) >= 1): print('Block crack level 3 detected') flag_BC3 = 1 else: print('Block crack level 2 detected') flag_BC2 = 1 #check if BC2 is detected wrt area of the detected box being bigger than BC3 elif ((len(left_center_list_b2) != 0) or (len(right_center_list_b2) != 0)): if len(center_b) >= 1: print('Block crack level 2 detected') flag_BC2 = 1 #Checking if the nuumber of detected boxes is greater than or equal to 2 elif (len(left_center_list_b2 + left_center_list_b3 + right_center_list_b2 + right_center_list_b3)) >= 2: #Check if blocks are detected in both left and right parts of the pavement if (len(left_center_list_b2 + left_center_list_b3) >= 1 and len(right_center_list_b2 + right_center_list_b3) >= 1): print('Block crack level 3 detected') flag_BC3 = 1 else: print('Block crack level 2 detected') flag_BC2 = 1 elif (len(left_center_list_b2 + left_center_list_b3 + right_center_list_b2 + right_center_list_b3)) >= 1: print('Block crack level 2 detected') flag_BC2 = 1 #Check for blocks detected only in one part of the pavement (R or L) >= 2 elif len(left_center_list_b2 + left_center_list_b3) >= 2 or len( right_center_list_b2 + right_center_list_b3) >= 2: #if 6 in classes: flag_BC2 = 1 print('Block Crack Level 2 detected') # Part of the code to return crack extent calculation if not (flag_BC2 or flag_BC3): extent_right, extent_left, extent_b1 = extent(box_list_right_load, box_list_left_load, box_list_block, b1_box_list, flag_BC2, flag_BC3) left_list_l.sort() left_list_l.reverse() right_list_l.sort() right_list_l.reverse() if (left_list_l == [] and right_list_l == []): if extent_b1 != 0: return ('0', extent_left, '0', extent_right, 0, 0, '1', extent_b1) else: return ('0', extent_left, '0', extent_right, 0, 0, 0, 0) elif left_list_l == []: if len(right_list_l) >= 2: right = str(right_list_l[0]) + ',' + str(right_list_l[1]) else: right = str(right_list_l[0]) if extent_b1 != 0: return 
(0, extent_left, right, extent_right, 0, 0, '1', extent_b1) else: return (0, extent_left, right, extent_right, 0, 0, 0, 0) elif right_list_l == []: if len(left_list_l) >= 2: left = str(left_list_l[0]) + ',' + str(left_list_l[1]) else: left = str(left_list_l[0]) if extent_b1 != 0: return (left, extent_left, 0, extent_right, 0, 0, '1', extent_b1) else: return (left, extent_left, 0, extent_right, 0, 0, 0, 0) else: if len(left_list_l) >= 2: left = str(left_list_l[0]) + ',' + str(left_list_l[1]) else: left = str(left_list_l[0]) if len(right_list_l) >= 2: right = str(right_list_l[0]) + ',' + str(right_list_l[1]) else: right = str(right_list_l[0]) if extent_b1 != 0: return (left, extent_left, right, extent_right, 0, 0, '1', extent_b1) else: return (left, extent_left, right, extent_right, 0, 0, 0, 0) else: extent_block, extent_right, extent_left = extent( box_list_right_load, box_list_left_load, box_list_block, b1_box_list, flag_BC2, flag_BC3) left_list_l.sort() left_list_l.reverse() right_list_l.sort() right_list_l.reverse() if (left_list_l == [] and right_list_l == []): left = '0' right = '0' elif left_list_l == []: left = '0' if len(right_list_l) >= 2: right = str(right_list_l[0]) + ',' + str(right_list_l[1]) else: right = str(right_list_l[0]) elif right_list_l == []: right = '0' if len(left_list_l) >= 2: left = str(left_list_l[0]) + ',' + str(left_list_l[1]) else: left = str(left_list_l[0]) else: if len(left_list_l) >= 2: left = str(left_list_l[0]) + ',' + str(left_list_l[1]) else: left = str(left_list_l[0]) if len(right_list_l) >= 2: right = str(right_list_l[0]) + ',' + str(right_list_l[1]) else: right = str(right_list_l[0]) if flag_BC2: return (left, extent_left, right, extent_right, '2', str(extent_block), '0', '0') else: return (left, extent_left, right, extent_right, '3', str(extent_block), '0', '0')
def setdiff(table1, table2, keys=None): """ Take a set difference of table rows. The row set difference will contain all rows in ``table1`` that are not present in ``table2``. If the keys parameter is not defined, all columns in ``table1`` will be included in the output table. Parameters ---------- table1 : `~astropy.table.Table` ``table1`` is on the left side of the set difference. table2 : `~astropy.table.Table` ``table2`` is on the right side of the set difference. keys : str or list of str Name(s) of column(s) used to match rows of left and right tables. Default is to use all columns in ``table1``. Returns ------- diff_table : `~astropy.table.Table` New table containing the set difference between tables. If the set difference is none, an empty table will be returned. Examples -------- To get a set difference between two tables:: >>> from astropy.table import setdiff, Table >>> t1 = Table({'a': [1, 4, 9], 'b': ['c', 'd', 'f']}, names=('a', 'b')) >>> t2 = Table({'a': [1, 5, 9], 'b': ['c', 'b', 'f']}, names=('a', 'b')) >>> print(t1) a b --- --- 1 c 4 d 9 f >>> print(t2) a b --- --- 1 c 5 b 9 f >>> print(setdiff(t1, t2)) a b --- --- 4 d >>> print(setdiff(t2, t1)) a b --- --- 5 b """ if keys is None: keys = table1.colnames #Check that all keys are in table1 and table2 for tbl, tbl_str in ((table1, 'table1'), (table2, 'table2')): diff_keys = np.setdiff1d(keys, tbl.colnames) if len(diff_keys) != 0: raise ValueError("The {} columns are missing from {}, cannot take " "a set difference.".format(diff_keys, tbl_str)) # Make a light internal copy of both tables t1 = table1.copy(copy_data=False) t1.meta = {} t1.keep_columns(keys) t1['__index1__'] = np.arange(len(table1)) # Keep track of rows indices # Make a light internal copy to avoid touching table2 t2 = table2.copy(copy_data=False) t2.meta = {} t2.keep_columns(keys) # Dummy column to recover rows after join t2['__index2__'] = np.zeros(len(t2), dtype=np.uint8) # dummy column t12 = _join(t1, t2, join_type='left', keys=keys, metadata_conflicts='silent') # If t12 index2 is masked then that means some rows were in table1 but not table2. if hasattr(t12['__index2__'], 'mask'): # Define bool mask of table1 rows not in table2 diff = t12['__index2__'].mask # Get the row indices of table1 for those rows idx = t12['__index1__'][diff] # Select corresponding table1 rows straight from table1 to ensure # correct table and column types. t12_diff = table1[idx] else: t12_diff = table1[[]] return t12_diff
def CurvatureISF3(vertices, faces): ''' Uses two ring vertices and normals. ''' tol = 1e-10 npt = vertices.shape[0] neighbor_tri = triangle_neighbors(faces, npt) neighbor_verts = np.array( [get_surf_neighbors(faces, neighbor_tri, k) for k in range(npt)]) e0 = vertices[faces[:, 2]] - vertices[faces[:, 1]] e1 = vertices[faces[:, 0]] - vertices[faces[:, 2]] e2 = vertices[faces[:, 1]] - vertices[faces[:, 0]] e0_norm = normr(e0) e1_norm = normr(e1) e2_norm = normr(e2) FaceNormals = 0.5 * fastcross(e0, e1) VN = GetVertexNormals(vertices, faces, FaceNormals, e0, e1, e2) up = np.zeros(vertices.shape) #Calculate initial coordinate system up[faces[:, 0]] = e2_norm up[faces[:, 1]] = e0_norm up[faces[:, 2]] = e1_norm #Calculate initial vertex coordinate system up = fastcross(VN, up) up = normr(up) vp = fastcross(up, VN) vp = normr(vp) qj = np.zeros([100, 5]) A = np.zeros([200, 5]) B = np.zeros([200, 1]) H = np.zeros(npt) K = np.zeros(npt) VNnew = np.zeros_like(VN) for i in range(npt): n1 = up[i] n2 = vp[i] n3 = VN[i] nbrs = np.unique(np.hstack(neighbor_verts[neighbor_verts[i]].flat)) nbrs = np.setdiff1d(nbrs, i) for iter in range(30): for j, (pj, nj) in enumerate(zip(vertices[nbrs], VN[nbrs])): qj[j] = np.array([ np.dot(pj - vertices[i], n1), np.dot(pj - vertices[i], n2), np.dot(pj - vertices[i], n3), -np.dot(nj, n1) / np.dot(nj, n3), -np.dot(nj, n2) / np.dot(nj, n3) ]) j = 0 k = 0 for (x, y, z, nx, ny) in qj: k += 1 if k == len(nbrs): break scale = 2 / (x**2 + y**2) A[j] = scale * np.array([x**2, x * y, y**2, x, y]) A[j + 1] = scale * np.array([2 * x, y, 0, 1, 0]) A[j + 2] = scale * np.array([0, x, 2 * y, 0, 1]) B[j] = scale * z B[j + 1] = scale * nx B[j + 2] = scale * ny j += 3 X = lstsq(A[:3 * len(nbrs), :], B[:3 * len(nbrs)], rcond=None) a, b, c, d, e = X[0] factor = 1.0 / np.sqrt(1.0 + d[0]**2 + e[0]**2) H[i] = factor**3 * (a + c + a * e**2 + c * d**2 - b * d * e) K[i] = factor**4 * (4 * a * c - b**2) oldn3 = n3.copy() n3 = factor * np.array([-d[0], -e[0], 1.0 ]) #new normal in local coordinates n3 = np.c_[n1, n2, oldn3].dot(n3) #new normal in global coordinates n2 = np.cross(n1, n3) n2 = n2 / np.linalg.norm(n2) n1 = np.cross(n3, n2) n1 = n1 / np.linalg.norm(n1) if np.linalg.norm(n3 - oldn3) < tol: up[i] = n1 vp[i] = n2 VN[i] = n3 break return K, -H, VN
def update(self, pick, value, group):
    '''
    value is added into pick
    pick: scalar
    value: scalar
    group: scalar
    returns whether a new group was created
    '''
    self.__graph.nodes(np.array([pick]))[0].add(value)
    related_nodes = np.array([]).astype(int)
    for d in range(self.__graph.d):
        related_nodes = np.union1d(related_nodes,
                                   self.__graph.nodes_connect(pick, d + 1))
    check_create = True
    if not self.__group_list[0].size:
        check_create = False
    if group != 0:  # was `group is not 0`, which compares identity; use != for value comparison
        for index, class_list in enumerate(self.__group_list):
            if index != group and index != 0 and \
                    np.intersect1d(class_list, related_nodes).size:
                check_create = False
                self.__group_list[group] = np.asarray(
                    np.append(self.__group_list[group], class_list))
                self.__group_list[index] = np.array([])
    else:
        combined = []  # keep as a plain list so pop() below works
        for index, class_list in enumerate(self.__group_list):
            if index != 0 and \
                    np.intersect1d(self.__group_list[index], related_nodes).size:
                check_create = False
                combined.append(index)
        if combined:
            head = combined.pop(0)
            self.__group_list[head] = np.asarray(
                np.append(self.__group_list[head], related_nodes))
            for i in combined:
                self.__group_list[head] = np.asarray(
                    np.append(self.__group_list[head], self.__group_list[i]))
                self.__group_list[i] = np.array([])
    if check_create:
        self.__group_list.append(
            np.asarray(np.append(related_nodes, pick)))
    self.__group_list[0] = np.setdiff1d(self.__group_list[0],
                                        np.append(related_nodes, pick))
    self.__class_number = len(self.__group_list)
    return check_create
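One detail worth calling out separately: np.intersect1d returns an array, and an array with more than one element has no defined truth value, so emptiness checks like the ones above have to go through .size. A standalone illustration with made-up values:

import numpy as np

class_list = np.array([1, 2, 3])
related_nodes = np.array([3, 4])

overlap = np.intersect1d(class_list, related_nodes)
if overlap.size:          # `if overlap:` would raise ValueError for arrays with more than one element
    print('groups share nodes:', overlap)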
def adsh_algo(code_length):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    '''
    parameter setting
    '''
    max_iter = opt.max_iter
    epochs = opt.epochs
    batch_size = opt.batch_size
    learning_rate = opt.learning_rate
    weight_decay = 5 * 10 ** -4
    num_samples = opt.num_samples
    gamma = opt.gamma
    record['param']['opt'] = opt
    record['param']['description'] = '[Comment: learning rate decay]'
    logger.info(opt)
    logger.info(code_length)
    logger.info(record['param']['description'])
    '''
    dataset preprocessing
    '''
    nums, dsets, labels = _dataset()
    num_database, num_test = nums
    dset_database, dset_test = dsets
    database_labels, test_labels = labels
    '''
    model construction
    '''
    model = cnn_model.CNNNet(opt.arch, code_length)
    model.cuda()
    adsh_loss = al.ADSHLoss(gamma, code_length, num_database)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    V = np.zeros((num_database, code_length))
    model.train()
    for iter in range(max_iter):
        iter_time = time.time()
        '''
        sampling and construct similarity matrix
        '''
        select_index = list(np.random.permutation(range(num_database)))[0: num_samples]
        _sampler = subsetsampler.SubsetSampler(select_index)
        trainloader = DataLoader(dset_database, batch_size=batch_size,
                                 sampler=_sampler, shuffle=False, num_workers=4)
        '''
        learning deep neural network: feature learning
        '''
        sample_label = database_labels.index_select(0, torch.from_numpy(np.array(select_index)))
        Sim = calc_sim(sample_label, database_labels)
        U = np.zeros((num_samples, code_length), dtype=np.float64)  # np.float is removed in recent NumPy
        for epoch in range(epochs):
            for iteration, (train_input, train_label, batch_ind) in enumerate(trainloader):
                batch_size_ = train_label.size(0)
                u_ind = np.linspace(iteration * batch_size,
                                    np.min((num_samples, (iteration + 1) * batch_size)) - 1,
                                    batch_size_, dtype=int)
                train_input = Variable(train_input.cuda())
                output = model(train_input)
                S = Sim.index_select(0, torch.from_numpy(u_ind))
                U[u_ind, :] = output.cpu().data.numpy()
                model.zero_grad()
                loss = adsh_loss(output, V, S, V[batch_ind.cpu().numpy(), :])
                loss.backward()
                optimizer.step()
        adjusting_learning_rate(optimizer, iter)
        '''
        learning binary codes: discrete coding
        '''
        barU = np.zeros((num_database, code_length))
        barU[select_index, :] = U
        Q = -2 * code_length * Sim.cpu().numpy().transpose().dot(U) - 2 * gamma * barU
        for k in range(code_length):
            sel_ind = np.setdiff1d(np.arange(code_length), k)  # all bit positions except k
            V_ = V[:, sel_ind]
            Uk = U[:, k]
            U_ = U[:, sel_ind]
            V[:, k] = -np.sign(Q[:, k] + 2 * V_.dot(U_.transpose().dot(Uk)))
        iter_time = time.time() - iter_time
        loss_ = calc_loss(V, U, Sim.cpu().numpy(), code_length, select_index, gamma)
        logger.info('[Iteration: %3d/%3d][Train Loss: %.4f]', iter, max_iter, loss_)
        record['train loss'].append(loss_)
        record['iter time'].append(iter_time)
    '''
    training procedure finishes, evaluation
    '''
    model.eval()
    testloader = DataLoader(dset_test, batch_size=1, shuffle=False, num_workers=4)
    qB = encode(model, testloader, num_test, code_length)
    rB = V
    mAP = calc_hr.calc_map(qB, rB, test_labels.numpy(), database_labels.numpy())  # avoid shadowing builtin map
    logger.info('[Evaluation: mAP: %.4f]', mAP)
    record['rB'] = rB
    record['qB'] = qB
    record['map'] = mAP
    filename = os.path.join(logdir, str(code_length) + 'bits-record.pkl')
    _save_record(record, filename)
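A small sketch of the leave-one-bit-out column selection used in the discrete coding step above; the shapes are toy values and V is just a random sign matrix standing in for the learned codes.

import numpy as np

code_length, num_database = 4, 6
V = np.sign(np.random.randn(num_database, code_length))   # stand-in binary codes

for k in range(code_length):
    sel_ind = np.setdiff1d(np.arange(code_length), k)     # every bit position except k
    V_ = V[:, sel_ind]
    assert V_.shape == (num_database, code_length - 1) and k not in sel_ind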
f_tot, final_frate, remove_baseline=True, N=5, robust_std=False, Athresh=0.1, Npeaks=Npeaks, thresh_C=0.3) idx_components_r = np.where(r_values >= .5)[0] idx_components_raw = np.where(fitness_raw < -40)[0] idx_components_delta = np.where(fitness_delta < -20)[0] idx_components = np.union1d(idx_components_r, idx_components_raw) idx_components = np.union1d(idx_components, idx_components_delta) idx_components_bad = np.setdiff1d(list(range(len(traces))), idx_components) print(('Keeping ' + str(len(idx_components)) + ' and discarding ' + str(len(idx_components_bad)))) #%% pl.figure() crd = plot_contours(A_tot.tocsc()[:, idx_components], Cn, thr=0.9) #%% A_tot = A_tot.tocsc()[:, idx_components] C_tot = C_tot[idx_components] #%% save_results = True if save_results: np.savez('results_analysis_patch.npz', A_tot=A_tot, C_tot=C_tot,
def compute_jacobian(self, vec_x, fun_smooth_grad, fun_a_grad, fun_b_grad, vec_smooth_r, vec_a_r, vec_b_r): conf = self.conf mtx_s = fun_smooth_grad(vec_x) mtx_a = fun_a_grad(vec_x) mtx_b = fun_b_grad(vec_x) n_s = vec_smooth_r.shape[0] n_ns = vec_a_r.shape[0] if conf.semismooth: aa = nm.abs(vec_a_r) ab = nm.abs(vec_b_r) iz = nm.where((aa < (conf.macheps * max(aa.max(), 1.0))) & (ab < (conf.macheps * max(ab.max(), 1.0))))[0] inz = nm.setdiff1d(nm.arange(n_ns), iz) output('non_active/active: %d/%d' % (len(inz), len(iz))) mul_a = nm.empty_like(vec_a_r) mul_b = nm.empty_like(mul_a) # Non-active part of the jacobian. if len(inz) > 0: a_r_nz = vec_a_r[inz] b_r_nz = vec_b_r[inz] sqrt_ab = nm.sqrt(a_r_nz**2.0 + b_r_nz**2.0) mul_a[inz] = (a_r_nz / sqrt_ab) - 1.0 mul_b[inz] = (b_r_nz / sqrt_ab) - 1.0 # Active part of the jacobian. if len(iz) > 0: vec_z = nm.zeros_like(vec_x) vec_z[n_s + iz] = 1.0 mtx_a_z = mtx_a[iz] mtx_b_z = mtx_b[iz] sqrt_ab = nm.empty((iz.shape[0], ), dtype=vec_a_r.dtype) for ir in range(len(iz)): row_a_z = mtx_a_z[ir] row_b_z = mtx_b_z[ir] sqrt_ab[ir] = nm.sqrt((row_a_z * row_a_z.T).todense() + (row_b_z * row_b_z.T).todense()) mul_a[iz] = ((mtx_a_z * vec_z) / sqrt_ab) - 1.0 mul_b[iz] = ((mtx_b_z * vec_z) / sqrt_ab) - 1.0 else: iz = nm.where(vec_a_r > vec_b_r)[0] mul_a = nm.zeros_like(vec_a_r) mul_b = nm.ones_like(mul_a) mul_a[iz] = 1.0 mul_b[iz] = 0.0 mtx_ns = sp.spdiags(mul_a, 0, n_ns, n_ns) * mtx_a \ + sp.spdiags(mul_b, 0, n_ns, n_ns) * mtx_b mtx_jac = compose_sparse([[mtx_s], [mtx_ns]]).tocsr() mtx_jac.sort_indices() return mtx_jac
def CurvatureISF1(vertices, faces): ''' This uses a two-ring neighborhood around a point. ''' tol = 1e-10 npt = vertices.shape[0] neighbor_tri = triangle_neighbors(faces, npt) neighbor_verts = np.array( [get_surf_neighbors(faces, neighbor_tri, k) for k in range(npt)]) e0 = vertices[faces[:, 2]] - vertices[faces[:, 1]] e1 = vertices[faces[:, 0]] - vertices[faces[:, 2]] e2 = vertices[faces[:, 1]] - vertices[faces[:, 0]] e0_norm = normr(e0) e1_norm = normr(e1) e2_norm = normr(e2) FaceNormals = 0.5 * fastcross(e0, e1) VN = GetVertexNormals(vertices, faces, FaceNormals, e0, e1, e2) up = np.zeros(vertices.shape) #Calculate initial coordinate system up[faces[:, 0]] = e2_norm up[faces[:, 1]] = e0_norm up[faces[:, 2]] = e1_norm #Calculate initial vertex coordinate system up = fastcross(VN, up) up = normr(up) vp = fastcross(up, VN) vp = normr(vp) qj = np.zeros([30, 3]) A = np.zeros([36, 5]) B = np.zeros([36, 1]) H = np.zeros(npt) K = np.zeros(npt) for i in range(npt): n1 = up[i] n2 = vp[i] n3 = VN[i] nbrs = np.unique(np.hstack(neighbor_verts[neighbor_verts[i]].flat)) nbrs = np.setdiff1d(nbrs, i) for _ in range(30): for j, pj in enumerate(vertices[nbrs]): qj[j] = np.array([ np.dot(pj - vertices[i], n1), np.dot(pj - vertices[i], n2), np.dot(pj - vertices[i], n3) ]) j = 0 k = 0 for (x, y, z) in qj: k += 1 if k == len(nbrs): break scale = 2 / (x**2 + y**2) A[j] = scale * np.array([x**2, x * y, y**2, x, y]) B[j] = scale * z j += 1 X = lstsq(A[:len(nbrs), :], B[:len(nbrs)], rcond=None) a, b, c, d, e = X[0] factor = 1.0 / np.sqrt(1.0 + d[0]**2 + e[0]**2) oldn3 = n3.copy() n3 = factor * np.array([-d[0], -e[0], 1.0]) n3 = np.c_[n1, n2, oldn3].dot(n3) #new normal in local coordinates VN[i] = n3 #new normal in global coordinates. up,vp,VN system is not orthogonal anymore, but that is okay as it is not used again n2 = np.cross(n1, n3) n2 = n2 / np.linalg.norm(n2) n1 = np.cross(n3, n2) n1 = n1 / np.linalg.norm(n1) H[i] = factor**3 * (a + c + a * e**2 + c * d**2 - b * d * e) K[i] = factor**4 * (4 * a * c - b**2) if np.linalg.norm(n3 - oldn3) < tol: break return K, -H, VN
def create_connectivity_EI_dir(neuron_parameters, connectivity_parameters, save_AM_parameters): [ nrowE, ncolE, nrowI, ncolI, nE, nI, nN, neuron_type, neuron_paramsE, neuron_paramsI ] = neuron_parameters [ landscape_type, landscape_size, asymmetry, p, std, separation, width, alpha, seed ] = connectivity_parameters [pEE, pEI, pIE, pII] = p [stdEE, stdEI, stdIE, stdII] = std [AM_address, fname] = save_AM_parameters if asymmetry[0] == 'E': nrowL = nrowE else: nrowL = nrowI if landscape_type == 'symmetric': landscape = None elif landscape_type == 'random': landscape = cl.random(nrowL, {'seed': 0}) elif landscape_type == 'homogenous': landscape = cl.homogeneous(nrowL, {'phi': 3}) elif landscape_type == 'perlin': landscape = cl.Perlin(nrowL, {'size': int(landscape_size)}) elif landscape_type == 'perlinuniform': landscape = cl.Perlin_uniform(nrowL, { 'size': int(landscape_size), 'base': seed }) iso_AM = np.zeros((nN, nN)) dir_AM = np.zeros((nN, nN)) [alphaEE, alphaEI, alphaIE, alphaII] = [0, 0, 0, 0] if asymmetry == 'EE': alphaEE = alpha elif asymmetry == 'EI': alphaEI = alpha elif asymmetry == 'IE': alphaIE = alpha elif asymmetry == 'II': alphaII = alpha for idx in range(nE): targets, delays = lcrn.lcrn_gauss_targets( idx, nrowE, ncolE, nrowE, ncolE, int(pEE * nE * (1 - alphaEE)), stdEE) targets = targets[targets != idx] if asymmetry == 'EE': if landscape is not None: direction = landscape[idx] dir_targets = dirconn.get_directional_targets( idx, nrowE, ncolE, nrowE, ncolE, direction, separation, width, int(pEE * nE * alphaEE)) dir_targets = dir_targets[dir_targets != idx] targets = np.setdiff1d(targets, dir_targets) dir_AM[idx, dir_targets] = 1. iso_AM[idx, targets] = 1. targets, delays = lcrn.lcrn_gauss_targets( idx, nrowE, ncolE, nrowI, ncolI, int(pEI * nI * (1 - alphaEI)), stdEI) if asymmetry == 'EI': if landscape is not None: direction = landscape[idx] dir_targets = dirconn.get_directional_targets( idx, nrowE, ncolE, nrowI, ncolI, direction, separation, width, int(pEI * nI * alphaEI)) targets = np.setdiff1d(targets, dir_targets) dir_AM[idx, dir_targets + nE] = 1. iso_AM[idx, targets + nE] = 1. for idx in range(nI): targets, delays = lcrn.lcrn_gauss_targets( idx, nrowI, ncolI, nrowE, ncolE, int(pIE * nE * (1 - alphaIE)), stdIE) if asymmetry == 'IE': if landscape is not None: direction = landscape[idx] dir_targets = dirconn.get_directional_targets( idx, nrowI, ncolI, nrowE, ncolE, direction, separation, width, int(pIE * nE * alphaIE)) targets = np.setdiff1d(targets, dir_targets) dir_AM[idx + nE, dir_targets] = 1. iso_AM[idx + nE, targets] = 1. targets, delays = lcrn.lcrn_gauss_targets( idx, nrowI, ncolI, nrowI, ncolI, int(pII * nI * (1 - alphaII)), stdII) targets = targets[targets != idx] if asymmetry == 'II': if landscape is not None: direction = landscape[idx] dir_targets = dirconn.get_directional_targets( idx, nrowI, ncolI, nrowI, ncolI, direction, separation, width, int(pII * nI * alphaII)) dir_targets = dir_targets[dir_targets != idx] targets = np.setdiff1d(targets, dir_targets) dir_AM[idx + nE, dir_targets + nE] = 1. iso_AM[idx + nE, targets + nE] = 1. print('Multiple connections') print(np.where(iso_AM + dir_AM > 1.)) sparse.save_npz(AM_address + fname + 'isoAM', sparse.coo_matrix(iso_AM)) sparse.save_npz(AM_address + fname + 'dirAM', sparse.coo_matrix(dir_AM)) sparse.save_npz(AM_address + fname + 'landscape', sparse.coo_matrix(landscape)) return [iso_AM, dir_AM, landscape, nrowL]
1)), np.argmax(mdlParams['labels_array'][mdlParams['trainInd'], :], 1)) print("Current class weights", class_weights) class_weights = class_weights * mdlParams['extra_fac'] print("Current class weights with extra", class_weights) elif mdlParams['balance_classes'] == 3 or mdlParams[ 'balance_classes'] == 4: # Split training set by classes not_one_hot = np.argmax(mdlParams['labels_array'], 1) mdlParams['class_indices'] = [] for i in range(mdlParams['numClasses']): mdlParams['class_indices'].append( np.where(not_one_hot == i)[0]) # Kick out non-trainind indices mdlParams['class_indices'][i] = np.setdiff1d( mdlParams['class_indices'][i], mdlParams['valInd']) #print("Class",i,mdlParams['class_indices'][i].shape,np.min(mdlParams['class_indices'][i]),np.max(mdlParams['class_indices'][i]),np.sum(mdlParams['labels_array'][np.int64(mdlParams['class_indices'][i]),:],0)) elif mdlParams['balance_classes'] == 5 or mdlParams[ 'balance_classes'] == 6 or mdlParams['balance_classes'] == 13: # Other class balancing loss class_weights = 1.0 / np.mean( mdlParams['labels_array'][mdlParams['trainInd'], :], axis=0) print("Current class weights", class_weights) class_weights = class_weights * mdlParams['extra_fac'] print("Current class weights with extra", class_weights) elif mdlParams['balance_classes'] == 9: # Only use HAM indicies for calculation indices_ham = mdlParams['trainInd'][mdlParams['trainInd'] < 10015] class_weights = 1.0 / np.mean( mdlParams['labels_array'][indices_ham, :], axis=0) print("Current class weights", class_weights)
def findFF(AM, N_start, n_cluster, nffn): """ finds feed forward path given a starting set of neurons """ F_all = [N_start] Fi = N_start N_FF = list(N_start) FF_full = -1 #AM = lil_matrix.toarray(AM) spread_seq = [] for i in range(nffn): AMi = np.zeros(np.shape(AM)) AMi[Fi, :] = np.array(AM[Fi, :]) AMi_sum = np.sum(AMi, axis=0) #print(np.unique(AMi_sum).astype(int)) Pi = np.where(AMi_sum > 0.)[0] Pi = np.setdiff1d(Pi, N_FF) if np.size(Pi) < n_cluster: FF_full = 0 print([i, 'broken']) break Fi1 = np.argsort(AMi_sum)[-n_cluster:] cd = np.mean(get_spread(Fi1, np.shape(AM)[0])) spread_seq.append(cd) FF_full = 1 F_all.append(Fi1) N_FF += Fi1.tolist() Fi = Fi1 plt.plot(spread_seq, '.') plt.show() # Calculate effective length nN = np.shape(AM)[0] nrow = ncol = int(np.sqrt(nN)) F0 = F_all[0] F50 = F_all[-1] F0_coor = np.zeros((n_cluster, 2)) F50_coor = np.zeros((n_cluster, 2)) for i, n in enumerate(F0): F0_coor[i, 0] = n // nrow # x coor F0_coor[i, 1] = n % nrow # y coor for i, n in enumerate(F50): F50_coor[i, 0] = n // nrow # x coor F50_coor[i, 1] = n % nrow # y coor F0_centroid = (round(np.mean(F0_coor[:, 0])), round(np.mean(F0_coor[:, 1]))) F50_centroid = (round(np.mean(F50_coor[:, 0])), round(np.mean(F50_coor[:, 1]))) effective_length = float( distance.cdist([F0_centroid], [F50_centroid], 'euclidean')) return F_all, effective_length, FF_full
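Stripped of the plotting and bookkeeping, the recruitment step above amounts to: sum the drive from the current cluster, drop neurons already on the path with np.setdiff1d, and keep the most strongly driven candidates. A toy version (random adjacency matrix, hypothetical sizes; here the top-k selection is explicitly restricted to the candidate set):

import numpy as np

n_neurons, n_cluster = 20, 3
AM = (np.random.rand(n_neurons, n_neurons) < 0.2).astype(float)  # toy adjacency matrix

recruited = [0, 1, 2]                            # neurons already on the path
drive = AM[recruited, :].sum(axis=0)             # summed input from the current cluster
candidates = np.setdiff1d(np.where(drive > 0.)[0], recruited)
next_cluster = candidates[np.argsort(drive[candidates])[-n_cluster:]]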
def create_connectivity_EI_random_dir(neuron_parameters, connectivity_parameters, save_AM_parameters): [ nrowE, ncolE, nrowI, ncolI, nE, nI, nN, neuron_type, neuron_paramsE, neuron_paramsI ] = neuron_parameters [landscape_type, landscape_size, asymmetry, p, shift, std, alpha, seed] = connectivity_parameters [pEE, pEI, pIE, pII] = p [stdEE, stdEI, stdIE, stdII] = std [AM_address, fname] = save_AM_parameters if asymmetry[0] == 'E': nrowL = nrowE else: nrowL = nrowI if asymmetry[-1] == 'E': nrowM = nrowE else: nrowM = nrowI move = cl.move(nrowM) if landscape_type == 'symmetric': landscape = None elif landscape_type == 'random': landscape = cl.random(nrowL, {'seed': 0}) elif landscape_type == 'homogenous': landscape = cl.homogeneous(nrowL, {'phi': 3}) elif landscape_type == 'perlin': landscape = cl.Perlin(nrowL, {'size': int(landscape_size)}) elif landscape_type == 'perlinuniform': landscape = cl.Perlin_uniform(nrowL, { 'size': int(landscape_size), 'base': seed }) ran_AM = np.zeros((nN, nN)) dir_AM = np.zeros((nN, nN)) [alphaEE, alphaEI, alphaIE, alphaII] = [0, 0, 0, 0] if asymmetry == 'EE': alphaEE = alpha elif asymmetry == 'EI': alphaEI = alpha elif asymmetry == 'IE': alphaIE = alpha elif asymmetry == 'II': alphaII = alpha for idx in range(nE): targets = [] if (asymmetry == 'EE') and (landscape is not None): targets, delays = lcrn.lcrn_gauss_targets(idx, nrowE, ncolE, nrowE, ncolE, int(pEE * nE * alphaEE), stdEE) targets = (targets + shift * move[landscape[idx] % len(move)]) % nE targets = targets[targets != idx].astype(int) dir_AM[idx, targets] = 1. r_targets = get_random_targets(idx, nrowE, ncolE, nrowE, ncolE, int(pEE * nE * (1 - alphaEE))) r_targets = np.setdiff1d(r_targets, targets) ran_AM[idx, r_targets] = 1. targets = [] if (asymmetry == 'EI') and (landscape is not None): targets, delays = lcrn.lcrn_gauss_targets(idx, nrowE, ncolE, nrowI, ncolI, int(pEI * nI * alphaEI), stdEI) targets = (targets + shift * move[landscape[idx] % len(move)]) % nI dir_AM[idx, targets + nE] = 1. r_targets = get_random_targets(idx, nrowE, ncolE, nrowI, ncolI, int(pEI * nI * (1 - alphaEI))) r_targets = np.setdiff1d(r_targets, targets) ran_AM[idx, r_targets + nE] = 1. for idx in range(nI): targets = [] if (asymmetry == 'IE') and (landscape is not None): targets, delays = lcrn.lcrn_gauss_targets(idx, nrowI, ncolI, nrowE, ncolE, int(pIE * nE * alphaIE), stdIE) targets = (targets + shift * move[landscape[idx] % len(move)]) % nI dir_AM[idx + nE, targets] = 1. r_targets = get_random_targets(idx, nrowI, ncolI, nrowE, ncolE, int(pIE * nE * (1 - alphaIE))) r_targets = np.setdiff1d(r_targets, targets) ran_AM[idx + nE, r_targets] = 1. targets = [] if (asymmetry == 'II') and (landscape is not None): targets, delays = lcrn.lcrn_gauss_targets(idx, nrowI, ncolI, nrowI, ncolI, int(pII * nI * alphaII), stdII) targets = (targets + shift * move[landscape[idx] % len(move)]) % nI targets = targets[targets != idx].astype(int) dir_AM[idx + nE, targets + nE] = 1. r_targets = get_random_targets(idx, nrowI, ncolI, nrowI, ncolI, int(pII * nI * (1 - alphaII))) r_targets = np.setdiff1d(r_targets, targets) ran_AM[idx + nE, r_targets + nE] = 1. print('Multiple connections') print(np.where(ran_AM + dir_AM > 1.)) sparse.save_npz(AM_address + fname + 'random_rAM', sparse.coo_matrix(ran_AM)) sparse.save_npz(AM_address + fname + 'random_dAM', sparse.coo_matrix(dir_AM)) sparse.save_npz(AM_address + fname + 'landscape', sparse.coo_matrix(landscape)) return [ran_AM, dir_AM, landscape, nrowL]
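In both connectivity builders the role of np.setdiff1d is simply to keep the directional and non-directional target sets disjoint before they are written into the adjacency matrices; in isolation (made-up target ids) the idiom is:

import numpy as np

dir_targets = np.array([3, 5, 8])                  # targets chosen by the directional rule
iso_targets = np.array([1, 3, 5, 9, 12])           # targets chosen by the isotropic/random rule

iso_only = np.setdiff1d(iso_targets, dir_targets)  # drop targets already connected directionally
assert np.intersect1d(iso_only, dir_targets).size == 0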
numMatch= 0.0 for j in range(0,len(np.where(zgbpd.gID==1)[0])): k1 = np.where(z1==b[j])[0] k2 = np.where(z2==b[j])[0] numMatch = numMatch + len(np.intersect1d(k1,k2)) percentVoxelMatch =numMatch/zdata.dims[0]/zdata.dims[1] zgbpd.computeNeighbors() zdata.computeNeighbors() neighborStats = np.zeros([zdata.Ngrain,3]) # col1: if all correct # col2: if only 1 wrong # col3: difference in # of grains bbad = np.where(zgbpd.gID==0)[0] for j in range(0,zdata.Ngrain): k1 = np.setdiff1d(zdata.neighbors[j],bbad) k2 = np.setdiff1d(zgbpd.neighbors[j],bbad) if (len(np.setdiff1d(k1,k2))==0): neighborStats[j,0]=1.0 if (len(np.setdiff1d(k1,k2))<=1): neighborStats[j,1]=1.0 neighborStats[j,2] = len(k2)-len(k1) k1 = neighborStats[np.where(zgbpd.gID==1)[0],0] percentAllNeighbor = np.sum(k1)/len(np.where(zgbpd.gID==1)[0]) k1 = neighborStats[np.where(zgbpd.gID==1)[0],1] percentOneNeighbor = np.sum(k1)/len(np.where(zgbpd.gID==1)[0]) k1 = np.where( (neighborStats[:,2]>0) & (zgbpd.gID==1) )[0] percentNeighborExcess = np.sum(neighborStats[k1,2])/ \ len(np.where(zgbpd.gID==1)[0]) k1 = np.where( (neighborStats[:,2]<0) & (zgbpd.gID==1) )[0] percentNeighborLess = np.abs(np.sum(neighborStats[k1,2]))/ \
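The neighbor bookkeeping in the fragment above relies on one-sided set containment: np.setdiff1d(k1, k2) is empty exactly when every element of k1 also appears in k2, after the "bad" grain ids are removed. A minimal standalone check with made-up grain ids:

import numpy as np

bad = np.array([0, 7])                         # grain ids to ignore
neighbors_ref = np.array([1, 2, 3, 7])         # reference neighbor list
neighbors_rec = np.array([0, 2, 3, 1, 5])      # reconstructed neighbor list

k1 = np.setdiff1d(neighbors_ref, bad)
k2 = np.setdiff1d(neighbors_rec, bad)
all_found = np.setdiff1d(k1, k2).size == 0     # True when every reference neighbor was recovered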
def findFF2(AM, iAM, N_start, n_cluster, nffn): """ finds feed forward path given a starting set of neurons, neurons are chosen based on maximum excitation received and least inhibition received """ F_all = [N_start] Fi = N_start N_FF = list(N_start) FF_full = -1 #AM = lil_matrix.toarray(AM) #spread_seq = [] for i in range(nffn): AMi = np.zeros(np.shape(AM)) AMi[Fi, :] = np.array(AM[Fi, :]) AMi_sum = np.sum(AMi, axis=0) iAMi = np.zeros(np.shape(iAM)) iAMi[Fi, :] = np.array(iAM[Fi, :]) iAMi_sum = np.sum(iAMi, axis=0) if i < 5: print(np.unique(iAMi_sum).astype(int)) # --- 1 Pi = np.arange(np.shape(AM)[0]) Pi = np.setdiff1d(Pi, N_FF) # --- 2 #Pi = np.where(AMi_sum > 0.)[0] #Pi = np.setdiff1d(Pi, N_FF) # --- 3 #Pi = np.setdiff1d(np.arange(np.shape(AM)[0]).astype(int), N_FF) if np.size(Pi) < n_cluster: FF_full = 0 print([i, 'broken']) break # find the ones that have least inhibition # --- 1 Fi1_id = np.argsort(AMi_sum[Pi] - iAMi_sum[Pi])[-n_cluster:] Fi1 = Pi[Fi1_id] # --- 2 #Fi1_id = np.argsort(iAMi_sum[Pi])[:n_cluster] #Fi1 = Pi[Fi1_id] # --- 3 #Fi1_id = np.argsort(AMi_sum[Pi] - iAMi_sum[Pi])[-n_cluster:] #Fi1 = Pi[Fi1_id] #print(AMi_sum[Fi1_id] - iAMi_sum[Fi1_id]) #cd = np.max(get_spread(Fi1, np.shape(AM)[0])) #spread_seq.append(cd) FF_full = 1 F_all.append(Fi1) N_FF += Fi1.tolist() Fi = Fi1 #fig, ax = plt.subplots() #ax.plot(spread_seq, '.') #plt.show() # Calculate effective length nN = np.shape(AM)[0] nrow = ncol = int(np.sqrt(nN)) F0 = F_all[0] F50 = F_all[-1] F0_coor = np.zeros((n_cluster, 2)) F50_coor = np.zeros((n_cluster, 2)) for i, n in enumerate(F0): F0_coor[i, 0] = n // nrow # x coor F0_coor[i, 1] = n % nrow # y coor for i, n in enumerate(F50): F50_coor[i, 0] = n // nrow # x coor F50_coor[i, 1] = n % nrow # y coor F0_centroid = (round(np.mean(F0_coor[:, 0])), round(np.mean(F0_coor[:, 1]))) F50_centroid = (round(np.mean(F50_coor[:, 0])), round(np.mean(F50_coor[:, 1]))) effective_length = float( distance.cdist([F0_centroid], [F50_centroid], 'euclidean')) return F_all, effective_length, FF_full
def __call__(self, current_order, current_pos): # all batches contain unique indices remaining = current_order[current_pos:] first = numpy.setdiff1d(numpy.arange(len(current_order)), remaining) second = numpy.setdiff1d(numpy.arange(len(current_order)), first) return numpy.concatenate((first, second))
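For reference, a quick trace of what the two setdiff1d calls above return: first is the already-consumed prefix of the order (sorted) and second is the still-remaining indices (sorted). The values below are made up.

import numpy

current_order = numpy.array([4, 2, 0, 3, 1])
current_pos = 2

remaining = current_order[current_pos:]                                 # [0, 3, 1]
first = numpy.setdiff1d(numpy.arange(len(current_order)), remaining)    # [2, 4]
second = numpy.setdiff1d(numpy.arange(len(current_order)), first)       # [0, 1, 3]
new_order = numpy.concatenate((first, second))                          # [2, 4, 0, 1, 3]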
polioudakis_all = np.array(polioudakis.iloc[:, 0]) polioudakis.index = polioudakis_all polioudakis_top = np.array(markers.iloc[:, 0][markers['p_val_adj'] < 0.05]) topGenes = results_subset['g'][results_subset['FDR'] < 0.05] commonGenes = np.intersect1d(goodGenes, polioudakis_all) topGenes_common = topGenes[[ topGenes.iloc[i] in commonGenes for i in range(len(topGenes)) ]] polioudakis_common = polioudakis_top[[ polioudakis_top[i] in commonGenes for i in range(len(polioudakis_top)) ]] newGenes = np.setdiff1d(topGenes_common, polioudakis_common) # How many genes were detected in both our and polioudakis data? print(len(commonGenes)) # How many of those genes show up as variable with our or their method? print(len(topGenes_common)) print(len(polioudakis_common)) # How many of those genes are exclusive to our data? print(len(newGenes)) # Subset results:
def solve(c4n, n4e, n4db, ind4e, f, u_D, degree):
    """
    Solves the one-dimensional Poisson problem on the given mesh with a Dirichlet boundary condition.

    Parameters
       - ``c4n`` (``float64 array``) : coordinates for nodes
       - ``n4e`` (``int32 array``) : nodes for elements
       - ``n4db`` (``int32 array``) : nodes for Dirichlet boundary
       - ``ind4e`` (``int32 array``) : indices for elements
       - ``f`` (``lambda``) : source term
       - ``u_D`` (``lambda``) : Dirichlet boundary condition
       - ``degree`` (``int32``) : Polynomial degree

    Returns
       - ``x`` (``float64 array``) : solution

    Example
       >>> N = 2
       >>> from mozart.mesh.rectangle import interval
       >>> c4n, n4e, n4db, ind4e = interval(0, 1, 4, 2)
       >>> f = lambda x: np.ones_like(x)
       >>> u_D = lambda x: np.zeros_like(x)
       >>> from mozart.poisson.fem.interval import solve
       >>> x = solve(c4n, n4e, n4db, ind4e, f, u_D, N)
       >>> x
       array([ 0.       , 0.0546875, 0.09375  , 0.1171875, 0.125    ,
               0.1171875, 0.09375  , 0.0546875, 0.       ])
    """
    M_R, S_R, D_R = getMatrix(degree)
    fval = f(c4n[ind4e].flatten())
    nrNodes = int(c4n.shape[0])
    nrElems = int(n4e.shape[0])
    nrLocal = int(M_R.shape[0])
    I = np.zeros((nrElems * nrLocal * nrLocal), dtype=np.int32)
    J = np.zeros((nrElems * nrLocal * nrLocal), dtype=np.int32)
    Alocal = np.zeros((nrElems * nrLocal * nrLocal), dtype=np.float64)
    b = np.zeros(nrNodes)
    Poisson_1D = lib['Poisson_1D']  # need the extern!!
    Poisson_1D.argtypes = (c_void_p, c_void_p, c_void_p, c_int,
                           c_void_p, c_void_p, c_int,
                           c_void_p, c_void_p, c_void_p, c_void_p, c_void_p,)
    Poisson_1D.restype = None
    Poisson_1D(c_void_p(n4e.ctypes.data), c_void_p(ind4e.ctypes.data),
               c_void_p(c4n.ctypes.data), c_int(nrElems),
               c_void_p(M_R.ctypes.data), c_void_p(S_R.ctypes.data),
               c_int(nrLocal), c_void_p(fval.ctypes.data),
               c_void_p(I.ctypes.data), c_void_p(J.ctypes.data),
               c_void_p(Alocal.ctypes.data), c_void_p(b.ctypes.data))
    from scipy.sparse import coo_matrix
    from scipy.sparse.linalg import spsolve
    STIMA_COO = coo_matrix((Alocal, (I, J)), shape=(nrNodes, nrNodes))
    STIMA_CSR = STIMA_COO.tocsr()
    dof = np.setdiff1d(range(0, nrNodes), n4db)
    x = np.zeros(nrNodes)
    x[dof] = spsolve(STIMA_CSR[dof, :].tocsc()[:, dof].tocsr(), b[dof])
    return x
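The boundary handling above is the usual "solve only on the free nodes" idiom: np.setdiff1d removes the Dirichlet node indices from the full index range and the reduced system is solved on the remaining degrees of freedom. A tiny dense stand-in (no compiled Poisson_1D kernel; the matrix and node count are made up):

import numpy as np

nrNodes = 5
n4db = np.array([0, 4])                        # Dirichlet boundary nodes
A = np.diag(np.full(nrNodes, 2.0))             # toy stiffness matrix
b = np.ones(nrNodes)

dof = np.setdiff1d(np.arange(nrNodes), n4db)   # free (interior) degrees of freedom
x = np.zeros(nrNodes)                          # boundary entries stay at the Dirichlet value 0
x[dof] = np.linalg.solve(A[np.ix_(dof, dof)], b[dof])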
def Evaluate(self, kNN, top_N): precision = 0 recall = 0 user_count = 0 train = self.traindata test = self.testdata num_users = self.num_users num_items = self.num_items idcg = np.zeros(num_users) dcg = np.zeros(num_users) ndcg = np.zeros(num_users) map = np.zeros(num_users) for u in range(num_users): r_u_test = test[u] test_items = np.nonzero(r_u_test) test_items_idx = test_items[0] if len( test_items_idx ) == 0: # if this user does not possess any rating in the test set, skip the evaluate procedure continue else: r_u_train = train[u] train_items = np.nonzero(r_u_train) train_items_idx = train_items[ 0] # items user u rated in the train data set, which we do not need to predict pred_item_idx = np.setdiff1d(range(num_items), train_items_idx) pred_sore = np.zeros(num_items) for item in pred_item_idx: pred_sore[item] = self.ItemCFPrediction(u, item, kNN) rec_cand = np.argsort(-pred_sore) rec_list = rec_cand[0:top_N] hit_set = np.intersect1d(rec_list, test_items_idx) precision = precision + len(hit_set) / (top_N * 1.0) recall = recall + len(hit_set) / (len(test_items_idx) * 1.0) user_count = user_count + 1 # calculate the ndcg and map measure if len(hit_set) != 0: rel_list = np.zeros((len(rec_list))) rank = 0.0 # to calculate the idcg measure for item in hit_set: rec_of_i = list(rec_list) item_rank = rec_of_i.index( item ) # relevant items in the rec_of_i, to calculate dcg measure rel_list[item_rank] = 1 dcg[u] = dcg[u] + 1.0 / math.log(item_rank + 2, 2) idcg[u] = idcg[u] + 1.0 / math.log(rank + 2, 2) map[u] = map[u] + (rank + 1) / (item_rank + 1.0) rank = rank + 1 ndcg[u] = dcg[u] / (idcg[u] * 1.0) map[u] = map[u] / len(hit_set) ndcg = sum(ndcg) / user_count map = sum(map) / user_count precision = precision / (user_count * 1.0) recall = recall / (user_count * 1.0) return precision, recall, ndcg, map