def multiplicity(position, sgname=None, sgno=None, cell_choice='standard'):
    """
    Calculates the multiplicity of a fractional position in the unit cell.
    If called by sgno, cell_choice is necessary for eg rhombohedral space groups.

    INPUT:
        position    : fractional coordinates; combined with each symmetry
                      operation as n.dot(position, rot) + trans
        sgname/sgno : space group name or number (sgname wins if both given)
        cell_choice : forwarded unchanged to sg.sg

    OUTPUT:
        number of symmetry-generated positions that differ by more than a
        whole-number translation

    RAISES:
        ValueError if neither sgname nor sgno is given
    """
    if sgname is not None:
        mysg = sg.sg(sgname=sgname, cell_choice=cell_choice)
    elif sgno is not None:
        mysg = sg.sg(sgno=sgno, cell_choice=cell_choice)
    else:
        raise ValueError('No space group information provided')

    # Image of the position under every symmetry operation.
    lp = n.zeros((mysg.nsymop, 3))
    for i in range(mysg.nsymop):
        lp[i, :] = n.dot(position, mysg.rot[i]) + mysg.trans[i]

    # lpu collects the images found to be distinct so far.
    lpu = n.array([lp[0, :]])
    multi = 1
    for i in range(1, mysg.nsymop):
        for j in range(multi):
            t = lp[i] - lpu[j]
            # Compare against the nearest integer rather than n.mod(t, 1):
            # a difference such as -1e-12 wraps to ~1.0 under mod and would
            # be miscounted as a new position.
            if n.sum(n.abs(t - n.round(t))) < 0.00001:
                break
        else:
            # No already-known image matched: this one is new.
            lpu = n.concatenate((lpu, [lp[i, :]]))
            multi += 1
    return multi
def _set_distribution(indata): prefix = 'distribution_' if indata[prefix + 'name'] == 'HMM': sg('new_hmm', indata[prefix + 'N'], indata[prefix + 'M']) sg('bw') else: raise NotImplementedError, 'Can\'t yet train other distributions than HMM in static interface.'
def _set_distribution (indata): prefix='distribution_' if indata[prefix+'name']=='HMM': sg('new_hmm', indata[prefix+'N'], indata[prefix+'M']) sg('bw') else: raise NotImplementedError, 'Can\'t yet train other distributions than HMM in static interface.'
def multiplicity(hkls, sgname=None, sgno=None, cell_choice="standard"):
    """
    Calculate the powder diffraction multiplicity of a set of reflections

    INPUT:
        hkls        : HKLs for the reflections (first three entries of each
                      item are used)
        sgno/sgname : provide either the space group number or its name
                      e.g. sgno=225 or equivalently sgname='Fm-3m'
        cell_choice : forwarded unchanged to sg.sg

    OUTPUT:
        array of multiplicities

    RAISES:
        ValueError if neither sgname nor sgno is given
    """
    if sgname is not None:
        spg = sg.sg(sgname=sgname, cell_choice=cell_choice)
    elif sgno is not None:
        spg = sg.sg(sgno=sgno, cell_choice=cell_choice)
    else:
        raise ValueError("No space group information given")

    # Add the inversion-related rotations as well, so that it is also
    # present for non-centrosymmetric space groups.
    candidates = np.concatenate((spg.rot[: spg.nuniq], -spg.rot[: spg.nuniq]))

    # Drop duplicate rotations, keeping first occurrences in order.  Exact
    # tuple comparison replaces the former random-projection trick, which
    # consumed global RNG state and could in principle collide.
    seen = set()
    Rots = []
    for R in candidates:
        key = tuple(np.ravel(R).tolist())
        if key not in seen:
            seen.add(key)
            Rots.append(R)

    M = []
    for refl in hkls:
        # Count the distinct images of this reflection under all rotations.
        images = set(
            tuple(np.ravel(np.dot(refl[:3], R)).tolist()) for R in Rots
        )
        M.append(len(images))
    return np.array(M)
def _get_alpha_and_sv(indata, prefix): if not indata.has_key(prefix+'alpha_sum') and \ not indata.has_key(prefix+'sv_sum'): return None, None a=0 sv=0 if indata.has_key(prefix+'label_type') and \ indata[prefix+'label_type']=='series': for i in xrange(sg('get_num_svms')): [dump, weights]=sg('get_svm', i) weights=weights.T for item in weights[0].tolist(): a+=item for item in weights[1].tolist(): sv+=item a=abs(a-indata[prefix+'alpha_sum']) sv=abs(sv-indata[prefix+'sv_sum']) else: [dump, weights]=sg('get_svm') weights=weights.T for item in weights[0].tolist(): a+=item a=abs(a-indata[prefix+'alpha_sum']) for item in weights[1].tolist(): sv+=item sv=abs(sv-indata[prefix+'sv_sum']) return a, sv
def _evaluate(indata, prefix):
    """Check sg's distance matrices against the reference ones in ``indata``.

    For 'TRAIN' and then 'TEST', fetches the matrix via
    ``sg('get_distance_matrix', ...)`` and takes the largest absolute
    element-wise deviation from ``indata['distance_matrix_train']`` /
    ``indata['distance_matrix_test']``.  Returns whatever
    ``util.check_accuracy`` returns for those deviations.
    """
    computed_train = sg('get_distance_matrix', 'TRAIN')
    train_error = abs(indata['distance_matrix_train'] - computed_train)
    worst_train = max(train_error.flat)

    computed_test = sg('get_distance_matrix', 'TEST')
    test_error = abs(indata['distance_matrix_test'] - computed_test)
    worst_test = max(test_error.flat)

    tolerance = indata[prefix + 'accuracy']
    return util.check_accuracy(tolerance, dm_train=worst_train, dm_test=worst_test)
def predict(self, testPoints):
    '''Predicts performance using previously learned model.
    self.train() must be called before this!

    A testPoints array with fewer than two dimensions is wrapped into a
    one-row array before being rescaled with phys2unif and sent to sg.
    '''
    points = testPoints
    if len(points.shape) < 2:
        points = array([points])

    scaled = phys2unif(points, self.ranges)
    sg('set_features', 'TEST', scaled.T)
    return sg('classify')
def _train(indata): if indata['regression_type'] == 'svm': sg('c', double(indata['regression_C'])) sg('svm_epsilon', indata['regression_epsilon']) sg('svr_tube_epsilon', indata['regression_tube_epsilon']) elif indata['regression_type'] == 'kernelmachine': sg('krr_tau', indata['regression_tau']) else: raise StandardError, 'Incomplete regression data.' sg('train_regression')
def _train (indata): if indata['regression_type']=='svm': sg('c', double(indata['regression_C'])) sg('svm_epsilon', indata['regression_epsilon']) sg('svr_tube_epsilon', indata['regression_tube_epsilon']) elif indata['regression_type']=='kernelmachine': sg('krr_tau', indata['regression_tau']) else: raise StandardError, 'Incomplete regression data.' sg('train_regression')
def predict(self, testPoints):
    """Predicts performance using previously learned model.
    self.train() must be called before this!

    Input with fewer than two dimensions is first wrapped in an outer
    array; the points are rescaled via phys2unif using self.ranges and
    their transpose is registered as the "TEST" features.
    """
    is_single_point = len(testPoints.shape) < 2
    batch = array([testPoints]) if is_single_point else testPoints
    sg("set_features", "TEST", phys2unif(batch, self.ranges).T)
    return sg("classify")
def _evaluate(indata, prefix):
    """Check sg's kernel matrices against the reference ones in ``indata``.

    Calls ``util.set_and_train_kernel(indata)`` first, then for 'TRAIN' and
    'TEST' takes the largest absolute element-wise deviation between
    ``sg('get_kernel_matrix', ...)`` and ``indata['kernel_matrix_train']`` /
    ``indata['kernel_matrix_test']``.  Returns the result of
    ``util.check_accuracy`` for those deviations.
    """
    util.set_and_train_kernel(indata)

    deviations = {}
    for target, label in (('TRAIN', 'train'), ('TEST', 'test')):
        computed = sg('get_kernel_matrix', target)
        reference = indata['kernel_matrix_' + label]
        deviations['km_' + label] = max(abs(reference - computed).flat)

    return util.check_accuracy(indata[prefix + 'accuracy'], **deviations)
def predict(self, testPoints):
    '''Predicts performance using previously learned model.
    self.train() must be called before this!

    Returns the result of sg('classify') after the (rescaled, transposed)
    test points have been set as the 'TEST' features.
    '''
    if len(testPoints.shape) >= 2:
        samples = testPoints
    else:
        # Promote a lower-dimensional input to a one-element batch.
        samples = array([testPoints])

    features = phys2unif(samples, self.ranges).T
    sg('set_features', 'TEST', features)
    result = sg('classify')
    return result
def clustering_kmeans(fm_train=traindat, size_cache=10, k=3, iter=1000):
    """Train a 'KMEANS' clustering through sg and return its result.

    Issues, in order: set_features (TRAIN), set_distance, new_clustering,
    train_clustering with ``k`` and ``iter``.  ``size_cache`` is accepted
    but not used here.

    Returns:
        A two-element list with the pair unpacked from sg('get_clustering').
    """
    commands = (
        ('set_features', 'TRAIN', fm_train),
        ('set_distance', 'EUCLIDIAN', 'REAL'),
        ('new_clustering', 'KMEANS'),
        ('train_clustering', k, iter),
    )
    for command in commands:
        sg(*command)

    radii, centres = sg('get_clustering')
    return [radii, centres]
def clustering_hierarchical(fm_train=traindat, size_cache=10, merges=3):
    """Train a 'HIERARCHICAL' clustering through sg and return its result.

    ``merges`` is passed to sg('train_clustering', ...); ``size_cache`` is
    accepted but not used here.

    Returns:
        A two-element list with the pair unpacked from sg('get_clustering').
    """
    sg('set_features', 'TRAIN', fm_train)
    sg('set_distance', 'EUCLIDIAN', 'REAL')
    sg('new_clustering', 'HIERARCHICAL')
    sg('train_clustering', merges)

    distances, merged_pairs = sg('get_clustering')
    return [distances, merged_pairs]
def distance():
    """Exercise the 'DISTANCE' kernel on the module's real-valued features.

    Prints 'Distance', imports sg, registers fm_train_real / fm_test_real
    (module-level names) as features, selects the 'EUCLIDIAN' distance and a
    'DISTANCE' kernel, then requests the TRAIN and TEST kernel matrices.
    Nothing is returned.
    """
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print('Distance')

    width = 1.7
    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_distance', 'EUCLIDIAN', 'REAL')
    sg('set_kernel', 'DISTANCE', size_cache, width)

    # The matrices are requested but never used, so they are not bound.
    sg('get_kernel_matrix', 'TRAIN')
    sg('get_kernel_matrix', 'TEST')
def distance_chebyshew(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'CHEBYSHEW' distance in sg and return the TEST matrix.

    The TRAIN distance matrix is requested first (result discarded), then
    the TEST features are set and the TEST distance matrix is returned.
    """
    sg('set_distance', 'CHEBYSHEW', 'REAL')

    matrix = None
    for target, features in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, features)
        matrix = sg('get_distance_matrix', target)
    return matrix
def _train (indata, prefix): if indata.has_key(prefix+'max_iter'): max_iter=indata[prefix+'max_iter'] else: max_iter=1000 if indata.has_key(prefix+'k'): first_arg=indata[prefix+'k'] elif indata.has_key(prefix+'merges'): first_arg=indata[prefix+'merges'] else: raise StandardError, 'Incomplete clustering data.' sg('train_clustering', first_arg, max_iter)
def kernel_const(fm_train_real=traindat, fm_test_real=testdat, c=23.0, size_cache=10):
    """Configure the "CONST" kernel in sg and return the TEST kernel matrix.

    The TRAIN kernel matrix is requested as well, but only the TEST result
    is returned.
    """
    for target, features in (("TRAIN", fm_train_real), ("TEST", fm_test_real)):
        sg("set_features", target, features)

    sg("set_kernel", "CONST", "REAL", size_cache, c)

    sg("get_kernel_matrix", "TRAIN")
    return sg("get_kernel_matrix", "TEST")
def distance_cosine(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'COSINE' distance in sg and return the TEST matrix.

    The TRAIN matrix is requested before the TEST features are set; its
    value is not returned.
    """
    sg('set_distance', 'COSINE', 'REAL')

    # Train pass: computed, result discarded.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')

    # Test pass: this is what the caller receives.
    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
def distance_euclidian(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'EUCLIDIAN' distance in sg and return the TEST matrix.

    Both the TRAIN and the TEST distance matrices are requested, in that
    order; only the latter is returned.
    """
    sg('set_distance', 'EUCLIDIAN', 'REAL')

    stages = (('TRAIN', fm_train_real), ('TEST', fm_test_real))
    latest = None
    for name, data in stages:
        sg('set_features', name, data)
        latest = sg('get_distance_matrix', name)
    return latest
def distance_chisquare(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'CHISQUARE' distance in sg and return the TEST matrix.

    The TRAIN matrix is requested first and its result dropped.
    """
    sg('set_distance', 'CHISQUARE', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # value intentionally unused

    sg('set_features', 'TEST', fm_test_real)
    test_matrix = sg('get_distance_matrix', 'TEST')
    return test_matrix
def distance_chisquare(fm_train_real=traindat, fm_test_real=testdat):
    """Return sg's TEST distance matrix for the 'CHISQUARE' distance.

    Features are set and the distance matrix requested for 'TRAIN' and then
    for 'TEST'; the value from the last request is returned.
    """
    sg('set_distance', 'CHISQUARE', 'REAL')

    result = None
    for target, features in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, features)
        result = sg('get_distance_matrix', target)
    return result
def distance_braycurtis(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'BRAYCURTIS' distance in sg and return the TEST matrix.

    The TRAIN distance matrix is also requested, before the TEST features
    are registered; it is not part of the return value.
    """
    sg('set_distance', 'BRAYCURTIS', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
def distance_cosine(fm_train_real=traindat, fm_test_real=testdat):
    """Return sg's TEST distance matrix for the 'COSINE' distance.

    Runs the set-features / get-distance-matrix pair for 'TRAIN' and then
    'TEST', returning the second result.
    """
    sg('set_distance', 'COSINE', 'REAL')

    passes = [('TRAIN', fm_train_real), ('TEST', fm_test_real)]
    outcome = None
    for label, feats in passes:
        sg('set_features', label, feats)
        outcome = sg('get_distance_matrix', label)
    return outcome
def kernel_const(fm_train_real=traindat, fm_test_real=testdat, c=23., size_cache=10):
    """Configure the 'CONST' kernel in sg and return the TEST kernel matrix.

    ``size_cache`` and ``c`` are forwarded to sg('set_kernel', ...).  The
    TRAIN kernel matrix is requested first; only the TEST one is returned.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'CONST', 'REAL', size_cache, c)

    matrices = [sg('get_kernel_matrix', target) for target in ('TRAIN', 'TEST')]
    return matrices[-1]
def distance_geodesic(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'GEODESIC' distance in sg and return the TEST matrix.

    The TRAIN matrix is requested beforehand and its result discarded.
    """
    sg('set_distance', 'GEODESIC', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result not needed

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
def distance_geodesic(fm_train_real=traindat, fm_test_real=testdat):
    """Return sg's TEST distance matrix for the 'GEODESIC' distance.

    Features are registered and the matrix requested for 'TRAIN' first and
    'TEST' second; the second matrix is the return value.
    """
    sg('set_distance', 'GEODESIC', 'REAL')

    matrix = None
    for target, features in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, features)
        matrix = sg('get_distance_matrix', target)
    return matrix
def distance():
    """Run the "DISTANCE" kernel example on the module-level real features.

    Prints "Distance", imports sg locally, sets fm_train_real and
    fm_test_real as TRAIN/TEST features, chooses the "EUCLIDIAN" distance
    and the "DISTANCE" kernel (cache size 10, width 1.7), and requests both
    kernel matrices.  Returns None.
    """
    # A parenthesised single string prints identically on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print("Distance")

    width = 1.7
    size_cache = 10

    from sg import sg
    sg("set_features", "TRAIN", fm_train_real)
    sg("set_features", "TEST", fm_test_real)
    sg("set_distance", "EUCLIDIAN", "REAL")
    sg("set_kernel", "DISTANCE", size_cache, width)

    # Results were previously bound to an unused local; keep only the calls.
    for target in ("TRAIN", "TEST"):
        sg("get_kernel_matrix", target)
def distance_euclidean(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'EUCLIDEAN' distance in sg and return the TEST matrix.

    The TRAIN distance matrix is requested first; only the TEST matrix is
    handed back to the caller.
    """
    sg('set_distance', 'EUCLIDEAN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')
    return test_distances
def distance_chebyshew(fm_train_real=traindat, fm_test_real=testdat):
    """Return sg's TEST distance matrix for the 'CHEBYSHEW' distance.

    The TRAIN features are set and their matrix requested (and dropped)
    before the TEST features are processed.
    """
    sg('set_distance', 'CHEBYSHEW', 'REAL')

    # Train pass: computed, result discarded.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')

    # Test pass: returned.
    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
def distance_jensen(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'JENSEN' distance in sg and return the TEST matrix.

    Both TRAIN and TEST matrices are requested in turn; the TEST one is
    returned.
    """
    sg('set_distance', 'JENSEN', 'REAL')

    last_matrix = None
    for target, features in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, features)
        last_matrix = sg('get_distance_matrix', target)
    return last_matrix
def _train(indata, prefix): if indata.has_key(prefix + 'max_iter'): max_iter = indata[prefix + 'max_iter'] else: max_iter = 1000 if indata.has_key(prefix + 'k'): first_arg = indata[prefix + 'k'] elif indata.has_key(prefix + 'merges'): first_arg = indata[prefix + 'merges'] else: raise StandardError, 'Incomplete clustering data.' sg('train_clustering', first_arg, max_iter)
def kernel_oligostring(fm_train_dna=traindna, fm_test_dna=testdna, size_cache=10, k=3, width=1.2):
    """Configure the "OLIGO" kernel in sg and return the TEST kernel matrix.

    Features are registered with the "DNA" tag; ``size_cache``, ``k`` and
    ``width`` are forwarded to sg("set_kernel", ...).  The TRAIN matrix is
    requested too but not returned.
    """
    for target, sequences in (("TRAIN", fm_train_dna), ("TEST", fm_test_dna)):
        sg("set_features", target, sequences, "DNA")

    sg("set_kernel", "OLIGO", "CHAR", size_cache, k, width)

    sg("get_kernel_matrix", "TRAIN")
    return sg("get_kernel_matrix", "TEST")
def distribution_linearhmm(fm_train=traindna, fm_cube=cubedna, order=3, gap=0, reverse='n'):
    """Prepare word-string TRAIN features in sg for a LinearHMM example.

    Adds the 'SORTWORDSTRING' preprocessor, registers ``fm_train`` as 'DNA'
    features, converts them from CHAR strings to WORD strings using
    ``order``, ``order - 1``, ``gap`` and ``reverse``, and attaches the
    preprocessor.  ``fm_cube`` is accepted but not used.  Returns None.
    """
    # NOTE: the sg('new_distribution', 'LinearHMM') call is disabled here.
    start = order - 1
    steps = (
        ('add_preproc', 'SORTWORDSTRING'),
        ('set_features', 'TRAIN', fm_train, 'DNA'),
        ('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
         order, start, gap, reverse),
        ('attach_preproc', 'TRAIN'),
    )
    for step in steps:
        sg(*step)
def kernel_weighteddegreepositonstring(fm_train_dna=traindna, fm_test_dna=testdna, size_cache=10, degree=20):
    """Configure the "WEIGHTEDDEGREEPOS" kernel and return the TEST matrix.

    TRAIN and TEST features are registered with the "DNA" tag.  The TRAIN
    kernel matrix is requested first; the TEST kernel matrix is returned.
    """
    sg("set_features", "TRAIN", fm_train_dna, "DNA")
    sg("set_features", "TEST", fm_test_dna, "DNA")
    sg("set_kernel", "WEIGHTEDDEGREEPOS", "CHAR", size_cache, degree)

    matrices = [sg("get_kernel_matrix", target) for target in ("TRAIN", "TEST")]
    return matrices[-1]
def kernel_gaussian(fm_train_real=traindat, fm_test_real=testdat, width=1.4, size_cache=10):
    """Configure the 'GAUSSIAN' kernel in sg and return the TEST matrix.

    ``size_cache`` and ``width`` are forwarded to sg('set_kernel', ...).
    The TRAIN kernel matrix is requested as well but not returned.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

    sg('get_kernel_matrix', 'TRAIN')  # computed, result unused
    return sg('get_kernel_matrix', 'TEST')
def distribution_histogram(fm_train=traindna, fm_cube=cubedna, order=3, gap=0, reverse="n"):
    """Prepare word-string TRAIN features in sg for a histogram example.

    Adds the "SORTWORDSTRING" preprocessor, sets ``fm_train`` as "DNA"
    features, converts them to WORD strings (arguments ``order``,
    ``order - 1``, ``gap``, ``reverse``) and attaches the preprocessor.
    ``fm_cube`` is accepted but not used.  Returns None.
    """
    # NOTE: the sg('new_distribution', 'HISTOGRAM') call is disabled here.
    sg("add_preproc", "SORTWORDSTRING")
    sg("set_features", "TRAIN", fm_train, "DNA")

    conversion = ("STRING", "CHAR", "STRING", "WORD")
    sg("convert", "TRAIN", *(conversion + (order, order - 1, gap, reverse)))

    sg("attach_preproc", "TRAIN")
def kernel_linearword(fm_train_word=trainword, fm_test_word=testword, size_cache=10, scale=1.4):
    """Configure the 'LINEAR' kernel on 'WORD' features; return TEST matrix.

    ``size_cache`` and ``scale`` are forwarded to sg('set_kernel', ...).
    Both kernel matrices are requested; only the TEST one is returned.
    """
    for target, words in (('TRAIN', fm_train_word), ('TEST', fm_test_word)):
        sg('set_features', target, words)

    sg('set_kernel', 'LINEAR', 'WORD', size_cache, scale)

    result = None
    for target in ('TRAIN', 'TEST'):
        result = sg('get_kernel_matrix', target)
    return result
def _evaluate(indata):
    """Check sg's HMM likelihood against the reference value in ``indata``.

    Computes the absolute difference between ``sg('hmm_likelihood')`` and
    ``indata['distribution_likelihood']`` and returns the result of
    ``util.check_accuracy`` for it, using ``indata['distribution_accuracy']``.
    """
    prefix = 'distribution_'

    # what is sg('likelihood')?
    likelihood = abs(sg('hmm_likelihood') - indata[prefix + 'likelihood'])

    # TODO: best path? which? no_b_trans? trans? trans_deriv?
    # A best-path / best-path-state comparison used to follow the return
    # below.  It could never execute and referenced the undefined names
    # `distribution` and `derivatives`, so it has been removed.
    return util.check_accuracy(indata[prefix + 'accuracy'],
                               likelihood=likelihood)
def _train(indata, prefix):
    """Issue sg('train_classifier', ...) according to the classifier type.

    ``indata[prefix + 'type']`` selects the call:
      * 'knn'        -> trains with ``indata[prefix + 'k']``
      * 'lda'        -> trains with ``indata[prefix + 'gamma']``
      * 'perceptron' -> trains without arguments; a RuntimeError ends the
                        process with exit status 0
      * anything else -> optionally sets 'c' from ``indata[prefix + 'C']``,
                        then trains without arguments
    """
    classifier_type = indata[prefix + 'type']

    if classifier_type == 'knn':
        sg('train_classifier', indata[prefix + 'k'])
    elif classifier_type == 'lda':
        sg('train_classifier', indata[prefix + 'gamma'])
    elif classifier_type == 'perceptron':
        # does not converge
        try:
            sg('train_classifier')
        except RuntimeError:
            import sys
            sys.exit(0)
    else:
        # `in` replaces dict.has_key, which no longer exists on Python 3.
        if (prefix + 'C') in indata:
            sg('c', double(indata[prefix + 'C']))
        sg('train_classifier')
def _evaluate(indata):
    """Compare the HMM likelihood reported by sg with the expected one.

    Returns whatever ``util.check_accuracy`` yields for the absolute
    deviation between ``sg('hmm_likelihood')`` and
    ``indata['distribution_likelihood']``, judged against
    ``indata['distribution_accuracy']``.
    """
    prefix = 'distribution_'
    expected = indata[prefix + 'likelihood']

    # what is sg('likelihood')?
    likelihood = abs(sg('hmm_likelihood') - expected)
    verdict = util.check_accuracy(indata[prefix + 'accuracy'],
                                  likelihood=likelihood)

    # TODO: best path? which? no_b_trans? trans? trans_deriv?
    # The former best-path checks sat after an unconditional return, so they
    # never ran, and they used undefined names (`distribution`,
    # `derivatives`).  They are dropped until properly implemented.
    return verdict
def kernel_diag(fm_train_real=traindat, fm_test_real=testdat, diag=23., size_cache=10):
    """Configure the 'DIAG' kernel in sg and return the TEST kernel matrix.

    ``size_cache`` and ``diag`` are forwarded to sg('set_kernel', ...).
    The TRAIN matrix is requested first and its result discarded.
    """
    setup = (
        ('set_features', 'TRAIN', fm_train_real),
        ('set_features', 'TEST', fm_test_real),
        ('set_kernel', 'DIAG', 'REAL', size_cache, diag),
    )
    for command in setup:
        sg(*command)

    sg('get_kernel_matrix', 'TRAIN')
    return sg('get_kernel_matrix', 'TEST')
def distance_manhatten(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'MANHATTAN' distance in sg and return the TEST matrix.

    The TRAIN distance matrix is requested before the TEST features are
    set; only the TEST matrix is returned.
    """
    sg('set_distance', 'MANHATTAN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')  # result not returned

    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
def kernel_weighteddegreestring(fm_train_dna=traindna, fm_test_dna=testdna, size_cache=10, degree=20):
    """Configure the 'WEIGHTEDDEGREE' kernel and return the TEST matrix.

    Features are registered with the 'DNA' tag; ``size_cache`` and
    ``degree`` go to sg('set_kernel', ...).  The TRAIN kernel matrix is
    requested as well but not returned.
    """
    for target, sequences in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, sequences, 'DNA')

    sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree)

    sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')
    return test_kernel
def distance_canberra(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'CANBERRA' distance in sg and return the TEST matrix.

    Runs set-features / get-distance-matrix for 'TRAIN' and then 'TEST';
    the result of the final request is returned.
    """
    sg('set_distance', 'CANBERRA', 'REAL')

    answer = None
    for target, features in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, features)
        answer = sg('get_distance_matrix', target)
    return answer
def kernel_localalignmentstring(fm_train_dna=traindna, fm_test_dna=testdna, size_cache=10):
    """Configure the 'LOCALALIGNMENT' kernel and return the TEST matrix.

    TRAIN and TEST features are registered with the 'DNA' tag.  Both kernel
    matrices are requested; the TEST one is returned.
    """
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'LOCALALIGNMENT', 'CHAR', size_cache)

    matrices = [sg('get_kernel_matrix', target) for target in ('TRAIN', 'TEST')]
    return matrices[-1]
def distance_minkowski(fm_train_real=traindat, fm_test_real=testdat, k=3.):
    """Select the 'MINKOWSKI' distance in sg and return the TEST matrix.

    ``k`` is forwarded as the extra argument of sg('set_distance', ...).
    The TRAIN matrix is requested first and its result discarded.
    """
    sg('set_distance', 'MINKOWSKI', 'REAL', k)

    # Train pass: computed, result discarded.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('get_distance_matrix', 'TRAIN')

    # Test pass: returned to the caller.
    sg('set_features', 'TEST', fm_test_real)
    return sg('get_distance_matrix', 'TEST')
def distance_tanimoto(fm_train_real=traindat, fm_test_real=testdat):
    """Select the 'TANIMOTO' distance in sg and return the TEST matrix.

    The TRAIN and TEST distance matrices are requested in that order; only
    the TEST matrix is returned.
    """
    sg('set_distance', 'TANIMOTO', 'REAL')

    computed = None
    for target, features in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, features)
        computed = sg('get_distance_matrix', target)
    return computed
def kernel_gaussianshift(fm_train_real=traindat, fm_test_real=testdat, width=1.4, max_shift=2, shift_step=1, size_cache=10):
    """Configure the 'GAUSSIANSHIFT' kernel and return the TEST matrix.

    ``size_cache``, ``width``, ``max_shift`` and ``shift_step`` are
    forwarded, in that order, to sg('set_kernel', ...).  The TRAIN kernel
    matrix is requested too but not returned.
    """
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)

    kernel_args = (size_cache, width, max_shift, shift_step)
    sg('set_kernel', 'GAUSSIANSHIFT', 'REAL', *kernel_args)

    sg('get_kernel_matrix', 'TRAIN')
    return sg('get_kernel_matrix', 'TEST')
def distribution_histogram(fm_train=traindna, fm_cube=cubedna, order=3, gap=0, reverse='n'):
    """Set up sorted word-string TRAIN features in sg for a histogram.

    Issues four sg commands: add the 'SORTWORDSTRING' preprocessor, set
    ``fm_train`` as 'DNA' features, convert them from CHAR to WORD strings
    with ``order``, ``order - 1``, ``gap`` and ``reverse``, and attach the
    preprocessor.  ``fm_cube`` is accepted but not used.  Returns None.
    """
    # NOTE: the sg('new_distribution', 'HISTOGRAM') call is disabled here.
    steps = [
        ('add_preproc', 'SORTWORDSTRING'),
        ('set_features', 'TRAIN', fm_train, 'DNA'),
        ('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
         order, order - 1, gap, reverse),
        ('attach_preproc', 'TRAIN'),
    ]
    for step in steps:
        sg(*step)