def main(train_set, test_set, iter=2):
    """Train a ``perceptronAverage`` model and print its zero-one loss.

    Calls ``split("yelp_cat.csv")`` first (presumably to regenerate the
    train/test files -- confirm against ``split``), then loads both sets
    with ``load`` and trains for ``iter`` iterations.

    Args:
        train_set: passed through to ``load`` as the training source.
        test_set: passed through to ``load`` as the test source.
        iter: iteration count; coerced with ``int`` so string input
            (e.g. from the command line) is accepted.
    """
    split("yelp_cat.csv")
    iterations = int(iter)
    X_train, X_test, Y_train, Y_test = load(train_set, test_set)

    model = perceptronAverage(iterations, X_train, Y_train)
    model.train()

    loss = model.test(X_test, Y_test)
    print("ZERO-ONE LOSS=" + str(loss))
def quadtree_score(self, table, level=0):
    """Recursively partition ``table`` and assign score estimates to rows.

    Every candidate from ``self.possible_splits(table)`` is evaluated; the
    candidate with the lowest ``self.evaluate_split`` value that yields more
    than one non-empty partition wins.  Each partition of the winning split
    is then either finalised (rows whose score is still ``-inf`` receive the
    partition estimate) or split again one level deeper.

    Args:
        table: the rows to score.
        level: current recursion depth (0 for the initial call).

    Raises:
        RuntimeError: if ``level`` exceeds ``self.max_levels`` or if no
            candidate split produces more than one non-empty partition.
    """
    # at this step, need to compute sample size
    # so that with 95% confidence, the margin
    # of error is within 10% of the true value.
    if level > self.max_levels:
        # Was a bare ``raise`` outside any handler, which only produces an
        # uninformative "no active exception" error.
        raise RuntimeError(
            "quadtree_score: level %s exceeds max_levels %s"
            % (level, self.max_levels))

    best_split, best_est, best_parts = None, None, []
    for candidate in self.possible_splits(table):
        _logger.debug("Checking Split\t%s\t%s",
                      candidate.attr.name,
                      ','.join(map(str, map(len, candidate()))))

        # Only non-empty partitions contribute statistics.
        partinfo = [self.evaluate(partition)
                    for partition in candidate()
                    if len(partition) != 0]
        split_est = self.evaluate_split(partinfo)

        if len(partinfo) > 1 and (not best_split or split_est < best_est):
            best_split = candidate
            best_est = split_est
            best_parts = partinfo

    if not best_split:
        raise RuntimeError(
            "quadtree_score: no candidate split produced more than one "
            "non-empty partition")

    _logger.info("quadtree\t%d\t%d\t%s\t%f",
                 len(table), level, best_split.attr.name, best_est)

    # best_parts holds stats for non-empty partitions only, so empty
    # partitions must be dropped here too; otherwise zip() pairs partitions
    # with the wrong stats and len(table) / len(partition) can divide by 0.
    # NOTE(review): assumes best_split(table) yields the same partitions, in
    # the same order, as the argument-less call used above -- confirm.
    non_empty = (part for part in best_split(table) if len(part) != 0)
    for partition, stats in zip(non_empty, best_parts):
        if self.should_stop(partition, stats):
            _logger.info("stopped on partition\t%d\test(%f)\tstd(%f)",
                         len(partition), stats.est, stats.std)
            for row in partition:
                # Only rows that have not been scored yet are updated.
                if row[self.SCORE_ID].value == -inf:
                    row[self.SCORE_ID] = stats.est
        else:
            # Scale the error probability by the inverse fraction of rows
            # that fall into this partition (capped at 1.0) for the subtree.
            self.errprob.append(
                min(1.0, self.errprob[-1] * len(table) / len(partition)))
            self.quadtree_score(partition, level + 1)
            self.errprob.pop()
def begin(self, box1, box2):
    """Build the split-signature table for ``box1`` and ``box2``.

    Pipeline: compute the signature triples, sort them, sort the per-box
    signatures using the input->sorted mapping, run the splitter on the
    sorted data, and finally restore the original ordering.

    Returns:
        Whatever ``signatures.ret_original_order`` returns for the split
        signatures.
    """
    # Both helpers are instantiated up front, in the original order.
    signer = signatures()
    splitter = split()

    triples = signer.get_signatures_triple(box1, box2)
    ordered, to_sorted, to_input = signer.my_sort(triples)
    box_sign, box_sign_idx = signer.sort_signatures(box1, box2, to_sorted)

    pieces = splitter.split(tuple(ordered), box_sign, box_sign_idx)
    return signer.ret_original_order(pieces, to_input)
def recognize(path=constents.imgPath, description=""):
    """Recognise a captcha image piece by piece via the remote service.

    The image at ``path`` is cut into pieces with ``split``; each piece is
    written to ``temp/<index>.png`` and posted to ``constents.url``.  The
    ``predicted_label`` of every response is collected.

    Args:
        path: image file to read with ``cv.imread``.
        description: free text that is printed before processing.

    Returns:
        The predicted labels joined into one string; a piece whose request
        or response handling failed contributes ``'-'``.
    """
    print('验证码描述:' + description)
    mode = 5
    image = cv.imread(path)
    pieces = split(image, mode, line=1)

    if not os.path.exists("temp"):
        os.mkdir("temp")

    labels = []
    for idx, piece in enumerate(pieces):
        piece_path = "temp/{}.png".format(idx)
        cv.imwrite(piece_path, piece)
        # The upload handle used to be left open for every piece; the
        # context manager closes it even when the request fails.
        with open(piece_path, 'rb') as upload:
            try:
                r = requests.post(constents.url,
                                  files={'images': upload},
                                  headers=constents.headers)
                rr = json.loads(r.text)
                print(r.text)
                labels.append(rr["predicted_label"])
            except Exception:
                # Best effort: a failed piece becomes '-'.  Narrowed from a
                # bare ``except`` so Ctrl-C / SystemExit still propagate.
                labels.append('-')
    return "".join(labels)
def setup_world():
    """Load triangles from ``data/world.txt`` into the global ``tris`` list.

    Each line is tokenised with ``split`` (presumably whitespace splitting --
    confirm).  Lines that do not yield exactly five values, or whose first
    value is ``'//'``, are ignored.  Every accepted line becomes one vertex
    (a list of five floats), and every three consecutive vertices are
    appended to ``tris`` as one triangle.

    Raises:
        ValueError: if an accepted line contains a non-numeric value.
    """
    global tris

    # ``with`` guarantees the file is closed even if reading fails.
    with open(os.path.join("data", "world.txt")) as world_file:
        lines = world_file.readlines()

    tri = []
    for line in lines:
        vals = split(line)
        if len(vals) != 5:
            continue
        if vals[0] == '//':
            continue

        tri.append([float(val) for val in vals])
        if len(tri) == 3:
            tris.append(tri)
            tri = []
#import numpy as np
#import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import linear_model
from sklearn.metrics import confusion_matrix
from sklearn import svm
#import os
from split import *
from clean import *
from cleaning import *
from sklearn.metrics import classification_report

# Number of trailing rows held out for cross-validation.
_CV_ROWS = 6500000

# Clean the raw data, separate features from targets, and digitize the
# feature matrix (digitizeMatrix's return value is not used here).
train, test = cleanTrain()
trainFeatures, trainTarget = split(train)
digitizeMatrix(trainFeatures)

# The last _CV_ROWS rows form the CV set, everything before them is training.
cvFeatures = trainFeatures[-_CV_ROWS:]
trainFeatures = trainFeatures[:-_CV_ROWS]

conf = []
N = len(trainTarget.columns)
# Slice every target column the same way as the features.
for i, target1 in enumerate(trainTarget.columns):
    cvTarget = trainTarget[target1][-_CV_ROWS:]
    Target = trainTarget[target1][:-_CV_ROWS]
def main():
    """Tune a regressor on the admission data step by step and report scores.

    Both CSV files are concatenated, the serial-number column is dropped and
    ``'Chance of Admit '`` is used as the target.  The estimator returned by
    ``model.base_model()`` is scored once as a baseline, then tuned one
    hyper-parameter group at a time (``model.optimize`` followed by
    ``visuals.plot_optimization``), refitted on the training split and
    scored again.  Finally the feature importances are plotted.
    """
    frames = [
        pd.read_csv('Admission_Predict.csv'),
        pd.read_csv('Admission_Predict_Ver1.1.csv'),
    ]
    data = pd.concat(frames).drop('Serial No.', axis=1)
    target = data['Chance of Admit ']
    features = data.drop('Chance of Admit ', axis=1)
    X_train, X_test, y_train, y_test = split(features, target)

    booster = model.base_model()
    baseline_pred = booster.predict(X_test)
    baseline_score = model.performance_metric(y_test, baseline_pred)
    print("Our baseline model without tuning gave an R2 of {}".format(
        baseline_score))

    # Tune 1: number of estimators
    model.optimize(
        X_train, y_train, regressor=booster,
        parameter={'n_estimators': [1, 2, 4, 8, 16, 32, 64, 100]})
    visuals.plot_optimization(
        regressor=booster,
        parameter={'n_estimators': [1, 2, 4, 8, 16, 32, 64, 100]})
    booster = booster.set_params(n_estimators=50)

    # Tune 2: tree depth together with min_samples_split
    model.optimize(
        X_train, y_train, regressor=booster,
        parameter={
            'max_depth': range(2, 12, 2),
            'min_samples_split': range(6, 18, 2),
        })
    visuals.plot_optimization(
        regressor=booster, parameter={'max_depth': range(2, 20, 2)})
    booster = booster.set_params(max_depth=10)

    # Tune 3: min_samples_split together with min_samples_leaf
    model.optimize(
        X_train, y_train, regressor=booster,
        parameter={
            'min_samples_split': range(6, 18, 2),
            'min_samples_leaf': [3, 5, 7, 9, 12, 15],
        })
    visuals.plot_optimization(
        regressor=booster,
        parameter={'min_samples_split': range(6, 18, 2)})
    booster = booster.set_params(min_samples_split=6)
    visuals.plot_optimization(
        regressor=booster,
        parameter={'min_samples_leaf': [3, 5, 7, 9, 12, 15]})
    booster = booster.set_params(min_samples_leaf=3)

    # Tune 4: number of features considered per split
    model.optimize(
        X_train, y_train, regressor=booster,
        parameter={'max_features': range(1, 8)})
    visuals.plot_optimization(
        regressor=booster, parameter={'max_features': range(1, 8)})
    booster = booster.set_params(max_features=3)

    # Tune 5: subsample fraction
    model.optimize(
        X_train, y_train, regressor=booster,
        parameter={'subsample': [0.7, 0.75, 0.8, 0.85, 0.9, 0.95]})
    visuals.plot_optimization(
        regressor=booster,
        parameter={'subsample': [0.7, 0.75, 0.8, 0.85, 0.9, 0.95]})
    booster = booster.set_params(subsample=0.95)

    # Tune 6: learning rate against number of trees
    model.robust_model(
        booster,
        rates=[0.05, 0.01, 0.005, 0.005],
        trees=[100, 500, 1000, 1500])
    booster = booster.set_params(learning_rate=0.005, n_estimators=1500)

    # Final fit and evaluation with the tuned parameters.
    booster = booster.fit(X_train, y_train)
    final_pred = booster.predict(X_test)
    final_score = model.performance_metric(y_test, final_pred)
    print('Rsquare score of {}'.format(np.round(final_score, decimals=5)))
    visuals.feature_importance(features.columns, booster.feature_importances_)
def invokeSDD(testFile, preference='RANDOM', ddmin=False):
    """Minimise ``testFile`` in two phases and write the cleaned result.

    Phase 1 repeatedly asks the query engine for a removable symbol (chosen
    by the heuristic selected through ``preference``), removes it
    transitively from the tentative file and keeps the removal when the
    test still reports ``'FAIL'``.  Phase 2 runs a ddmin-style loop over the
    nodes returned by ``allNotPermanentlyDeleted(L)``.  Finally, blank lines
    and lines consisting only of ``;`` are stripped from the minimal file.

    Args:
        testFile: path of the input file; copied to the current-minimal file.
        preference: one of ``'TOP'``, ``'BOTTOM'``, ``'RANDOM'``,
            ``'AVERAGE'``; any other value leaves the heuristic as ``None``.
        ddmin: when true, all untracked dependencies are marked before
            phase 1 and the phase-1 statistics are not printed.

    Side effects:
        Resets the global counters ``numberOfUnresolvedTests`` and
        ``numberOfTotalTests`` and rewrites the current/tentative files.
    """
    global numberOfUnresolvedTests
    global numberOfTotalTests
    numberOfUnresolvedTests = 0
    numberOfTotalTests = 0

    QR = getQueryResult("clearAllLabels(L)")
    print("TOTAL NODES: %s" % str(len(QR['L'])))
    copy(testFile, currentMinimalFileName)

    heuristics = {
        'TOP': "topScoringRemovableWUD(X)",
        'BOTTOM': "topScoringRemovableWUD2(X)",
        'RANDOM': "topScoringRemovableWUDR(X)",
        'AVERAGE': "topScoringRemovableWUDA(X)",
    }
    searchHeuristic = heuristics.get(preference)

    if ddmin:
        getQueryResult("markAllUntrackedDependencies(L)")

    # ---- Phase 1: heuristic-driven removal of single symbols ----
    phaseStart = time.time()
    while getQueryResult("allRemovableWUD(L)"):
        copy(currentMinimalFileName, tentativeMinimalFileName)
        candidate = getQueryResult(searchHeuristic)
        if candidate is None or isVariableNone(candidate['X']):
            break
        symbolToRemove = getValueFromAtom(candidate['X'])
        removeNodeTransitively(tentativeMinimalFileName, symbolToRemove)
        result = runTest(commandName, tentativeMinimalFileName)
        markNodes(result, symbolToRemove)
        if result == 'FAIL':
            # The failure survives the removal, so keep the smaller file.
            copy(tentativeMinimalFileName, currentMinimalFileName)
    phaseOneSeconds = time.time() - phaseStart

    # ---- Phase 2: ddmin over the nodes that are not permanently deleted ----
    remaining = getQueryResult("allNotPermanentlyDeleted(L)")
    n = 2
    L = []
    if remaining is not None and not isVariableNone(remaining['L']):
        L = [getValueFromAtom(atom) for atom in remaining['L']]

    if not ddmin:
        print("PHASE 1: %s" % str(len(L)))
        print("PHASE 1 TIME: %s" % str(phaseOneSeconds))
        print("PHASE 1 UNRESOLVED: %d" % numberOfUnresolvedTests)
        print("PHASE 1 TOTAL: %d\n" % numberOfTotalTests)

    copy(currentMinimalFileName, tentativeMinimalFileName)
    while len(L) >= 2:
        reduced = False
        for subset in split(L, n):
            complement = listminus(L, subset)
            removeNodeList(tentativeMinimalFileName, subset)
            result = runTest(commandName, tentativeMinimalFileName)
            if result != 'FAIL':
                # Removal made the failure disappear: roll the file back.
                copy(currentMinimalFileName, tentativeMinimalFileName)
                continue
            copy(tentativeMinimalFileName, currentMinimalFileName)
            L = complement
            n = max(n - 1, 2)
            reduced = True
            break
        if reduced:
            continue
        if n == len(L):
            # Already at the finest granularity; nothing more to try.
            break
        n = min(n * 2, len(L))

    # FIXME:HACK
    print("MINIMAL CASE: %s" % str(len(L)))
    print("NUMBEROFUNRESOLVEDTESTS: %d" % numberOfUnresolvedTests)
    print("TOTALTESTS: %d\n===============================\n" % numberOfTotalTests)

    # Rewrite the minimal file without blank lines and lone ';' lines.
    move(currentMinimalFileName, tentativeMinimalFileName)
    with open(tentativeMinimalFileName) as ifile:
        with open(currentMinimalFileName, 'w') as ofile:
            for line in ifile:
                if line.strip() not in ('', ';'):
                    ofile.write(line)
up = seqrange[1] if down<=rs and rs+sl-1<=up: return False return True def hasRepeatseq(seqlist,rs,sl): for seqrange in seqlist: down = seqrange[0] up = seqrange[1] if down<=rs and up>=rs: return True elif down>rs and rs+sl-1>=down: return True return False try: flownamelist = split(sys.argv[1]) #flownamelist = sp[0] outlog = open(sys.argv[1].split('.',1)[0]+".log","w") for flowname in flownamelist: #try: #print flowname process(readflow(flowname),flowname) #except: #print 'Error occured in '+flowname outlog.close() except: print 'Error! please check the input' print 'Usage: python tcpla.py filename.pcap'
from split import *
from planck import *
import numpy as np
import matplotlib.pyplot as plt
from astropy import constants as const
from astropy import units as u

# Load the data
T = np.loadtxt("sun_AM0.dat")

# split() rearranges the data into a matrix.
# A bit silly, but it is done for convenience.
D = split(T)

# Size of that matrix
n = D.shape

# Numerical integration of the data: sum of trapezoids between consecutive
# columns, with row 0 as the abscissa and row 1 as the ordinate.
I = 0
for j in range(n[1] - 1):
    gap = D[0, j + 1] - D[0, j]
    trap = (D[1, j] + D[1, j + 1]) * gap / 2
    I += trap

# Numerical integration of the Planck function.
# The integral cannot start at zero because the function is undefined
# at that value.
ini = 1e-06
fin = np.pi / 2
def quadtree_score(self, table, prev_splits=None):
    """Recursively score ``table`` using statistics estimated from a sample.

    A sample of ``table`` is evaluated first.  If ``self.should_stop``
    accepts it, every row whose score is still ``-inf`` receives the sample
    estimate.  Otherwise the candidate split (found on the sample) with the
    lowest ``self.evaluate_split`` value and more than one non-empty
    partition is applied to the whole table, and each non-empty partition
    is scored recursively.

    Args:
        table: the rows to score.
        prev_splits: splits applied so far on the path to this call; its
            length is the current depth.  ``None`` (or an empty list)
            starts a fresh path.

    Raises:
        RuntimeError: if the depth exceeds ``self.max_levels`` or if no
            candidate split yields more than one non-empty partition.
        AssertionError: if the partition sizes do not add up to
            ``len(table)``.
    """
    prev_splits = prev_splits or []
    if len(prev_splits) > self.max_levels:
        # Was a bare ``raise`` outside any handler, which only produces an
        # uninformative "no active exception" error.
        raise RuntimeError(
            "quadtree_score: split depth %s exceeds max_levels %s"
            % (len(prev_splits), self.max_levels))

    samples = self.get_samples(table)

    # evaluate current partition using sample
    cur_stats = self.evaluate(samples)
    should_stop = self.should_stop(samples, cur_stats)
    _logger.info("Stats: %s\tpop(%d)\tsamp(%d)\t%f-%f\t%f-%f",
                 should_stop,
                 len(table),
                 len(samples),
                 cur_stats.est - 2.58 * cur_stats.std,
                 cur_stats.est + 2.58 * cur_stats.std,
                 min(cur_stats.vals),
                 max(cur_stats.vals))

    if should_stop:
        for row in table:
            # Only rows that have not been scored yet are updated.
            if row[self.SCORE_ID].value == -inf:
                row[self.SCORE_ID] = cur_stats.est
        return

    # ok find the best split
    best_split, best_est, best_stats = None, None, []
    for candidate in self.possible_splits(samples):
        _logger.debug("Checking Split\t%s", str(candidate))

        # Only non-empty partitions contribute statistics.
        stats_list = [self.evaluate(partition)
                      for partition in candidate(samples)
                      if len(partition) != 0]
        split_est = self.evaluate_split(stats_list)

        if len(stats_list) > 1 and (not best_split or split_est < best_est):
            best_split = candidate
            best_est = split_est
            best_stats = stats_list

    if not best_split:
        raise RuntimeError(
            "quadtree_score: no candidate split produced more than one "
            "non-empty partition")

    _logger.info("Splitting on %s\t%s",
                 best_split.attr.name,
                 ','.join(map(str, map(len, best_split(table)))))

    for partition in best_split(table):
        if len(partition) == 0:
            # Nothing to score, and len(table) / 0 below would raise
            # ZeroDivisionError.
            continue
        prev_splits.append(best_split)
        # Scale the error probability by the inverse fraction of rows that
        # fall into this partition (capped at 1.0) for the subtree.
        self.errprob.append(
            min(1.0, self.errprob[-1] * len(table) / len(partition)))
        self.quadtree_score(partition, prev_splits=prev_splits)
        prev_splits.pop()
        self.errprob.pop()

    # sanity check, sum(partitions) == table
    # A real list is needed: on Python 3 a map() iterator would be consumed
    # by sum() and then print as "<map object ...>" in the message.
    part_sizes = [len(partition) for partition in best_split(table)]
    total_part_size = sum(part_sizes)
    msg = "Partition sizes wrong: %d!=%d\t%s\t%s"
    msg = msg % (total_part_size, len(table), str(part_sizes), str(best_split))
    assert total_part_size == len(table), msg