import sys

from sklearn import preprocessing, svm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import ComplementNB, MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

import process


def mnb():
    # Train a multinomial naive Bayes classifier on the raw count features.
    x, y = process.read_data("train.csv", 1, 1)
    x_test, y_test = process.read_data("test.csv", 1, 0)
    y_train = y.T
    mnb = MultinomialNB(alpha=19.6, fit_prior=False)
    mnb.fit(x, y_train)
    return mnb, x_test
def lr():
    x, y = process.read_data("train.csv", 1, 1)
    x_test, y_test = process.read_data("test.csv", 1, 0)
    # Fit the scaler on the training set only and reuse its statistics for
    # the test set; scaling each split independently (as with a separate
    # preprocessing.scale call per split) standardizes them inconsistently.
    sscaler = StandardScaler()
    sscaler.fit(x)
    x_train_preprocessed = sscaler.transform(x)
    x_test_preprocessed = sscaler.transform(x_test)
    y_train = y.T
    lr = LogisticRegression(C=0.05, random_state=1, solver='saga',
                            multi_class='multinomial', max_iter=800)
    lr.fit(x_train_preprocessed, y_train)
    return lr, x_test_preprocessed
def marker_map(self):
    a = all_data()
    df = read_data()
    html_dom_dict = self.gen_html_dom()
    html_body = self.gen_html_body()
    # 41.7943° N, 87.5907° W; df['Latitude'].mean() / df['Longitude'].mean()
    # would compute the center from the data instead.
    center_lat_prime = 41.7943
    center_lon_prime = -87.5907
    print(center_lat_prime, center_lon_prime)
    colors = html_dom_dict["marker_map_colors"]
    map_data = Map.process_data()
    points = Map.create_map_points(html_dom_dict, map_data)
    return html_body.format(
        map_points=points,  # map_points_prime
        center_lat=center_lat_prime,
        center_lon=center_lon_prime,
        colors=colors,
        style=html_dom_dict["style"])
def process_data():
    df = read_data()
    # Flatten each row to a list of strings: [street, number, lat, lon].
    map_data = [[str(y) for y in x[1]] for x in df.iterrows()]
    d = {}  # street -> highest house number seen so far
    k = {}  # street -> [street, number, lat, lon] for that highest number
    for street, number, lat, lon in map_data:
        num = int(number)
        if street not in d or num > d[street]:
            d[street] = num
            k[street] = [street, num, lat, lon]
    # Sort numerically; the values are strings, so sorting on x[1] directly
    # would order them lexicographically ("1000" before "900").
    sorted_map_data = sorted(map_data, key=lambda x: int(x[1]))
    return sorted_map_data
def svm_train_cv():
    x, y = process.read_data("train.csv", 1, 1)
    sscaler = StandardScaler()
    sscaler.fit(x)
    x_train_preprocessed = sscaler.transform(x)
    y_train = y.T
    best_score = 0.0
    num = 0
    # Grid search over coef0 for the sigmoid-kernel SVM using 3-fold CV.
    for coef0 in [0.1, 0.12, 0.14, 0.16]:
        svm_rbf = svm.SVC(C=0.90, cache_size=500, kernel='sigmoid',
                          gamma='scale', coef0=coef0)
        num = num + 1
        scores = cross_val_score(svm_rbf, x_train_preprocessed, y_train, cv=3)
        score = scores.mean()
        print("Iteration time:{}th".format(num))
        print("Current score on validation set:{:.9f}".format(score))
        print("Current parameters:{:.2f}".format(coef0))
        if score > best_score:
            best_score = score
            best_parameters = {"coef0": coef0}
    print("Best score on validation set:{:.9f}".format(best_score))
    print("Best parameters:{}".format(best_parameters))
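# An alternative sketch, not part of the original code: sklearn's
# GridSearchCV performs the same search as the manual loop in svm_train_cv
# (and in cnb_cv / lr_cv below), handling the fold loop and bookkeeping.
def _example_svm_grid_search(x_train_preprocessed, y_train):
    from sklearn.model_selection import GridSearchCV
    grid = GridSearchCV(
        svm.SVC(C=0.90, cache_size=500, kernel='sigmoid', gamma='scale'),
        param_grid={"coef0": [0.1, 0.12, 0.14, 0.16]},
        cv=3)
    grid.fit(x_train_preprocessed, y_train)
    print("Best score on validation set:{:.9f}".format(grid.best_score_))
    print("Best parameters:{}".format(grid.best_params_))
    return grid.best_estimator_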
def main():
    syn_symbols = {SYNCHROTRONS[s][0]: s for s in OUTPUT_SYN}
    data = {
        s: {y: [0] * days_in_year(y) for y in OUTPUT_YEARS}
        for s in ALL_OUTPUT
    }
    for items in read_data():
        syn = SYNCHROTRONS.get(items[0][11])
        name = None
        if items[0][8] == 'ELECTRON MICROSCOPY':
            # No stats for CryoEM. In most entries _em_imaging.date is null.
            name = None
        elif syn:
            name = syn_symbols.get(syn[0])
        elif items[0][9] in ('SEALED TUBE', 'ROTATING ANODE'):
            name = 'home'
        if not name:
            continue
        coll = parse_date(items[0][19])
        if not coll or coll.year not in OUTPUT_YEARS:
            continue
        day = coll.timetuple().tm_yday
        data[name][coll.year][day - 1] += 1
    # summary to stderr
    for syn in ALL_OUTPUT:
        totals = [sum(data[syn][year]) for year in OUTPUT_YEARS]
        print('%-10.10s %-17s total: %4d' % (syn, totals, sum(totals)),
              file=sys.stderr)
    # JSON to stdout
    print_data(data)
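# Worked example (illustrative, not part of the original script) of the
# day-of-year binning above: timetuple().tm_yday is 1-based, which is why
# the counter indexes data[name][coll.year][day - 1].
def _example_day_of_year():
    import datetime
    d = datetime.date(2020, 2, 1)
    # January has 31 days, so Feb 1 is day 32; it lands in list index 31.
    assert d.timetuple().tm_yday == 32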
def svm_train():
    x, y = process.read_data("train.csv", 1, 1)
    print(x)
    print(y)
    x_test, y_test = process.read_data("test.csv", 1, 0)
    # Standardize both splits using statistics from the training set only.
    sscaler = StandardScaler()
    sscaler.fit(x)
    x_train_preprocessed = sscaler.transform(x)
    x_test_preprocessed = sscaler.transform(x_test)
    y_train = y.T
    svm_rbf = svm.SVC(C=0.90, cache_size=500, kernel='sigmoid',
                      gamma='auto', coef0=0.1)
    svm_rbf.fit(x_train_preprocessed, y_train)
    return svm_rbf, x_test_preprocessed
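# Example usage (a minimal sketch, not part of the original module): score
# the trained SVM on the held-out set. Assumes the second return value of
# process.read_data("test.csv", 1, 0) carries the test labels in the same
# orientation as the training labels (hence the .T, mirroring y_train = y.T).
def _example_evaluate_svm():
    model, x_test_preprocessed = svm_train()
    _, y_test = process.read_data("test.csv", 1, 0)
    accuracy = model.score(x_test_preprocessed, y_test.T)
    print("SVM test accuracy: {:.4f}".format(accuracy))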
def read_text(f):
    """Read the file at path f (string) and return its text lowercased."""
    if Globals.override:
        from process import read_data
        return read_data()
    with open(f, 'r') as infile:
        return infile.read().lower()
def mlp_train():
    x, y = process.read_data("train.csv", 1, 1)
    sscaler = StandardScaler()
    sscaler.fit(x)
    x_train_preprocessed = sscaler.transform(x)
    y_train = y.T
    mlp = MLPClassifier(activation='logistic', solver='sgd', alpha=1e-5,
                        batch_size=200, hidden_layer_sizes=(50, 50),
                        random_state=1, learning_rate='adaptive',
                        max_iter=600)
    # Report 3-fold cross-validation accuracy; the model is not refit here.
    scores_mlp = cross_val_score(mlp, x_train_preprocessed, y_train, cv=3)
    print(scores_mlp)
    print("scores_mlp Accuracy: %0.9f (+/- %0.9f)"
          % (scores_mlp.mean(), scores_mlp.std() * 2))
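# A companion sketch (assumed, mirroring svm_train above rather than taken
# from the original code): fit the same MLP on the full training set and
# return it with the preprocessed test features, so it can be evaluated the
# same way as the other *_train functions.
def _example_mlp_fit():
    x, y = process.read_data("train.csv", 1, 1)
    x_test, _ = process.read_data("test.csv", 1, 0)
    sscaler = StandardScaler()
    sscaler.fit(x)
    mlp = MLPClassifier(activation='logistic', solver='sgd', alpha=1e-5,
                        batch_size=200, hidden_layer_sizes=(50, 50),
                        random_state=1, learning_rate='adaptive',
                        max_iter=600)
    mlp.fit(sscaler.transform(x), y.T)
    return mlp, sscaler.transform(x_test)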
def cnb_cv():
    x, y = process.read_data("train.csv", 1, 1)
    # x_train_preprocessed = preprocessing.scale(x)
    y_train = y.T
    best_score = 0.0
    num = 0
    for alpha in [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]:
        num = num + 1
        cnb = ComplementNB(alpha=alpha, fit_prior=False)
        scores = cross_val_score(cnb, x, y_train, cv=4)
        score = scores.mean()
        print("Iteration time:{}th".format(num))
        print("Current score on validation set:{:.9f}".format(score))
        # Use {:g} so small alphas (e.g. 1e-05) are not rounded to 0.00.
        print("Current parameters:{:g}".format(alpha))
        if score > best_score:
            best_score = score
            best_parameters = {"alpha": alpha}
    print("Best score on validation set:{:.9f}".format(best_score))
    print("Best parameters:{}".format(best_parameters))
def lr_cv():
    x, y = process.read_data("train.csv", 1, 1)
    x_train_preprocessed = preprocessing.scale(x)
    y_train = y.T
    best_score = 0.0
    num = 0
    for C in [0.03, 0.05, 0.07]:
        num = num + 1
        lr = LogisticRegression(C=C, random_state=1, solver='saga',
                                multi_class='multinomial', max_iter=1000,
                                penalty='l2')
        scores = cross_val_score(lr, x_train_preprocessed, y_train, cv=3)
        score = scores.mean()
        print("Iteration time:{}th".format(num))
        print("Current score on validation set:{:.9f}".format(score))
        print("Current parameters:{:.4f}".format(C))
        if score > best_score:
            best_score = score
            best_parameters = {"C": C}
    print("Best score on validation set:{:.9f}".format(best_score))
    print("Best parameters:{}".format(best_parameters))