ele = i.rstrip().split() # ~ GXB01170_2018.fast5|233|23,3,1,00,2,3 0 0|0|0|0|0 0.47062142444662086|0.8176029853529686|0.7531814474848483|-0.0835983106934529|-0.5416802793696001 0.14888963355157137|0.1977707177676313|0.11147011292496822|0.18032028688587404|0.15884083915957087 0.5053374754088856|0.9240774715516673|0.7871817035819118|-0.06103521168167164|-0.5495652071815835 15|6|6|138|64 insert = [] for item in [ele[3], ele[4], ele[5], ele[6]]: for itemsub in item.split("|"): insert.append(float(itemsub)) X.append(insert) Y.append(0) ####################################### X = np.array(X) Y = np.array(Y) ######################################### #split the data to 4:1 x_train, x_test, y_train, y_test = ts(X, Y, test_size=0.2, random_state=0, shuffle=True) ################################################################################################ ###################################################################################################################################################################################### from xgboost.sklearn import XGBClassifier from sklearn import metrics print(Counter(Y)) print(Counter(y_train)) print(Counter(y_test)) clf = XGBClassifier(n_jobs=-1, learning_rate=0.3, tree_method='gpu_exact', n_estimatores=58, alpha=0.1, gamma=0,
# Compare three SVM kernels on the Iris data set and print each test score.
from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import train_test_split as ts

# Load the Iris data set (150 samples, 4 features, 3 classes) and hold out
# 30% of the samples for testing.
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = ts(X, y, test_size=0.3)

# Train and score one SVC per kernel in a loop instead of repeating the
# same fit/score/print sequence three times; output text is unchanged.
for kernel in ('rbf', 'linear', 'poly'):
    clf = svm.SVC(kernel=kernel)
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print("The score of %s is : %f" % (kernel, score))
}) winner_name = ([ row['Winner'], row['Runners-Up'], row['Third'], row['Fourth'] ]) results = dc.Neg_Rec(data_Matches, row['Year'], winner_name) data_train.extend(results) data_training = pd.DataFrame(data_train) dt = data_training #dt['1'], dt['2'], dt['3'], dt['4'], dt['5'], dt['6'], dt['7'], dt['8'], dt['9'], dt['10'], dt['11'], dt['12'], dt['13'], dt['14'], dt['15'], dt['16'] = zip(*dt['name'].map(lambda x: x.split(' '))) #dt.drop(['name'], axis = 1, inplace = True) from sklearn.model_selection import train_test_split as ts train_df, test_df = ts(dt, test_size=0.01) char_cnn = cp.CharCNN(max_len_s=256, max_num_s=1) char_cnn.preporcess(labels=dt['label'].unique()) x_train, y_train = char_cnn.process(df=train_df, x_col='name', y_col='label') x_test, y_test = char_cnn.process(df=test_df, x_col='name', y_col='label') char_cnn.build_model() char_cnn.train(x_train, y_train, x_test, y_test, batch_size=32, epochs=10) y_pred = char_cnn.predict(x_test) plt.figure(figsize=(12, 6)) sns.countplot(data_Cups['Winner'])
feature_set = [] labels = [] c = len(emails) for email in emails: data = [] f = open(email, encoding='cp437') words = f.read().split(" ") for entry in dictionary: data.append(words.count(entry[0])) feature_set.append(data) if "ham" in email: labels.append(0) if "spam" in email: labels.append(1) print(c) c = c - 1 return feature_set, labels d = dict() features, labels = make_dataset(d) #creates a dataset based on wordcount #print(len(features),len(labels)) x_train, x_test, y_train, y_test = ts(features, labels, test_size=0.2) clf = MultinomialNB() clf.fit(x_train, y_train) pred = clf.predict(x_test) print(accuracy_score(y_test, pred)) save(clf, "spam-classifier.mdl")
import torch.nn as nn from sklearn.metrics import accuracy_score import matplotlib.pyplot as plt import torch.nn.functional as F #Preprocess Data torch.manual_seed(0) #to repeat results data = pd.read_csv('iris.csv').set_index('Id') dummies = pd.get_dummies(data['Species']) #from categoricals to dummies data = pd.concat([data, dummies], axis=1).drop('Species', axis=1) #add dummies back print(data.shape) X_train, X_test, y_train, y_test = ts(data.iloc[:, :-3], data.iloc[:, -3:], test_size=0.2, shuffle=True, random_state=0) #convert data to tensors X_train = torch.from_numpy(X_train.values).float() X_test = torch.from_numpy(X_test.values).float() y_train = torch.from_numpy(y_train.values).float() y_test = torch.from_numpy(y_test.values).float() #Create data loader train_set = TensorDataset(X_train, y_train) test_set = TensorDataset(X_test, y_test) train_loader = DataLoader( train_set, batch_size=X_train.shape[0], shuffle=True,
def start():
    """Train a one-hidden-layer autoencoder on the MovieLens ratings matrix
    and print the reconstructed ratings for one held-out user.

    Side effects: reads ``dataset/ratings.dat``, builds and trains a TF1
    graph via the module-level ``training`` helper, prints to stdout.
    Requires module-level ``pd`` (pandas), ``tf`` (TensorFlow 1.x),
    ``ts`` (train_test_split) and ``training`` to be in scope.
    """
    print('reading dataset')
    # ratings.dat columns: 0=user id, 1=movie id, 2=rating ("::"-separated).
    ratings = pd.read_csv('dataset/ratings.dat', sep="::", header=None,
                          engine='python')
    # Pivot into a dense user x movie matrix; unrated movies become 0.
    ratings_pivot = pd.pivot_table(ratings[[0, 1, 2]], values=2, index=0,
                                   columns=1).fillna(0)
    train, test = ts(ratings_pivot, train_size=0.8)

    # Layer sizes: 3706 movies in/out, 256 hidden units.
    nodes_in = 3706
    nodes_hidden = 256
    nodes_out = 3706

    # The "+ 1" row in each weight matrix is the bias weight; it is fed by
    # a constant-1 column concatenated onto the layer input below.
    hidden_layer = {
        'weights': tf.Variable(tf.random_normal([nodes_in + 1, nodes_hidden]))
    }
    # Bug fix: the original bound this dict to the name `output_layer` and
    # later rebound that same name to the output tensor, clobbering the
    # weights; a distinct name keeps both reachable.
    output_weights = {
        'weights': tf.Variable(tf.random_normal([nodes_hidden + 1, nodes_out]))
    }

    input_layer = tf.placeholder('float', [None, 3706])
    input_layer_const = tf.fill([tf.shape(input_layer)[0], 1], 1.0)
    input_layer_concat = tf.concat([input_layer, input_layer_const], 1)

    # Hidden layer: sigmoid(input . weights), then append the bias column.
    layer_1 = tf.nn.sigmoid(
        tf.matmul(input_layer_concat, hidden_layer['weights']))
    layer1_const = tf.fill([tf.shape(layer_1)[0], 1], 1.0)
    layer_concat = tf.concat([layer_1, layer1_const], 1)

    # Linear output layer reconstructs the full 3706-wide ratings row.
    output_layer = tf.matmul(layer_concat, output_weights['weights'])
    output = tf.placeholder('float', [None, 3706])

    cost_function = tf.reduce_mean(tf.square(output_layer - output))
    # Bug fix: the original stored the learning rate in a variable also
    # named `optimizer` and immediately overwrote it; name it explicitly.
    learning_rate = 0.1
    optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(
        cost_function)

    init = tf.global_variables_initializer()
    session = tf.Session()
    session.run(init)

    batch_size = 100
    epochs = 200
    images = train.shape[0]
    training(batch_size, cost_function, epochs, images, input_layer,
             optimizer, output, output_layer, session, test, train)

    # Reconstruct the ratings row for one held-out user (index 99).
    user = test.iloc[99, :]
    pred = session.run(output_layer, feed_dict={input_layer: [user]})
    # TODO: persist the predictions so the model is not retrained every run.
    print(pred)