Example #1
def merge_synonym():
    synsets = load_csv('./data/synsets/ANEW_synsets.csv')
    syn_clusters = []
    for i, synset_i in enumerate(synsets):
        for synset_j in synsets[i + 1:]:
            if synset_i[0] in synset_j or synset_j[0] in synset_i:
                syn_cluster = (synset_i[0], synset_j[0])
                syn_clusters.append(syn_cluster)
    # zz = set()
    # for ll in syn_clusters:
    #     zz=zz.union(set(ll))
    # print(len(zz))
    # exit()
    outs = []
    for a, b in syn_clusters:
        # if neither a nor b has appeared in any cluster yet
        if all(len(set([a, b]) & set(l)) == 0 for l in outs):
            # create a new cluster
            out = [a, b]
            outs.append(out)
        # otherwise
        else:
            # merge the pair into the existing cluster
            for i, k in enumerate(outs):
                if set([a, b]) & set(k) != set():
                    outs[i] = list(set(outs[i] + [a, b]))
                    break

    # leng = 0
    # for i, j in enumerate(outs):
    #     leng += len(j)
    #     print('| cluster_%s | %s |' % (str(i), str(j)))
    # print(leng)
    return outs
def replacer(word=None):
    syn_map = dict()
    synsets = load_csv('./data/synsets/ANEW_synsets.csv')
    for synset in synsets:
        if len(synset) > 1:
            for w in synset[1:]:
                syn_map[w] = synset[0]

    # if word in syn_map.keys():
    #     return syn_map[word]
    return syn_map
def dialog_load_network():

    dir_name = 'network_data'

    if not path.isdir(dir_name):
        dir_name = getcwd()

    f_name = QFileDialog.getOpenFileName(None,
                                         'Load Electric Network',
                                         directory=dir_name,
                                         filter="Network files (*.csv)")

    try:
        assert os.path.exists(f_name)
    except AssertionError:
        sys.exit(' *** No file selected *** ')

    return load_csv(str(f_name))
Example #5
def sonar_run():
    seed(2)
    filePath = '../data/sonar.csv'
    dataset = load_data.load_csv(filePath, True)
    # convert string attributes to integers
    for i in range(0, len(dataset[0]) - 1):
        load_data.str_column_to_float(dataset, i)
    # convert class column to integers
    load_data.str_column_to_int(dataset, len(dataset[0]) - 1)
    # evaluate algorithm
    n_folds = 5
    max_depth = 10
    min_size = 1
    sample_size = 1.0
    n_features = int(sqrt(len(dataset[0]) - 1))
    for n_trees in [1, 5, 10]:
        scores = evaluate_split.evaluate_algorithm(dataset,
                                                   randomforest.random_forest,
                                                   n_folds, max_depth,
                                                   min_size, sample_size,
                                                   n_trees, n_features)
        print('Trees: %d' % n_trees)
        print('Scores: %s' % scores)
        print('Mean Accuracy: %.3f%%' % (sum(scores) / float(len(scores))))
Example #6
"""
Created on Thu May  3 11:51:18 2018
SVR with UCI forest fires data
@author: shifuddin
"""

from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from math import sqrt
'''
Load feature values as X and target as Y
here we read the forest fires dataset
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csv'
X, y = load_csv(uri, ',', 4, 12, 12, 13, True)
'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)
'''
Feature scaling 
'''
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
sc_y = StandardScaler()
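# The snippet is cut off after sc_y is created. A plausible continuation,
# assuming an RBF-kernel SVR (the kernel choice and the y-scaling step are
# guesses, not from the original source):
y_train_sc = sc_y.fit_transform(y_train.reshape(-1, 1)).ravel()
regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train_sc)
y_pred = sc_y.inverse_transform(regressor.predict(X_test).reshape(-1, 1)).ravel()
rmse = sqrt(mean_squared_error(y_test.ravel(), y_pred))
print(rmse)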
Example #7
"""
Created on Sun Apr 15 22:01:23 2018
knn with Concrete Slump dataset from uci
@author: shifuddin
"""
from sklearn.model_selection import train_test_split
from sklearn import neighbors
from sklearn.metrics import mean_squared_error
from math import sqrt
from load_data import load_csv
'''
Load feature values as X and target as Y
here we read the concrete slump dataset
'''

uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/slump/slump_test.data'
X, y = load_csv(uri, ',', 1, 8, 8, 11, True)
'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)

knn_regressor = neighbors.KNeighborsRegressor(algorithm='auto',
                                              n_neighbors=30,
                                              weights='uniform')
knn_regressor.fit(X_train, y_train)

y_pred = knn_regressor.predict(X_test)
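# mean_squared_error and sqrt are imported above but the snippet is cut off;
# the obvious final step would be:
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(rmse)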
Example #8
"""
Created on Thu May  3 11:51:18 2018
random forest regression with UCI appliances energy data
@author: shifuddin
"""

from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
'''
Load feature values as X and target as Y
here we read the appliances energy dataset
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv'
X, y = load_csv(uri, ',', 1, 27, 27, 28, True)
'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)
'''
Fit RandomForestRegressor with the energy data
'''
regressor = RandomForestRegressor(n_estimators=10, random_state=0)
regressor.fit(X_train, y_train)
'''
Predicting result
'''
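# The 'Predicting result' section is truncated; a plausible completion:
y_pred = regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(rmse)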
Example #9
from tensorflow.keras.callbacks import (TensorBoard, ModelCheckpoint,
                                        EarlyStopping)

from load_data import load_csv, train_valid_test_datasets, show_batch
from features import (PackNumericFeatures, categorical2onehot,
                      categorical2embedding, normalization)
from utils import get_unique
from train_model import get_dense_two_layer_net

# load data and create Dataset obj
train_fileName = '../inputs/train.csv'
test_fileName = '../inputs/test.csv'

batch_size = 128  # 32

train_data, test_data = load_csv(train_fileName, test_fileName)
train_data.pop("id")
test_data_id = test_data.pop("id")
train_dataset, valid_dataset, test_dataset = train_valid_test_datasets(
    train_data,
    test_data,
    valid_size=0.2,
    batch_size=batch_size,
    test_shuffle=False)
train_size = int(train_data.shape[0] * 0.8)
valid_size = int(train_data.shape[0] * 0.2)
print(train_data.shape, test_data.shape)
print(train_dataset.element_spec)

numeric_features = ['month', 'day']
train_dataset = train_dataset.map(PackNumericFeatures(numeric_features))
Example #10
num_classes = 30
seq_len = 4500 if is_dna_data else 1500
model_name = 'blstm_openset'
data_dir = '/mnt/data/computervision/train80_val10_test10'

if is_dna_data:
    model_name = 'blstm_dna_conv3_4500'
    data_dir = '/mnt/data/computervision/dna_train80_val10_test10'

model_file = '../models/' + model_name + '.h5'

model = load_model(model_file)
av_model = Model(inputs=model.input, outputs=model.get_layer("AV").output)
av_model.summary()

train_data = load_csv(data_dir + '/train.csv')

batch_size = 10000
avs = []
actual = []
lower = 0
while lower < len(train_data):
    print(lower)
    upper = min(lower + batch_size, len(train_data))
    x, y = get_onehot(train_data[lower:upper],
                      None,
                      is_dna_data=is_dna_data,
                      seq_len=seq_len)
    pred = av_model.predict(x, batch_size=500)
    avs.append(pred)
    actual.append(y)
Example #11
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 21 23:19:47 2018

@author: shifuddin
"""
from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
'''
Load X, y from uri
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data'
X, y = load_csv(uri, ',', 1, 5, 9, 10)
'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
'''
Perform logistic regression
'''
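# The snippet is truncated here; the natural completion with the imports
# above (random_state is a guess):
classifier = LogisticRegression(random_state=1)
classifier.fit(X_train, y_train.ravel())
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)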
Example #12
"""
Created on Thu May  3 11:51:18 2018
naive Bayes with UCI SPECTF heart data
@author: shifuddin
"""

from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import confusion_matrix

'''
Load X, y from uri
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/spect/SPECTF.test'
X, y = load_csv(uri, ',', 1, 45, 0, 1, True)

'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

'''
Feature scaling 
'''
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

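# The example is cut off; a plausible completion:
classifier = GaussianNB()
classifier.fit(X_train, y_train.ravel())
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)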
Example #13
import numpy as np
import normalizer as norm
from load_data import load_csv

base_path = '/home/daniel/Documentos/Projetos/TCC/Normalizador/tests'  # Replace with a project-relative path.

raw_data = load_csv(f'{base_path}/raw_data.csv')
normalized = norm.normalizer(raw_data)

np.savetxt(f'{base_path}/normalized.csv', normalized, fmt='%.8f')

print(normalized)
Example #14
save_stats = True
num_classes = 100

mask = True
mask_len = 113
model_template = dna_mask_blstm

num_letters = 4 if is_dna_data else 26


model = model_template(num_classes, num_letters, sequence_length, embed_size=256, mask_length=mask_len if mask else None)

model.load_weights(model_file)
model.summary()

test_data = load_csv(data_dir + '/test.csv', divide=2 if is_dna_data else 1)
print(len(test_data))

crop_count = 0.0
for seq, y in test_data:
	if len(seq) > sequence_length:
		crop_count += 1
print("percent cropped:", crop_count / len(test_data))

test_x, test_y, test_m = get_onehot(test_data, None, is_dna_data=is_dna_data, seq_len=sequence_length, num_classes=num_classes, rand_start=random_crop, mask_len=mask_len if mask else None)
if print_acc:
	print("test accuracy:", model.evaluate([test_x, test_m] if mask else test_x, test_y, batch_size=100))

if save_stats:
	pred = model.predict([test_x, test_m] if mask else test_x, batch_size=100).argmax(axis=-1)
	log = Logger(model_name, num_classes, sequence_length)
Example #15
import numpy as np
import paraconsistent
from load_data import load_csv

base_path = 'C:/Users/guermandi/Desktop/TCC/AnaliseParaconsistente/tests'

pathological = load_csv(f'{base_path}/patologicos-normalizados.csv')
healthy = load_csv(f'{base_path}/saudaveis-normalizados.csv')

pathological = np.delete(pathological, (0, 1, 2, 4, 5, 6), 1)
healthy = np.delete(healthy, (0, 1, 2, 4, 5, 6), 1)

classes = np.array([np.array(pathological), np.array(healthy)])

alpha = paraconsistent.alpha(classes)
beta = paraconsistent.beta(classes)

assurance = paraconsistent.assurance(alpha, beta)
contradiction = paraconsistent.contradiction(alpha, beta)

truth = paraconsistent.truth(assurance, contradiction)

# Data classes
classes = np.array([np.array(pathological), np.array(healthy)])

# Alpha and beta
alpha = paraconsistent.alpha(classes)
beta = paraconsistent.beta(classes)

# G1 point
assurance = paraconsistent.assurance(alpha, beta)
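# The fragment is cut off here; mirroring the computation earlier in the
# snippet, the remaining steps would presumably be:
contradiction = paraconsistent.contradiction(alpha, beta)
truth = paraconsistent.truth(assurance, contradiction)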

model = load_model(model_file)
model.summary()

results = []

for percent in range(2,22,2):
	#mode 0: substitute, mode 1: 3-aligned cut, mode 2: unaligned cut
	row = [percent]
	for mode in range(3):
		test_data = load_csv(data_dir + '/test.csv', divide=2)
		print(len(test_data))
	
		for i in range(len(test_data)):
			(x, y) = test_data[i]
			if mode == 0:
				test_data[i] = (substitute(x, percent), y)
			else:
				test_data[i] = (delete_segment(x, percent, mode == 1), y)
			#if i % 100000 == 99999:
			#	print i+1

		test_x, test_y, test_m = get_onehot(test_data, None, is_dna_data=True, seq_len=sequence_length, num_classes=num_classes, mask_len=mask_len)
	
		acc = model.evaluate([test_x, test_m], test_y, batch_size=100, verbose=1)[1]
		print(percent, mode, acc)
Example #17
from ml_logging import Logger

num_classes = 30
num_amino_acids = 26

model = Sequential()
model.add(Masking(mask_value=0, input_shape=(1500, num_amino_acids)))
model.add(LSTM(50, activation='tanh'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer=Adam(lr=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

data_dir = '/mnt/data/computervision/train80_val10_test10'
train_data = load_csv(data_dir + '/train.csv')
print(len(train_data))
val_data = load_csv(data_dir + '/validation.csv')
val_x, val_y = get_onehot(val_data, None)
print(len(val_data))

logger = Logger('lstm50')

save_path = '../models/lstm50.h5'

num_episodes = 20000
for i in range(num_episodes):
    x, y = get_onehot(train_data, 1000)
    print(i)
    print(model.train_on_batch(x, y))
    if (i % 1000 == 0) or i == num_episodes - 1:
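        # Truncated mid-block; the body presumably evaluates and checkpoints,
        # roughly like this (a guess based on the variables defined above):
        print(model.evaluate(val_x, val_y, batch_size=100))
        model.save(save_path)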
Example #18
[5.332441248,2.088626775,1],
[6.922596716,1.77106367,1],
[8.675418651,-0.242068655,1],
[7.673756466,3.508563011,1]]
    n_inputs = len(dataset[0]) - 1
    n_outputs = len(set([row[-1] for row in dataset]))
    network = initialize_network(n_inputs, 2, n_outputs)
    train_network(network, dataset, 0.5, 20, n_outputs)
    for layer in network:
        print(layer)
    for row in dataset:
        prediction = predict(network, row)
        print('Expected=%d, Got=%d' % (row[-1], prediction))

    filename = 'seeds_dataset.csv'
    dataset = load_csv(filename)
    for i in range(len(dataset[0])-1):
        print(dataset[i])
        str_column_to_float(dataset, i)

    str_column_to_int(dataset, len(dataset[0])-1)

    minmax = dataset_minmax(dataset)
    normalize_dataset(dataset, minmax)
    
    
    
    n_folds = 5
    l_rate = 0.3
    n_epoch = 50
    n_hidden = 5
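    # Truncated here. These hyperparameters would feed an evaluation call
    # roughly like the following (evaluate_algorithm and back_propagation
    # are defined in the full source, not in this excerpt):
    scores = evaluate_algorithm(dataset, back_propagation, n_folds, l_rate, n_epoch, n_hidden)
    print('Scores: %s' % scores)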
Example #19
"""
SVR with UCI fertility data
@author: shifuddin
"""

from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from math import sqrt
import pandas as pd
'''
Load feature values as X and target as Y
here we read the fertility dataset
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00244/fertility_Diagnosis.txt'
X, y = load_csv(uri, ',', 0, 9, 9, 10, True)
y = pd.get_dummies(y.ravel(), drop_first=True)
'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)
'''
Feature scaling 
'''
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
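# The snippet ends after scaling; a plausible continuation (the kernel
# choice is a guess, not from the original source):
regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train.values.ravel())
y_pred = regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test.values.ravel(), y_pred))
print(rmse)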
Example #20
    data = data.asfreq(step, method='bfill')
    data = data.reset_index()

    return data


def train_test_split(data, train_ratio, method=0):
    train = []
    test = []

    if method == 0:
        n_rows = int(train_ratio * len(data))
        train.append(data.iloc[:n_rows, :])
        test.append(data.iloc[n_rows:, :])

    return train, test


if __name__ == "__main__":

    names_dict = {"Date": "Local time"}
    data = load_csv(csv_name="EURCAD_Ticks_05.12.2017-05.12.2017.csv",
                    names_dict=names_dict)

    print(data["Date"].head())
    sys.exit(0)
    print(data.shape)
    data = select_data(dataframe=data, start="2017/05/13", stop="2017/05/20")
    print(data.shape)
    print(data["Date"].head())
Example #21
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 12 13:23:12 2018

@author: shifuddin
"""

from sklearn.neural_network import MLPClassifier
from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
'''
Load X, y from uri
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data'
X, y = load_csv(uri, ',', 1, 5, 9, 10, True)
'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)
'''
Feature scaling 
'''
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
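# Cut off here; a plausible completion (the layer sizes and iteration cap
# are guesses):
classifier = MLPClassifier(hidden_layer_sizes=(10,), max_iter=500, random_state=1)
classifier.fit(X_train, y_train.ravel())
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)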
Example #22
input_file = '/mnt/data/computervision/dna_100class_train80_val10_test10/test.csv'
display_classes = 10
n = 100

is_dna_data = True
seq_len = 4500
mask_len = 113

model_file = '../models/' + model_name + '.h5'
model = load_model(model_file)
embed_model = Model(inputs=model.input,
                    outputs=model.get_layer("lstm_2").output)
embed_model.summary()

counts = np.zeros(display_classes, dtype=np.int8)
data = load_csv(input_file, divide=1)
chosen_data = []

for (x, y) in data:
    if y < display_classes and counts[y] < n:
        counts[y] = counts[y] + 1
        chosen_data.append((x, y))

x, y, m = get_onehot(chosen_data,
                     None,
                     is_dna_data=is_dna_data,
                     seq_len=seq_len,
                     mask_len=mask_len)
embed = embed_model.predict([x, m], batch_size=100, verbose=1)

tsne = TSNE(n_components=2, random_state=0)
Example #23
"""
Created on Thu May  3 11:51:18 2018
naive Bayes with the banknote authentication data
@author: shifuddin
"""

from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB 
from sklearn.metrics import confusion_matrix

'''
Load X, y from uri
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt'
X, y = load_csv(uri, ',', 0, 4, 4, 5, True)

'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

'''
Feature scaling 
'''
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

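# Cut off here; a plausible completion:
classifier = GaussianNB()
classifier.fit(X_train, y_train.ravel())
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)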
Example #24
"""
Created on Thu May  3 11:51:18 2018
decision tree regression with UCI white wine quality data
@author: shifuddin
"""

from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
'''
Load feature values as X and target as Y
here we read the white wine quality dataset
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'
X, y = load_csv(uri, ';', 0, 11, 11, 12, True)
'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)
'''
Fit DecisionTreeRegressor with the wine quality data
'''
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X_train, y_train)
'''
Predicting result
'''
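# Truncated; the natural completion:
y_pred = regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(rmse)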
Example #25
def replacer():
    outs = merge_synonym()
    syn_map = load_csv('./data/synsets/ANEW_synsets.csv')
    replace_map = build_syn_map(syn_map, outs)
    print(replace_map['gusto'])
    return replace_map
Example #26
seq_len = 4500
#data_file = '/mnt/data/computervision/dna_train80_val10_test10/test.csv'
data_file = '../results/dna_unknown_100class_pairs.csv'  #keep this 1000class so every model uses the same data

mask = False
mask_len = 113

model_file = '../models/' + model_name + '.h5'
model = load_model(model_file)
embed_model = Model(inputs=model.input,
                    outputs=model.get_layer("lstm_2").output)
embed_model.summary()

single_dict = dict()
pair_dict = dict()
data = load_csv(data_file)
for (x, y) in data:
    if y in pair_dict:
        continue
    if y in single_dict:
        assert x != single_dict[y]
        pair_dict[y] = [single_dict[y], x]
    else:
        single_dict[y] = x
    if len(pair_dict) == num_classes:
        break

chosen_data = []
for i in range(2):
    for y in pair_dict:
        x = pair_dict[y][i]
Example #27
num_classes = 30

model_name = 'blstm_dna_conv3_4500'
data_file = '/mnt/data/computervision/dna_train80_val10_test10/test.csv'
#data_file = '/mnt/data/computervision/dna_train80_val10_test10/unknowns.csv'
data_divide = 4
dist_min = 0
dist_max = 20

model_file = '../models/' + model_name + '.h5'
model = load_model(model_file)
av_model = Model(inputs=model.input, outputs=model.get_layer("AV").output)
av_model.summary()

data = load_csv(data_file, divide=data_divide)
print(len(data))
x, y = get_onehot(data,
                  None,
                  is_dna_data=is_dna_data,
                  seq_len=4500 if is_dna_data else 1500)
avs = av_model.predict(x, batch_size=500)

print('done getting avs')
del data, x, y

means = []
with open('../results/' + model_name + '_mean_activations.csv', 'r') as infile:
    r = csv.reader(infile)
    for row in r:
        means.append(np.array(row, dtype=np.float32))
Example #28
"""
Created on Thu May  3 11:51:18 2018
decision tree regression with UCI CASP protein data
@author: shifuddin
"""

from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
'''
Load feature values as X and target as Y
here we read the CASP protein structure dataset
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00265/CASP.csv'
X, y = load_csv(uri, ',', 1, 10, 0, 1, True)


'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

'''
Fit DecisionTreeRegressor with the CASP data
'''
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X_train, y_train)

'''
Predicting result
'''
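# Truncated; the natural completion:
y_pred = regressor.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(rmse)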
Example #29
# %%
from load_data import load_csv
from analyze_data import show_beta, show_gamma
from sir_model import long_time_later
from graph_plot import compare_graph

# %% load data
directory = "C:\\Users\\HasunSong\\PycharmProjects\\virus\\covid19_korea.csv"
data = load_csv(directory=directory)
header = data[0]  # ['date', 'in treatment', 'cumulative confirmed', 'cumulative released', 'cumulative deaths', 'confirmed', 'released', 'deaths']

# %% guess beta and gamma
show_beta(data)
show_gamma(data)
# as of 03.26.
# %% run the model
BETA = 3e-10
GAMMA = 0.05
TOT_POP = 50000000
DAYS = 3000
rec = long_time_later([TOT_POP, 10000, 0], DAYS, beta=BETA, gamma=GAMMA)

# %% compare with the actual data
compare_graph(data, rec, BETA, GAMMA)

# %% sweep over several parameter values
beta_list = [3e-10, 5e-10, 1e-9, 2.5e-9]
gamma_list = [0.01, 0.02, 0.03, 0.05]
for bt in beta_list:
    for gm in gamma_list:
        rec = long_time_later([TOT_POP, 10000, 0], DAYS, beta=bt, gamma=gm)
Example #30
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 22 21:26:57 2018
KMeans with UCI SPECT data
@author: shifuddin
"""
from load_data import load_csv
from sklearn.cluster import KMeans
from sklearn.metrics import homogeneity_score
import pandas as pd
'''
Load X, y from uri
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/spect/SPECT.test'
X, y = load_csv(uri, ',', 1, 24, 0, 1, True)
'''
Fitting K-Means to the dataset
'''
kmeans = KMeans(n_clusters=10,
                init='k-means++',
                random_state=42,
                max_iter=1000)
y_kmeans = kmeans.fit_predict(X)
cluster_centers = kmeans.cluster_centers_
labels = kmeans.labels_
homo_score = homogeneity_score(y.ravel(), y_kmeans)
Example #31
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 21 23:06:32 2018
uci banknote authentication dataset
@author: shifuddin
"""
from load_data import load_csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
'''
Load X, y from uri
'''
uri = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt'
X, y = load_csv(uri, ',', 0, 4, 4, 5)
'''
Split into training and test set
'''
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
'''
Perform logistic regression
'''
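# Truncated; the natural completion with the imports above (random_state
# is a guess):
classifier = LogisticRegression(random_state=1)
classifier.fit(X_train, y_train.ravel())
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)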