def loaddata_thread(self, fn):
    start = time.time()
    self.dataset = loaddata.load_data(fn)
    for i in self.dataset:
        self.init_data_Text.insert(INSERT, str(i))
        self.init_data_Text.insert(INSERT, '\n')
    end = time.time()
    self.log_data_Text.insert(INSERT, 'Loading finished! ' + str(len(self.dataset)) +
                              ' items, took ' + str(round(end - start, 2)) + 's\n')
def click1(self):
    fn = tkFiledialog.askopenfilename()  # select a folder
    # fnlist = os.walk(fn)  # list the directory
    # print(fn)
    self.dataset = loaddata.load_data(fn)
    print(self.dataset)
    # self.init_data_Text.insert(INSERT, 'aojifoajeifjoaeijfoa')
    # for i in self.dataset:
    #     for j in i:
    #         self.init_data_Text.insert(INSERT, j)
    #     self.init_data_Text.insert(INSERT, '\n')
    for i in self.dataset:
        self.init_data_Text.insert(INSERT, i)
        self.init_data_Text.insert(INSERT, '\n')
def train():
    best_acc = 0.0
    saver = tf.train.Saver()
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        writer = tf.summary.FileWriter('logs', sess.graph)
        sess.run(init)
        c = []
        X_train, y_train = loaddata.load_data()
        total_batch = int(X_train.shape[0] / config.batch_size)
        for i in range(config.training_iters):
            avg_cost = 0
            for batch in range(total_batch):
                batch_x = X_train[batch * config.batch_size:(batch + 1) * config.batch_size, :]
                batch_y = y_train[batch * config.batch_size:(batch + 1) * config.batch_size, :]
                _, co = sess.run([model.optimizer, model.cost],
                                 feed_dict={model.x: batch_x,
                                            model.y: batch_y,
                                            model.keep_prob: 0.5})
                avg_cost += co
            accuet, out = sess.run([model.accuracy, model.softmax],
                                   feed_dict={model.x: batch_x,
                                              model.y: batch_y,
                                              model.keep_prob: 1.0})
            print("train accuracy=" + "{:.6f}".format(accuet))
            # print(out)
            c.append(avg_cost)
            if (i + 1) % config.display_step == 0:
                print("Iter " + str(i + 1) + ", Training Loss= " + "{:.6f}".format(avg_cost))
            # if i > 13:
            #     if accuet > best_acc:
            #         best_acc = accuet
            saver.save(sess=sess, save_path="./ckpt/test-model.ckpt")
        for variable in tf.trainable_variables():
            print(variable)
        print("Optimization Finished!")
        writer.close()
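# A minimal sketch, assuming the same `model`, `config`, and `loaddata` objects used by
# train() are still in scope, of how the checkpoint written to ./ckpt/test-model.ckpt
# could be restored for a quick evaluation; this is not part of the original script.
def evaluate_checkpoint(ckpt_path="./ckpt/test-model.ckpt"):
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, ckpt_path)  # restore the trained variables
        X_train, y_train = loaddata.load_data()
        acc = sess.run(model.accuracy,
                       feed_dict={model.x: X_train[:config.batch_size],
                                  model.y: y_train[:config.batch_size],
                                  model.keep_prob: 1.0})
        print("restored-model accuracy = {:.6f}".format(acc))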
def test_model():
    # some declared variables
    inputImageShape = (224, 224, 3)
    num_of_output_classes = 2
    _, testX, _, testY = load_data()
    model = cnn_model_structure(input_shape=inputImageShape, num_classes=num_of_output_classes)
    weights = ''
    for w in glob.glob('models\\*.h5'):
        weights = w
    model.load_weights(weights)
    predictions = model.predict(testX)
    out_class = np.array([np.argmax(out) for out in predictions])
    ref_class = np.array([np.argmax(out) for out in testY])
    print(out_class)
    print(ref_class)
    print('Acc = ' + str((1 - float(np.count_nonzero(ref_class - out_class)) / float(len(ref_class))) * 100))
def train_model():
    # some declared variables
    randomSeed = 42
    networkInitialize = glorot_normal()
    inputImageShape = (224, 224, 3)
    epoch = 200
    batchSize = 32
    num_of_output_classes = 2
    random.seed(randomSeed)
    learningRate = 0.01
    trainX, testX, trainY, testY = load_data()

    # augmentation process
    augmentation = ImageDataGenerator(rotation_range=30,
                                      width_shift_range=0.1,
                                      height_shift_range=0.1,
                                      shear_range=0.2,
                                      zoom_range=0.2,
                                      horizontal_flip=True,
                                      fill_mode="nearest")
    checkpoint = ModelCheckpoint('models\\model-{epoch:03d}-{acc:03f}-{val_acc:03f}.h5',
                                 verbose=1,
                                 monitor='val_acc',
                                 save_best_only=True,
                                 mode='auto')
    csv_logger = CSVLogger('report\\log_' + str(learningRate) + '.csv', append=False, separator=';')

    # training
    # compile the model
    model = cnn_model_structure(input_shape=inputImageShape, num_classes=num_of_output_classes)
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
    # print(model.summary())
    history = model.fit_generator(augmentation.flow(trainX, trainY, batch_size=batchSize),
                                  validation_data=(testX, testY),
                                  steps_per_epoch=len(trainX),
                                  epochs=epoch,
                                  callbacks=[csv_logger, checkpoint])
def prepareData():
    # Load data
    all_data = loaddata.load_data()
    data = all_data[1]

    # Stem attributes
    data['search_term'] = data['search_term'].map(lambda x: stem(x))
    data['product_title'] = data['product_title'].map(lambda x: stem(x))
    data['product_description'] = data['product_description'].map(lambda x: stem(x))
    print('start brand info!')
    data['brand'] = data['brand'].map(lambda x: stem(x))
    print('finish brand info!')
    data['bullet1'] = data['bullet1'].map(lambda x: stem(x))
    data['bullet2'] = data['bullet2'].map(lambda x: stem(x))
    data['bullet3'] = data['bullet3'].map(lambda x: stem(x))
    data['bullet4'] = data['bullet4'].map(lambda x: stem(x))
    data['material'] = data['material'].map(lambda x: stem(x))
    data['product_info'] = data['search_term'] + "\t" + data['product_title'] + "\t" + data['product_description']

    # Calculate lengths
    data['len_of_query'] = data['search_term'].map(lambda x: len(x.split())).astype(np.int64)
    data['len_of_title'] = data['product_title'].map(lambda x: len(x.split())).astype(np.int64)
    data['len_of_description'] = data['product_description'].map(lambda x: len(x.split())).astype(np.int64)
    data['len_of_brand'] = data['brand'].map(lambda x: len(x.split())).astype(np.int64)
    data['len_of_b1'] = data['bullet1'].map(lambda x: len(x.split())).astype(np.int64)
    data['len_of_b2'] = data['bullet2'].map(lambda x: len(x.split())).astype(np.int64)
    data['len_of_b3'] = data['bullet3'].map(lambda x: len(x.split())).astype(np.int64)
    data['len_of_b4'] = data['bullet4'].map(lambda x: len(x.split())).astype(np.int64)

    # Search and query
    data['search_term'] = data['product_info'].map(lambda x: seg_words(x.split('\t')[0], x.split('\t')[1]))
    data['attr'] = data['search_term'] + "\t" + data['brand']
    data['bullets'] = data['search_term'] + "\t" + data['bullet1'] + "\t" + data['bullet2'] + "\t" + data['bullet3'] + "\t" + data['bullet4']
    data.to_csv('features.csv', sep='\t', encoding='ISO-8859-1')
    return all_data
import numpy
import sklearn.neighbors
import matplotlib.pyplot as plt
from scipy import cluster
from sklearn import preprocessing
from sklearn import tree
from sklearn.externals.six import StringIO
import pydotplus

# Load the loaddata module, which holds the helpers used to import the data
import loaddata

# Load into `data` the columns used for clustering, and into `alldata` the complete
# dataset with all columns so the results can be mapped back later
FILE = "../../Datos/DATATHON_2015_Processed.csv"
data = loaddata.load_data(FILE)
alldata = numpy.asarray(loaddata.load_data(FILE))
datastr = numpy.asarray(loaddata.load_all_data(FILE))

## 1. Normalization of the data
# http://scikit-learn.org/stable/modules/preprocessing.html
min_max_scaler = preprocessing.MinMaxScaler()
data = min_max_scaler.fit_transform(data)

# 2. Compute the similarity matrix
dist = sklearn.neighbors.DistanceMetric.get_metric('euclidean')
matsim = dist.pairwise(data)

# 3. Building the dendrogram
# http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage
clusters = cluster.hierarchy.linkage(matsim, method='ward')
def trainandsave(): # 加载数据 root_path = "./data/cifar10" data_folder = "train" batch_size = 64 data_type = "train" trainloader = load_data(root_path, data_folder, batch_size, data_type) # 神经网络结构 # 输入是32*32*3=3072维度, 中间层分别是1500, 200, 输出10个维度(10个分类) # net = Batch_Net(32*32, 1500, 200, 10) # net = vgg16_bn() net = LeNet() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print("device = ", device) net.to(device=device) # 优化器 # 学习率为0.001 optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) # 损失函数 # 损失函数也可以自己定义,我们这里用的交叉熵损失函数 celoss = nn.CrossEntropyLoss() # 训练部分 print("trainloader = ", trainloader) writer = SummaryWriter('runs/train2') # 记录 for epoch in range(250): # 训练的数据量为5个epoch,每个epoch为一个循环 running_loss = 0.0 # 定义一个变量方便我们对loss进行输出 # 这里我们遇到了第一步中出现的trailoader,代码传入数据 for i, data in enumerate(trainloader, 0): # enumerate是python的内置函数,既获得索引也获得数据 # get the inputs # data是从enumerate返回的data,包含数据和标签信息,分别赋值给inputs和labels inputs, labels = data #print("inputs = ", inputs) #print("labels = ", labels) # wrap them in Variable # 转换数据格式用Variable inputs, labels = Variable(inputs), Variable(labels) # 梯度置零,因为反向传播过程中梯度会累加上一次循环的梯度 optimizer.zero_grad() # inputs 需要从32*32的图像展开成1024 # forward + backward + optimize inputs = inputs.to(device) labels = labels.to(device) # 把数据输进CNN网络net outputs = net(inputs) loss = celoss(outputs, labels) # 计算损失值 loss.backward() # loss反向传播 计算反向梯度 optimizer.step() # 利用反向梯度 参数更新 #running_loss += loss.data[0] # loss累加 running_loss += loss.item() # loss累加 # 每个epoch要训练所有的图片,每训练完成200张便打印一下训练的效果(loss值) if (i + 1) % 200 == 0: localtime = time.asctime(time.localtime(time.time())) writer.add_scalar('running_loss', running_loss / 200, global_step=((epoch * 600) + (i + 1))) print(localtime, '[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200)) # 然后再除以200,就得到这两百次的平均损失值 running_loss = 0.0 # 这一个200次结束后,就把running_loss归零,下一个200次继续使用 # 每50个epoch保存一次参数 if (epoch + 1) % 50 == 0: save_name = "net_params" + str(epoch + 1) + ".pkl" torch.save(net.state_dict(), save_name) print('Finished Training') # 保存神经网络 torch.save(net, 'net.pkl') # 保存整个神经网络的结构和模型参数
def diagnosticPlot(name, values):
    args = values['generation-args'][1]
    errors = []
    for key in ['responsewindow']:
        label, value = args[key]
        value = parsematlab.parse(value)
        if isinstance(value, str):
            errors.append(label + '\n    ' + value.replace('\n', '\n    '))
        args[key] = value
    if len(errors) > 0:
        Error('\n\n'.join(errors))
        return
    response_window = args['responsewindow']
    fnames = values['flist'][1]
    removeanomalies = args['removeanomalies'][1]
    weightfile = values['weightfile'][1]
    data = []
    type = []
    samplingrate = None
    try:
        for fname in fnames:
            result = loaddata.load_data(fname, response_window, None,
                                        removeanomalies=removeanomalies)
            if isinstance(result, str):
                Error(result)
                return
            if samplingrate is None:
                samplingrate = result[2]
            if samplingrate != result[2]:
                Error('Not all data files have the same sampling rate.')
                return
            data.append(result[0])
            type.append(result[1])
        if len(data) == 0 or len(type) == 0:
            Error('You must select some data to plot.')
            return
        try:
            data = np.concatenate(data)
        except ValueError:
            Error('Not all data files have the same number of channels.')
            return
        type = np.concatenate(type)
        if weightfile:
            weights = loaddata.load_weights(weightfile)
            if isinstance(weights, str):
                Error(weights)
                return
            classifier = np.zeros(data.shape[1:])
            classifier[:weights.shape[0], :weights.shape[1]] = weights
            classifier_max = max(abs(classifier.max()), abs(classifier.min()))
        else:
            classifier = None
        if isinstance(classifier, str):
            Error(classifier)
            return
        num_plots = 3 if classifier is None else 4
        signed_r = np.zeros(data.shape[1:])
        for row in range(signed_r.shape[0]):
            for col in range(signed_r.shape[1]):
                signed_r[row, col] = stats.linregress(data[:, row, col], type)[2]
        signed_r_max = max(abs(signed_r.max()), abs(signed_r.min()))
        x = np.arange(data.shape[1]) * 1000 / samplingrate
        target = data[type.nonzero()[0]].mean(axis=0)
        nontarget = data[(~type).nonzero()[0]].mean(axis=0)
        vmin, vmax = ylim = [min(target.min(), nontarget.min()),
                             max(target.max(), nontarget.max())]
        fig = pylab.figure()
        fig.subplots_adjust(bottom=0.06, top=0.93, hspace=0.45)
        master_ax = ax = pylab.subplot(num_plots, 1, 1)
        pylab.title('Target', fontsize='medium')
        pylab.imshow(target.transpose(), interpolation='nearest', cmap='PRGn',
                     aspect='auto', vmin=vmin, vmax=vmax, origin='lower',
                     extent=(0, data.shape[1] * 1000 / samplingrate,
                             -0.5, data.shape[2] - 0.5))
        pylab.xticks(fontsize='small')
        pylab.yticks(range(data.shape[2]),
                     [str(i) for i in range(1, data.shape[2] + 1)],
                     fontsize='small')
        pylab.axes(pylab.colorbar().ax)
        pylab.yticks(fontsize='small')
        ax = pylab.subplot(num_plots, 1, 2, sharex=master_ax, sharey=master_ax)
        pylab.title('Non-Target', fontsize='medium')
        pylab.imshow(nontarget.transpose(), interpolation='nearest', cmap='PRGn',
                     aspect='auto', vmin=vmin, vmax=vmax, origin='lower',
                     extent=(0, data.shape[1] * 1000 / samplingrate,
                             -0.5, data.shape[2] - 0.5))
        pylab.xticks(fontsize='small')
        pylab.yticks(range(data.shape[2]),
                     [str(i) for i in range(1, data.shape[2] + 1)],
                     fontsize='small')
        pylab.axes(pylab.colorbar().ax)
        pylab.yticks(fontsize='small')
        ax = pylab.subplot(num_plots, 1, 3, sharex=master_ax, sharey=master_ax)
        pylab.title('Correlation Coefficient', fontsize='medium')
        pylab.imshow(signed_r.transpose(), interpolation='nearest', cmap='PRGn',
                     aspect='auto', vmin=-signed_r_max, vmax=signed_r_max,
                     origin='lower',
                     extent=(0, data.shape[1] * 1000 / samplingrate,
                             -0.5, data.shape[2] - 0.5))
        pylab.xticks(fontsize='small')
        pylab.yticks(range(data.shape[2]),
                     [str(i) for i in range(1, data.shape[2] + 1)],
                     fontsize='small')
        pylab.axes(pylab.colorbar().ax)
        pylab.yticks(fontsize='small')
        if classifier is None:
            return
        ax = pylab.subplot(num_plots, 1, 4, sharex=master_ax, sharey=master_ax)
        pylab.title('Classifier Weights', fontsize='medium')
        pylab.imshow(classifier.transpose(), interpolation='nearest', cmap='PRGn',
                     aspect='auto', vmin=-classifier_max, vmax=classifier_max,
                     origin='lower',
                     extent=(0, data.shape[1] * 1000 / samplingrate,
                             -0.5, data.shape[2] - 0.5))
        pylab.xticks(fontsize='small')
        pylab.yticks(range(data.shape[2]),
                     [str(i) for i in range(1, data.shape[2] + 1)],
                     fontsize='small')
        pylab.axes(pylab.colorbar().ax)
        pylab.yticks(fontsize='small')
    except MemoryError:
        Error('Could not fit all the selected data in memory.\n'
              'Try loading fewer data files.')
        return
def testWeights(name, values):
    flistwidget, fnames = values['flist']
    weightfile = values['weightfile'][1]
    if not weightfile:
        Error('You must first generate weights or select a file from which '
              'to load the weights.')
        return
    errors = []
    label, value = values['test-args'][1]['matrixshape']
    matrixshape = parsematlab.parse(value.lower().replace('x', ' '))
    if isinstance(matrixshape, str):
        errors.append(label + '\n    ' + value.replace('\n', '\n    '))
    if np.isscalar(matrixshape):
        matrixshape = [matrixshape]
    label, value = values['test-args'][1]['repetitions']
    repetitions = parsematlab.parse(value)
    if isinstance(repetitions, str):
        errors.append(label + '\n    ' + value.replace('\n', '\n    '))
    if len(errors) > 0:
        Error('\n\n'.join(errors))
        return
    classifier = loaddata.load_weights(weightfile)
    if isinstance(classifier, str):
        Error(classifier)
        return
    removeanomalies = values['generation-args'][1]['removeanomalies'][1]
    data = []
    type = []
    samplingrate = None
    try:
        for fname in fnames:
            result = loaddata.load_data(fname, [0, classifier.shape[0]], None, True,
                                        removeanomalies=removeanomalies)
            if isinstance(result, str):
                Error(result)
                return
            if samplingrate is None:
                samplingrate = result[2]
            if samplingrate != result[2]:
                Error('Not all data files have the same sampling rate.')
                return
            data.append(result[0])
            type.append(result[1])
        if len(data) == 0 or len(type) == 0:
            Error('You must select some data upon which to test the weights.')
            return
        try:
            data = np.concatenate(data)
        except ValueError:
            Error('Not all data files have the same number of channels.')
            return
        type = np.concatenate(type)
        result = testweights.test_weights(data, type, classifier, matrixshape, repetitions)
        if isinstance(result, str):
            Error(result)
            return
        score, correctness = result
        message = '\n'.join(fnames)
        message += '\n\n%s\n\nExpected accuracy for a %s matrix:\n\n' % \
            (weightfile, 'x'.join(str(i) for i in matrixshape))
        for i in range(len(repetitions)):
            if repetitions[i] != 1:
                message += '%i repetitions: %0.1f%%\n' % \
                    (repetitions[i], correctness[i] * 100)
            else:
                message += '1 repetition: %0.1f%%\n' % (correctness[i] * 100)
        message += '\nTarget STDEV: %f\nNontarget STDEV: %f\n' % score
        Info(message)
    except MemoryError:
        Error('Could not fit all the selected data in memory.\n'
              'Try loading fewer data files.')
        return
def trainandsave(): # 加载数据 root_path = "./data/cifar10" data_folder = "train" batch_size = 32 data_type = "train" trainloader = load_data(root_path, data_folder, batch_size, data_type) # 加载自定义模型 默认的分类数量就是10 net = vgg19_bn() if torch.cuda.is_available(): device = torch.device('cuda') net = net.cuda() else: device = torch.device('cpu') device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print("device = ", device) net.to(device=device) # 优化器 optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9) # 损失函数 # 损失函数也可以自己定义,我们这里用的交叉熵损失函数 celoss = nn.CrossEntropyLoss() # 训练部分 print("trainloader = ", trainloader) for epoch in range(300): # 训练的数据量为5个epoch,每个epoch为一个循环 running_loss = 0.0 # 定义一个变量方便我们对loss进行输出 # 这里我们遇到了第一步中出现的trailoader,代码传入数据 for i, data in enumerate(trainloader, 0): # enumerate是python的内置函数,既获得索引也获得数据 # get the inputs # data是从enumerate返回的data,包含数据和标签信息,分别赋值给inputs和labels inputs, labels = data #print("inputs = ", inputs) #print("labels = ", labels) # wrap them in Variable # 转换数据格式用Variable inputs, labels = Variable(inputs), Variable(labels) # 梯度置零,因为反向传播过程中梯度会累加上一次循环的梯度 optimizer.zero_grad() # forward + backward + optimize inputs = inputs.to(device) labels = labels.to(device) # 把数据输进CNN网络net outputs = net(inputs) loss = celoss(outputs, labels) # 计算损失值 loss.backward() # loss反向传播 计算反向梯度 optimizer.step() # 利用反向梯度 参数更新 #running_loss += loss.data[0] # loss累加 running_loss += loss.item() # loss累加 # 每个epoch要训练所有的图片,每训练完成200张便打印一下训练的效果(loss值) if (i + 1) % 200 == 0: localtime = time.asctime(time.localtime(time.time())) print(localtime, '[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200)) # 然后再除以200,就得到这两百次的平均损失值 running_loss = 0.0 # 这一个200次结束后,就把running_loss归零,下一个200次继续使用 # 每五十个epoch保存一次参数 if (epoch + 1) % 50 == 0: save_name = "default_net_params_ep" + str(epoch) + ".pkl" torch.save(net.state_dict(), save_name) print('Finished Training') # 保存神经网络 torch.save(net, 'pretrain_default_net.pkl') # 保存整个神经网络的结构和模型参数
if args.gpu: # check for cuda availability if torch.cuda.is_available: print('CUDA is available, use cuda mode') architecture = 'cuda' else: print('Cuda is not available on this system, fallback to cpu mode') architecture = 'cpu' else: print('Use cpu mode') architecture = 'cpu' print("...") print("Import training, test and validation set") print("...") dataloader_train, dataloader_test, dataloader_validation, class_to_idx_traing = loaddata.load_data('flowers') print("...") print("Building and traing model") model = network.build_and_train_model(args.model_init, args.hidden_layers, args.epochs, args.dropout, args.lr, architecture, dataloader_train, dataloader_validation, dataloader_test) print("...") print("The model looks like:") print(model) # run agains test data def test_model(model, testloader, architecture): model.eval() correct = 0 total = 0 with torch.no_grad(): for data in dataloader_test:
import keras
import matplotlib.pyplot as plt
import numpy as np
from keras.models import load_model
from loaddata import load_data, load_one
import math

# returns a compiled model
# identical to the previous one
model = load_model('9100.h5')

ximages, yvalues = load_data()


def getimage(index):
    xi = ximages[index]
    i = np.reshape(xi, (xi.shape[0], xi.shape[1]))
    return i


def imshow(index):
    plt.figure()
    plt.imshow(getimage(index), cmap='gray')
    plt.show(block=False)


def imrange(start=0, count=2):
    import math
    s = count + 5
    plt.figure()
    fig0, plots = plt.subplots(count,
from keras.models import Sequential
import matplotlib.pyplot as plt
from matplotlib import style
import time
import warnings
import numpy as np
from numpy import newaxis
import csv
import pandas as pd
import ast
from numpy import diff
from statistics import mean

import loaddata

# Loading stock data from the saved CSV.
X_train, y_train, X_test, y_test = loaddata.load_data('./stock/ebay.csv', 55, True)
# stocks = AMZON, APPL, citigroup, dowjones, ebay, GOOG, KO, TATA, test


class RecurrentNeuralNetwork:
    def __init__(self, xs, ys, rl, eo, lr):
        # initial input
        self.x = np.zeros(xs)
        # input size
        self.xs = xs
        # expected output
        self.y = np.zeros(ys)
        # output size
        self.ys = ys
        # weight matrix for interpreting results from the LSTM cell
        self.w = np.random.random((ys, ys))
from loaddata import load_data
import csv
from matplotlib import pyplot
import pandas as pd

dataset = load_data('capture20110811.pcap.netflow.labeled')
dataset_botnet = dataset[dataset['Label'] == 'Botnet']
ip_to_analyse = dataset_botnet.iloc[0]['src_ip']
all_data_ip_to_analyse = dataset[dataset['src_ip'] == ip_to_analyse]
botnet_data_ip_to_analyze = dataset_botnet[dataset_botnet['src_ip'] == ip_to_analyse]
features = ['Prot', 'Packets']


def get_markov_chain(feature, data):
    transition_counts = {}
    # print(data.size)
    # print(ngram_length)
    old_state = None
    for state in data[feature]:
        # print(state)
        # ngram = str(data[feature][i])
        # for j in range(i, i+ngram_length):
        #     ngram += '{}, '.format(data.at[j, data[feature]])
        if old_state is not None:
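# A minimal, self-contained sketch (an assumption, not the original continuation of
# get_markov_chain) of turning a sequence of states into normalised transition
# probabilities, which is the structure the function above appears to be building:
def transition_probabilities(states):
    counts = {}
    prev = None
    for state in states:
        if prev is not None:
            counts.setdefault(prev, {})
            counts[prev][state] = counts[prev].get(state, 0) + 1
        prev = state
    probs = {}
    for src, dsts in counts.items():
        total = float(sum(dsts.values()))
        probs[src] = {dst: c / total for dst, c in dsts.items()}
    return probs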
def main():
    best_score = 0

    # Load all stations from the csv file
    skip = False
    stations_data = load_data("data/StationsNationaal.csv", skip)

    # Prompt the user for the necessary input
    user_choices = user_interface(stations_data)

    # Load the connections data from the csv file (Heel Nederland or Noord- en
    # Zuid Holland), skipping a station if necessary
    data_list = load_data(user_choices["data"], user_choices["skip"])

    for _ in range(user_choices["attempts"]):
        # Load all stations as objects into a dictionary
        stations_objects = load_stations(data_list, stations_data)

        # Load all connections into a dictionary
        connection_objects = load_connections(data_list, stations_objects,
                                              user_choices["change_connections"])

        # Generate a random solution with the chosen heuristics
        solution = random_solution(stations_objects, connection_objects, user_choices)

        # Run the heuristic of "cutting" trains, if the user chose this option
        if user_choices["cut_connections"]:
            solution = cut(solution)

        # Run the heuristic of "pasting" trains, if the user chose this option
        if user_choices["paste_connections"]:
            solution = paste(solution, user_choices["max_minutes"])

        # Delete empty trains from the solution
        solution = delete_trains(solution)

        # Calculate the K of a solution with the given function
        score = calculate(solution)

        # Keep the best solution seen so far
        if score > best_score:
            best_solution = solution
            best_score = score

    # Open the output file
    f = open("output.csv", "w")
    f.write("random:\ntrein, lijnvoering\n")
    counter = 0

    # Write the random solution to the output file
    for train in best_solution["trains"]:
        counter += 1
        f.write(f'trein_{counter}, "{train}"\n')
    f.write(f"SCORE:{best_score}\n\n")
    f.close()

    # If simulated annealing is chosen
    if user_choices["sim_annealing"] == True:
        best_solution = simulated_annealing(solution, stations_objects, user_choices)

        # If the heuristic cut is chosen
        if user_choices["SA_cut_connections"]:
            best_solution = cut(best_solution)

        best_solution = delete_trains(best_solution)
        better_score = calculate(best_solution)

        # Open the output file
        f = open("output.csv", "a+")
        f.write("simulated annealing:\ntrein, lijnvoering\n")
        counter = 0

        # Write the simulated annealing solution to the output file
        for train in best_solution["trains"]:
            counter += 1
            f.write(f'trein_{counter}, "{train}"\n')
        f.write(f"SCORE:{better_score}\n\n")
        f.close()

    # Draw the map
    if user_choices["data"] == "data/ConnectiesHolland.csv":
        draw_train_holland(best_solution, stations_objects)
    else:
        draw_train(best_solution, stations_objects)
# -*- coding: utf-8 -*-

# 1. Load data
import loaddata
data, names = loaddata.load_data("iquitos-train.csv")

import numpy

# 1. Data normalization
# http://scikit-learn.org/stable/modules/preprocessing.html
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
rows = 206
cols = 15
print(cols)
datanorm = min_max_scaler.fit_transform(data)

# 2. Principal Component Analysis
from sklearn.decomposition import PCA
estimator = PCA(n_components=2)
X_pca = estimator.fit_transform(datanorm)

import matplotlib.pyplot as plt
plt.plot(X_pca[:, 0], X_pca[:, 1], 'x')

# 3. Hierarchical Clustering
# 3.1. Compute the similarity matrix
import sklearn.neighbors
import numpy
dist = sklearn.neighbors.DistanceMetric.get_metric('euclidean')
def trainandsave(): # 加载数据 root_path = "./data/cifar10" data_folder = "train" batch_size = 32 data_type = "train" trainloader = load_data(root_path, data_folder, batch_size, data_type) # 加载预训练模型 net = torchvision.models.vgg19_bn(pretrained=True) # 最后输出层从1000个分类,换层10个分类 net.classifier._modules['6'] = nn.Sequential(nn.Linear(4096, 10), nn.Softmax(dim=1)) # 冻结特征层,分类层进行训练 param_group = [] learning_rate = 1e-3 for name, parameters in net.named_parameters(): if not name.__contains__('classifier'): parameters.requires_grad = False param_group += [{'params': parameters, 'lr': learning_rate}] if torch.cuda.is_available(): device = torch.device('cuda') net = net.cuda() else: device = torch.device('cpu') device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print("device = ", device) net.to(device=device) # 优化器 optimizer = optim.SGD(param_group, momentum=0.9) # 损失函数 # 损失函数也可以自己定义,我们这里用的交叉熵损失函数 celoss = nn.CrossEntropyLoss() # 训练部分 print("trainloader = ", trainloader) for epoch in range(300): # 训练的数据量为5个epoch,每个epoch为一个循环 running_loss = 0.0 # 定义一个变量方便我们对loss进行输出 # 这里我们遇到了第一步中出现的trailoader,代码传入数据 for i, data in enumerate(trainloader, 0): # enumerate是python的内置函数,既获得索引也获得数据 # get the inputs # data是从enumerate返回的data,包含数据和标签信息,分别赋值给inputs和labels inputs, labels = data #print("inputs = ", inputs) #print("labels = ", labels) # wrap them in Variable # 转换数据格式用Variable inputs, labels = Variable(inputs), Variable(labels) # 梯度置零,因为反向传播过程中梯度会累加上一次循环的梯度 optimizer.zero_grad() # forward + backward + optimize inputs = inputs.to(device) labels = labels.to(device) # 把数据输进CNN网络net outputs = net(inputs) loss = celoss(outputs, labels) # 计算损失值 loss.backward() # loss反向传播 计算反向梯度 optimizer.step() # 利用反向梯度 参数更新 #running_loss += loss.data[0] # loss累加 running_loss += loss.item() # loss累加 # 每个epoch要训练所有的图片,每训练完成200张便打印一下训练的效果(loss值) if (i + 1) % 200 == 0: localtime = time.asctime(time.localtime(time.time())) print(localtime, '[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200)) # 然后再除以200,就得到这两百次的平均损失值 running_loss = 0.0 # 这一个200次结束后,就把running_loss归零,下一个200次继续使用 # 每五十个epoch保存一次参数 if (epoch + 1) % 50 == 0: save_name = "classifier_net_params_ep" + str(epoch) + ".pkl" torch.save(net.state_dict(), save_name) print('Finished Training') # 保存神经网络 torch.save(net, 'pretrain_classifier_net.pkl') # 保存整个神经网络的结构和模型参数
import sklearn.neighbors
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing

# Loads the data into the array 'cases'
import loaddata
# cases = loaddata.load_data()
cases = pd.read_csv('Data/dengue_features_train.csv')
cases = cases.drop(cases.index[[88, 140, 400, 452, 752, 712, 764, 495]])  # principal outliers
cases = cases.drop(cases.index[[700, 502, 361, 253, 254, 330, 493]])
cases = cases.fillna(cases.mean())
print cases
cases.to_csv('out.csv', index=False)
cases = loaddata.load_data()
# for i in list:
#     del cases[i]

# Normalization of the data to work with it in clustering
min_max_scaler = preprocessing.MinMaxScaler()
norm_cases = min_max_scaler.fit_transform(cases)

from sklearn.decomposition import PCA
estimator = PCA(n_components=2)
X_pca = estimator.fit_transform(norm_cases)
plt.plot(X_pca[:, 0], X_pca[:, 1], 'x')

# Computing the similarity matrix; this is where the distance function is chosen.
dist = sklearn.neighbors.DistanceMetric.get_metric('euclidean')
matsim = dist.pairwise(norm_cases)
tfconfig = tf.ConfigProto()
tfconfig.gpu_options.allow_growth = True

import utils

# Configuration settings
squaredIB = True    # Whether to minimize beta*I(X;T) - I(Y;T) or beta*I(X;T)^2 - I(Y;T)
batchsize = 256     # Mini-batch size
report_every = 10   # How often to report
patience = 10       # Early stopping patience -- # epochs to go without improvement on validation data
beta = 0.05         # IB trade-off parameter

# Load data
import loaddata
data = loaddata.load_data('MNIST', validation=True)
input_dim = data['trn_X'].shape[1]
output_dim = data['trn_Y'].shape[1]

# Build the network
tf.reset_default_graph()
import iblayer
iblayerobj = iblayer.NoisyIBLayer()

layers = []
layers.append(tf.placeholder(tf.float32, [None, input_dim], name='X'))
layers.append(tf.keras.layers.Dense(128, activation=tf.nn.relu)(layers[-1]))
layers.append(tf.keras.layers.Dense(128, activation=tf.nn.relu)(layers[-1]))
layers.append(tf.keras.layers.Dense(10, activation=None)(layers[-1]))
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 17 12:20:16 2017

@authors: Sergio Alises Mendiola and Raul Gallego de la Sacristana Alises
"""

# 1. Load data
import loaddata
data = loaddata.load_data("../Data/dengue_features_train_outliers.csv")

# 1. Data normalization
# http://scikit-learn.org/stable/modules/preprocessing.html
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
datanorm = min_max_scaler.fit_transform(data)

# 2. Principal Component Analysis
from sklearn.decomposition import PCA
estimator = PCA(n_components=2)
X_pca = estimator.fit_transform(datanorm)

import matplotlib.pyplot as plt
plt.plot(X_pca[:, 0], X_pca[:, 1], 'x')

# 3. Hierarchical Clustering
# 3.1. Compute the similarity matrix
def main():
    years = None
    features_excluded = ['week_start_date']
    _outliers = None
    cities = get_values_of("../data/dengue_features_train.csv", 'city')
    target = ['total_cases']
    all_revelant_features = {}

    for city in cities:
        # Filtering by values of the keys
        _filter = {'city': [city], 'year': years}

        # Load city data
        data = load_data("../data/dengue_features_train.csv",
                         filter_parameters=_filter,
                         excludes_features=features_excluded,
                         outliers=_outliers)

        # Load total cases by city, year and week of year
        data_labels = load_data("../data/dengue_labels_train.csv",
                                filter_parameters=_filter)

        # Adapt data for clustering
        data_test_hiech = data.drop(labels=['city', 'year'], axis=1, inplace=False)

        # Outliers will be deleted
        elements, outliers, cut = clustering.hierarchical_clustering(data=data_test_hiech)
        n_element = count_elements(elements)
        n_outliers = count_elements(outliers)
        total = n_element + n_outliers

        print 'Analysis in: %s' % (city)

        total_outliers = []
        while outliers is not None:
            total_outliers += outliers
            data_test_hiech.drop(outliers, axis=0, inplace=True)
            elements, outliers, cut = clustering.hierarchical_clustering(
                data_test_hiech, cut=cut, first_total=total)

        if total_outliers:
            print 'Auto-detected Outliers:'
            print total_outliers

        # Join data
        data_without_outliers = data
        data_without_outliers.drop(total_outliers, axis=0, inplace=True)
        merge_data = pd.merge(data_without_outliers, data_labels,
                              on=['city', 'year', 'weekofyear'], how='outer')
        merge_data.drop(labels=['city', 'year'], axis=1, inplace=True)
        merge_data.dropna(inplace=True)

        # Features clustering
        data_for_features = merge_data.drop(labels=target, axis=1)
        clustering.hierarchical_clustering_features(data_for_features)

        # Cross-validation to select features
        feature_selected, max_deph = cros.cross_validation(
            merge_data, algorithm='DecisionTreeRegressor')

        # Regressor to select the relevant features
        relevant_features = reg.tree_regressor(merge_data, max_deph, feature_selected,
                                               target, city)
        all_revelant_features[city] = relevant_features

        # For each city, one KNN model
        # Cross-validation to select features
        n_neighbors, X, y = cros.cross_validation(merge_data, algorithm='KNN',
                                                  features=relevant_features,
                                                  target=target, verbose=True)

        # ---------------------------------------------
        # Prediction
        data_Test = load_data("../data/dengue_features_test.csv",
                              filter_parameters=_filter,
                              excludes_features=features_excluded,
                              outliers=_outliers)
        # data_Test.dropna(inplace=True)
        test = data_Test[relevant_features]
        test.interpolate(method='linear', inplace=True)

        knn = neighbors.KNeighborsRegressor(n_neighbors, weights='distance')
        prediction = knn.fit(X, y).predict(test)

        # Show the prediction
        print "\nPREDICTION:"
        xx = np.arange(len(prediction))
        plt.plot(xx, prediction, c='g', label='prediction')
        plt.axis('tight')
        plt.legend()
        plt.title("KNeighborsRegressor (k = %i, weights = '%s')" % (n_neighbors, 'distance'))
        plt.show()

        # Write the results to a csv file
        submission_data = load_data("../data/submission_format.csv",
                                    filter_parameters=_filter)
        final_data = []
        for i in range(len(prediction)):
            row = []
            row.append(submission_data.iloc[i]['city'])
            row.append(submission_data.iloc[i]['year'])
            row.append(submission_data.iloc[i]['weekofyear'])
            row.append(int(prediction[i]))
            final_data.append(row)
        col = ["city", "year", "weekofyear", "total_cases"]
        df = pd.DataFrame(final_data, columns=col)
        df.to_csv('../data/predictions_for_' + city + '.csv', index=False,
                  sep=',', encoding='utf-8')
        # ---------------------------------------------

    print '\n\t [ SELECTED FEATURES ]'
    for key, value in all_revelant_features.iteritems():
        print 'City: %s, %2d features: \n\t %s' % (key, len(value), str(value))
theta = 30
alpha1 = 25
alpha2 = 15
beta1 = 20
beta2 = 25

from loaddata import load_data
check_node_lst, START, END = load_data("附件1:数据集1-终稿.xlsx")
def testmodel():
    # Select the compute device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load the data
    root_path = "./data/cifar10_save"
    data_folder = "test"
    batch_size = 8
    data_type = "test"
    testloader = load_data(root_path, data_folder, batch_size, data_type)

    # Load the model
    model_path = "net.pkl"
    net = torch.load(model_path)

    # Switch to evaluation mode, because layers such as Batch Normalization
    # and Dropout behave differently during training and testing
    net.eval()

    # Load the label names
    label_name_path = "./data/cifar-10-batches-py/batches.meta"
    labelnames = unpickle(label_name_path)
    labelnames = labelnames[b'label_names']
    label_names = []
    for l in labelnames:
        label_names.append(l.decode("utf-8"))

    # Number of predictions per class
    pred_num = dict()
    # Total number of test labels per class
    test_num = dict()
    # Number of correct predictions per class
    correct_num = dict()
    for label in label_names:
        test_num[label] = 0
        pred_num[label] = 0
        correct_num[label] = 0

    with torch.no_grad():
        for n_iter, (image, label) in enumerate(testloader):
            print("iteration: {}\ttotal {} iterations".format(n_iter + 1, len(testloader)))
            image = image.to(device)
            label = label.to(device)
            output = net(image)
            _, pred = output.topk(5, 1, largest=True, sorted=True)
            label = label.view(label.size(0), -1).expand_as(pred)
            correct = pred.eq(label).float()
            labels = label.cpu().numpy()
            # test_num[l] += 1
            for p in pred.cpu().numpy():
                pred_num[label_names[p[0]]] += 1
            for i, c in enumerate(correct.cpu().numpy()):
                l = labels[i][0]
                label_name = label_names[l]
                test_num[label_name] += 1
                if int(c[0]) == 1:
                    correct_num[label_name] += 1

    print("=====")
    print(" label name    recall    precision")
    for i in range(10):
        label_name = label_names[i]
        print(label_name,
              " {:.1f}%    {:.1f}%".format(
                  float(correct_num[label_name]) / float(test_num[label_name]) * 100,
                  float(correct_num[label_name]) / float(pred_num[label_name]) * 100))
"""
Display the Dendrogram of the data.

@author: Ruth Rodríguez-Manzaneque López, Diego Andérica Richard and Laura Jaime Villamayor
"""
import matplotlib.pyplot as plt
import numpy
from scipy import cluster
from sklearn import preprocessing
import sklearn.neighbors

import loaddata

# 0. Load Data.
records, names = loaddata.load_data("../Data/dengue_features_train.csv")

# 1. Normalization of the data.
min_max_scaler = preprocessing.MinMaxScaler()
records = min_max_scaler.fit_transform(records)

# 2. Compute the similarity matrix.
dist = sklearn.neighbors.DistanceMetric.get_metric('chebyshev')
matsim = dist.pairwise(records)
avSim = numpy.average(matsim)
# The average distance is used.
print "%s\t%6.2f" % ('Average Distance', avSim)

# 3. Build the dendrogram with the 'complete' method and cut it at level 6.
clusters = cluster.hierarchy.linkage(matsim, method='complete')
cut_level = 6
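# A minimal sketch, going beyond the snippet above, of how the dendrogram the docstring
# promises could be drawn and cut using the `clusters` linkage and `cut_level` already
# computed; it relies only on standard scipy.cluster.hierarchy calls and is an assumption,
# not the original continuation.
cluster.hierarchy.dendrogram(clusters, color_threshold=cut_level)
plt.title('Dendrogram (complete linkage, Chebyshev distance)')
plt.show()
labels = cluster.hierarchy.fcluster(clusters, cut_level, criterion='distance')
print 'Number of clusters at cut level %d: %d' % (cut_level, len(set(labels)))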
cfg['beta_max'], cfg['beta_npoints'], endpoint=True)[::-1]

run_methods = cfg['methods'].split(',')  # only run the specified methods

if not os.path.exists(savedir):
    print("Making directory", savedir)
    os.makedirs(savedir)

import loaddata, iblayer, utils
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
tfconfig = tf.ConfigProto()
tfconfig.gpu_options.allow_growth = True

data = loaddata.load_data(cfg['runtype'], validation=True)
input_dim = data['trn_X'].shape[1]
output_dim = data['trn_Y'].shape[1]


def train(sess, method, beta, cfg, data, net, savedir):
    # sess    : TensorFlow session
    # method  : 'ce' (cross-entropy only), 'nlIB' (nonlinear IB), or 'VIB' (variational IB)
    # beta    : beta value
    # cfg     : configuration dictionary
    # data    : data object
    # net     : neural network object
    # savedir : directory where to save results

    def calcstats(epoch, do_print=False):
        lobj = net.iblayerobj
def generateFeatureWeights(name, values):
    args = values['generation-args'][1]
    errors = []
    for key in args:
        if key in ('removeanomalies', 'classificationmethod'):
            continue
        label, value = args[key]
        value = parsematlab.parse(value)
        if isinstance(value, str):
            errors.append(label + '\n    ' + value.replace('\n', '\n    '))
        args[key] = value
    if len(errors) > 0:
        Error('\n\n'.join(errors))
        return
    response_window = args['responsewindow']
    decimation_frequency = args['decimationfrequency']
    max_model_features = args['maxmodelfeatures']
    penter = args['penter']
    premove = args['premove']
    random_sample_percent = args['randompercent']
    channelset = args['channelset'] - 1
    fnames = values['flist'][1]
    weightwidget = values['weightfile'][0]
    removeanomalies = args['removeanomalies'][1]
    classificationmethod = args['classificationmethod'][1]
    data = []
    type = []
    samplingrate = None
    channels = None
    try:
        for fname in fnames:
            result = loaddata.load_data(fname, response_window, None,
                                        removeanomalies=removeanomalies)
            if isinstance(result, str):
                Error(result)
                return
            if samplingrate is None:
                samplingrate = result[2]
            if samplingrate != result[2]:
                Error('Not all data files have the same sampling rate.')
                return
            if channels is None:
                channels = result[0].shape[2]
            if channels != result[0].shape[2]:
                Error('Not all data files have the same number of channels.')
                return
            try:
                data.append(result[0][:, :, channelset])
            except IndexError:
                Error('"Channel Set" is not a subset of the available channels.')
                return
            type.append(result[1])
        if len(data) == 0 or len(type) == 0:
            Error('You must select some data from which to generate the weights.')
            return
        data = np.concatenate(data)
        type = np.concatenate(type)
        randomindices = np.arange(data.shape[0], dtype=int)
        np.random.shuffle(randomindices)
        randomindices = randomindices[:data.shape[0] * random_sample_percent // 100]
        randomindices.sort()
        data = data[randomindices]
        type = type[randomindices]
        result = swlda.swlda(data, type, samplingrate, response_window,
                             decimation_frequency, max_model_features, penter, premove)
        if isinstance(result, str):
            Error(result)
            return
        channels, weights = result
        channels = channelset[channels - 1] + 1  # Convert from one-based for indexing,
                                                 # and then back to one-based for human readability.
        prm = exportToPRM(channels, weights, response_window[1])
        try:
            fname = SaveAs(filetypes=[('Parameter Files', '.prm')],
                           defaultextension='prm')
            if fname:
                prmfile = open(fname, 'wb')
                prmfile.write(prm)
                prmfile.close()
                weightwidget.setContents(fname)
        except:
            Error('Could not write PRM file.')
            return
    except MemoryError:
        Error('Could not fit all the selected data in memory.\n'
              'Try loading fewer data files.')
        return
import matplotlib.pyplot as plt
import numpy
# http://docs.scipy.org/doc/scipy/reference/cluster.html
from scipy import cluster
from sklearn import preprocessing
import sklearn.neighbors

# 0. Load Data
import loaddata
states, names = loaddata.load_data("iquitos-train_with_name.csv")
features = numpy.transpose(states)

# 1. Normalization of the data
# http://scikit-learn.org/stable/modules/preprocessing.html
min_max_scaler = preprocessing.MinMaxScaler()
features_norm = min_max_scaler.fit_transform(features)

# 1.2. Principal Component Analysis
from sklearn.decomposition import PCA
estimator = PCA(n_components=3)
X_pca = estimator.fit_transform(features_norm)
print("Variance Ratio: ", estimator.explained_variance_ratio_)

fig, ax = plt.subplots()
print(len(names))
for i in range(len(X_pca)):
    print(i)
    plt.text(X_pca[i][0], X_pca[i][1], names[i])
from sklearn.svm import SVC
from loaddata import load_data

# Load the training features/labels and the unlabeled test features
x_tr, y_tr, x_tst = load_data()

clf = SVC()
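# A minimal sketch (an assumption about the intended continuation) of training the SVC on
# the loaded split and predicting labels for the unlabeled test features:
clf.fit(x_tr, y_tr)
y_pred = clf.predict(x_tst)
print('predicted %d test labels' % len(y_pred))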
# StartPython.py
# -*- coding: utf-8 -*-
from loaddata import load_data
from loaddata import load_ip_sequence

dataset = load_data(
    'C:/Users/YI/Desktop/TUD/Cyber data analytics/LAB3/Sampling/capture20110811.pcap.netflow.labeled'
)
ip_data = load_ip_sequence(dataset, '147.32.84.229')

# %%


def top10freq(lst):
    from collections import Counter
    d = {}
    for i in lst:
        if d.get(i):
            d[i] += 1
        else:
            d[i] = 1
    occurence = dict(Counter(d).most_common(10))
    frequency = occurence
    for i in frequency:
        frequency[i] = occurence[i] / len(lst)
    return frequency


#
def reservoir_sampling(stream, k):
    import random
    i = 0
    n = len(stream)
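# A minimal, self-contained sketch (an assumption, not the original continuation of
# reservoir_sampling) of classic reservoir sampling: keep the first k items, then replace
# a random slot with probability k/(i+1) for each later item i.
def reservoir_sampling_sketch(stream, k):
    import random
    reservoir = list(stream[:k])
    for i in range(k, len(stream)):
        j = random.randint(0, i)
        if j < k:
            reservoir[j] = stream[i]
    return reservoir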
clim=[0.1, 1.1])


def sample_random_position(grid):
    obstacles_grid = grid[0, :, :, 0]
    x_coords, y_coords = np.argwhere(obstacles_grid == 0).T
    position = np.random.randint(x_coords.size)
    return (x_coords[position], y_coords[position])


if __name__ == '__main__':
    args = parser.parse_args()
    env = environments[args.imsize]

    print("Loading data...")
    x_test, _, _, _ = load_data(env['test_data_file'])

    print("Initializing VIN...")
    VIN = create_VIN(
        env['input_image_shape'],
        n_hidden_filters=150,
        n_state_filters=10,
        k=env['k'],
    )
    print("Loading pre-trained VIN parameters...")
    storage.load(VIN, env['pretrained_network_file'])

    plt.figure(figsize=(8, 8))
    gridspec = gridspec.GridSpec(5, 4, height_ratios=[0, 2, 2, 2, 2])
    gridspec.update(wspace=0.1, hspace=0.1)
theta = 20
alpha1 = 20
alpha2 = 10
beta1 = 15
beta2 = 20

from loaddata import load_data
check_node_lst, START, END = load_data("附件2:数据集2-终稿.xlsx")
def main():
    first = True
    name_file = assign_name()
    prediction_path = '../predictions/' + name_file
    if not os.path.exists(prediction_path):
        os.makedirs(prediction_path)

    years = None
    features_excluded = ['week_start_date']
    _outliers = None
    cities = get_values_of("../data/dengue_features_train.csv", 'city')
    target = 'total_cases'
    all_revelant_features = {}
    all_scores = []
    modes = [
        # 'dropna',
        'interpolate',
        'mean',
        ['interpolate', 'mean']
    ]  # , ['interpolate', 'dropna']]

    for mode in modes:
        first = True
        scores_city = {}
        for city in cities:
            # Filtering by values of the keys
            _filter = {'city': [city], 'year': years}

            # Load city data
            data = load_data("../data/dengue_features_train.csv",
                             filter_parameters=_filter,
                             excludes_features=features_excluded,
                             outliers=_outliers)

            # Load total cases by city, year and week of year
            data_labels = load_data("../data/dengue_labels_train.csv",
                                    filter_parameters=_filter)

            data_fill = data_fill_mode(data, mode)
            data_labels_fill = data_fill_mode(data_labels, mode)

            # Adapt data for clustering
            data_test_hiech = data_fill.drop(labels=['city', 'year'], axis=1, inplace=False)

            # Outliers will be deleted
            elements, outliers, cut = clustering.hierarchical_clustering(
                data=data_test_hiech, verbose=False)
            n_element = count_elements(elements)
            n_outliers = count_elements(outliers)
            total = n_element + n_outliers

            print 'Analysis in: %s on mode %s' % (city, str(mode))

            total_outliers = []
            while outliers is not None:
                total_outliers += outliers
                data_test_hiech.drop(outliers, axis=0, inplace=True)
                elements, outliers, cut = clustering.hierarchical_clustering(
                    data_test_hiech, cut=cut, first_total=total, verbose=False)

            if total_outliers:
                print 'Auto-detected Outliers:'
                print total_outliers

            # Join data
            data_without_outliers = data_fill
            data_without_outliers.drop(total_outliers, axis=0, inplace=True)
            merge_data = pd.merge(data_without_outliers, data_labels_fill,
                                  on=['city', 'year', 'weekofyear'], how='inner')

            first_year = merge_data['year'].min()
            last_year = merge_data['year'].max()
            split_year = int(last_year - round((last_year - first_year) * 0.2))

            # Features clustering
            data_for_features = merge_data.drop(labels=['city', 'total_cases'], axis=1)
            feature_groups = clustering.hierarchical_clustering_features(
                data_for_features, verbose=False)

            # Cross-validation to select features
            features_selected, max_deph = cros.cross_validation(merge_data, feature_groups,
                                                                split_year, target=target)

            # Regressor to select the relevant features
            relevant_features = reg.tree_regressor(merge_data, split_year, max_deph,
                                                   features_selected, target, city,
                                                   verbose=False)
            all_revelant_features[city] = relevant_features
            all_features = merge_data.columns.tolist()[1:-1]

            data_Test = load_data("../data/dengue_features_test.csv",
                                  filter_parameters=_filter,
                                  excludes_features=features_excluded,
                                  outliers=_outliers)

            # Prediction
            prediction_knn, score_knn = predict.knn_prediction(merge_data, split_year,
                                                               features_selected, target,
                                                               data_Test, verbose=True)
            print('Score KNN on %s mode is : %.4f' % (mode, score_knn))

            prediction_rf, score_rf = predict.rf_prediction(merge_data, split_year,
                                                            all_features, target,
                                                            data_Test, verbose=True)
            print('Score RandomForest on %s mode is : %.4f' % (mode, score_rf))

            scores_city[city] = [(mode, 'Knn', score_knn), (mode, 'RF', score_rf)]

            # Load the submission data file.
            submission_data = load_data("../data/submission_format.csv",
                                        filter_parameters=_filter)

            # Write the result files to csv.
            col = ["city", "year", "weekofyear", "total_cases"]
            write_result(col, submission_data, prediction_knn, prediction_rf,
                         prediction_path, (name_file + str(mode)), first)
            first = False

        all_scores.append(scores_city)

    print all_scores
    """
if args.gpu: # check for cuda availability if torch.cuda.is_available: print('CUDA is available, use cuda mode') architecture = 'cuda' else: print('Cuda is not available on this system, fallback to cpu mode') architecture = 'cpu' else: print('Use cpu mode') architecture = 'cpu' print("...") print("Import training, test and validation set") print("...") dataloader_train, dataloader_test, dataloader_validation, class_to_idx_traing = loaddata.load_data( 'flowers') print("...") print("Building and traing model") model = network.build_and_train_model(args.model_init, args.hidden_layers, args.epochs, args.dropout, args.lr, architecture, dataloader_train, dataloader_validation, dataloader_test) print("...") print("The model looks like:") print(model) # run agains test data def test_model(model, testloader, architecture): model.eval()
                    tmp, medoids_ = totalcost(blogwords, distance.distance, medoids_idx)
                    # print tmp, '-------->', medoids_.keys()
                    if tmp < current_cost:
                        best_choice = list(medoids_idx)
                        best_res = dict(medoids_)
                        current_cost = tmp
                    medoids_idx[idx] = swap_temp
        iter_count += 1
        print current_cost, iter_count
        if best_choice == medoids_idx:
            break
        if current_cost <= pre_cost:
            pre_cost = current_cost
            medoids = best_res
            medoids_idx = best_choice
    return current_cost, best_choice, best_res


def print_match(best_medoids, blognames):
    for medoid in best_medoids:
        print blognames[medoid], '----->',
        for m in best_medoids[medoid]:
            print '(', m, blognames[m], ')',
        print
    print '---------' * 20


if __name__ == '__main__':
    blogwords, blognames = loaddata.load_data()
    best_cost, best_choice, best_medoids = kmedoids(blogwords, 8)
    print_match(best_medoids, blognames)
        if network.last_epoch in steps:
            print("Saving pre-trained VIN model...")
            storage.save(network, env['pretrained_network_file'])

            new_step = steps[network.last_epoch]
            session = tensorflow_session()
            network.variables.step.load(new_step, session)

    return on_epoch_end


if __name__ == '__main__':
    args = parser.parse_args()
    env = environments[args.imsize]

    print("Loading train and test data...")
    x_train, s1_train, s2_train, y_train = load_data(env['train_data_file'])
    x_test, s1_test, s2_test, y_test = load_data(env['test_data_file'])

    print("Initializing VIN...")
    network = algorithms.RMSProp(
        create_VIN(
            env['input_image_shape'],
            n_hidden_filters=150,
            n_state_filters=10,
            k=env['k'],
        ),
        verbose=True,
        error=loss_function,
        epoch_end_signal=on_epoch_end_from_steps(env['steps']),
        **env['training_options']
final=open("%seval.csv" % id,'a+') final.write("%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s;%s\n" % (votecount,bucketnumber,bsize,workerquality,prop,pcoverage,averagequality,accuracy[0],accuracy[1],accuracy[2],finalvalue[0],finalvalue[1])) final.close() result.close() print "done" def loop(id,area): count=[1,3,5,7] wquality=[0.8] bsize=[50,100,200] proportion=[0.5,0.55,0.6,0.65,0.7,0.75,0.80] for k in bsize: print k for i in count: print i for j in wquality: print j for l in proportion: print l processdata(id,i,j,area,k,l) if __name__ == "__main__": id = 54 # votecount = int(sys.argv[2]) # workerquality=float(sys.argv[3]) area=loaddata.load_data(id) loop(id,area)