def submit(data, sub=None, label=None, split=-1, filelist=True, script='analyze'):
    if not options['doData'] and data in datalist:
        print 'Warning submitting %s. Data is disabled' % data
        return
    if not options['doMC'] and data in mclist:
        print 'Warning submitting %s. MC is disabled' % data
        return
    SubmitCondor.NFILE_PER_BATCH = options['batchsize']
    SubmitCondor.DoSubmit = options['submit']

    dataset = getDataset(data)
    if dataset is None:
        print '%s not found in dataset' % data
        return
    if label is None:
        label = labelmap[data]
    subset = dataset[data]

    if sub is None:
        sublist = subset.keys()
    elif type(sub) != list:
        sublist = [sub]
    else:
        sublist = sub

    for sub in sublist:
        if sub not in subset:
            print '%s not found in subset' % sub
            continue
        for i, input in enumerate(subset[sub]):
            clabel = '%s%s_%i' % (label, sub, i)
            nlabel = '%s%s_%i' % (labelmap[data], sub, i)
            command = [
                script, input, 'post%s.root' % clabel, '-1', '10000', nlabel,
                'split_%i' % split
            ]
            if filelist:
                command = ['-f'] + command
            if options['region'] is not None:
                command = ['-r', options['region']] + command
            if options['year'] is not None:
                command = ['-y', options['year']] + command
            if options['parallel']:
                proc = Process(target=SubmitCondor.submit, args=(command, True))
                proc.start()
            else:
                SubmitCondor.submit(command)
def main(): """Evolve a network.""" generations = 1000 # Number of times to evole the population. population = 25 # Number of networks in each generation. nn_param_choices = { 'epochs': 3000, 'nb_neurons': 4000, 'nb_layers': 2, 'activation': ['relu', 'elu', 'tanh', 'sigmoid'], 'optimizer': ['rmsprop', 'adam', 'sgd', 'adagrad', 'adadelta', 'adamax', 'nadam'], } logging.info("***Evolving %d generations with population %d***" % (generations, population)) data = getDataset() generate(generations, population, nn_param_choices, data)
def submit(data, sub=None, label=None, split=-1, filelist=True, script='analyze'):
    getargs()
    if not options['data'] and data in datalist:
        warning('Data is disabled', data)
        return
    if not options['mc'] and data in mclist:
        warning('MC is disabled', data)
        return
    if not options['signal'] and data in signalist:
        warning('Signal is disabled', data)
        return
    if options['onlyWZG'] and not any(
            wzg in data for wzg in ('wjets', 'zjets', 'dyjets', 'gjets')):
        warning('Only submitting WJets, ZJets, DYJets, or GJets enabled', data)
        return
    if any(options[sample] for sample in full_list) and not options[data]:
        warning('Only submitting %s' % (', '.join(
            [sample for sample in full_list if options[sample]])))
        return
    SubmitCondor.NFILE_PER_BATCH = options['batchsize']
    SubmitCondor.DoSubmit = options['submit']
    SubmitCondor.ResubmitError = options['error']

    dataset = getDataset(data)
    if dataset is None:
        print '%s not found in dataset' % data
        return
    if label is None:
        label = labelmap[data]
    subset = dataset[data]

    if sub is None:
        sublist = subset.keys()
    elif type(sub) != list:
        sublist = [sub]
    else:
        sublist = sub

    for sub in sublist:
        if sub not in subset:
            print '%s not found in subset' % sub
            continue
        for i, input in enumerate(subset[sub]):
            clabel = '%s%s_%i' % (label, sub, i)
            nlabel = '%s%s_%i' % (labelmap[data], sub, i)
            command = [
                script, input, 'post%s.root' % clabel, '-1', '10000', nlabel,
                'split_%i' % split
            ]
            if filelist:
                command = ['-f'] + command
            if any(options['region']):
                command = ['-r', options['region']] + command
            if any(options['year']):
                command = ['-y', options['year']] + command
            if options['parallel']:
                proc = Process(target=SubmitCondor.submit, args=(command, True))
                proc.start()
            else:
                SubmitCondor.submit(command)
# a = pd.concat([pd.Series(data=actual, name='Actual'),
#                pd.Series(data=predictions, name='Predicted'),
#                pd.Series(data=probability, name='Probability')], axis=1)
# a.to_csv(path_or_buf="predictions_all_test.csv", index=False)

# all
x_data, labels = importCSV2()
actual = np.ndarray.argmax(labels, axis=1)
p = model.predict(x_data)
predictions = np.ndarray.argmax(p, axis=1)
probability = np.ndarray.max(p, axis=1)
a = pd.concat([
    pd.Series(data=actual, name='Actual'),
    pd.Series(data=predictions, name='Predicted'),
    pd.Series(data=probability, name='Probability')
], axis=1)
a.to_csv(path_or_buf="predictions_all.csv", index=False)

# test
_, x_test, _, y_test = getDataset()
actual = np.ndarray.argmax(y_test, axis=1)
p = model.predict(x_test)
predictions = np.ndarray.argmax(p, axis=1)
probability = np.ndarray.max(p, axis=1)
a = pd.concat([
    pd.Series(data=actual, name='Actual'),
    pd.Series(data=predictions, name='Predicted'),
    pd.Series(data=probability, name='Probability')
], axis=1)
a.to_csv(path_or_buf="predictions_test.csv", index=False)
from keras.layers import Dense
from pbar import PLogger
from autosaver import AutoSaver
from dataset import getDataset
from keras.models import Sequential, load_model
from keras.losses import categorical_crossentropy
import pickle
import os

FILENAME = 'model/model.h5'
EPOCHS_FILENAME = 'model/epochs.pkl'
TOTAL_EPOCHS = 844

x_train, x_test, y_train, y_test = getDataset()

class_weight = {
    0: 1.,
    1: 71.,
    2: 74.,
    3: 73.,
    4: 73.,
    5: 66.,
    6: 61.,
    7: 57.,
    8: 57.,
    9: 63.,
    10: 66.,
    11: 66.,
    12: 66.,
    13: 66.,
from network import ConvNet
import numpy as np
from dataset import getDataset

dataset = getDataset()
print(dataset["balance_train"])

conv = ConvNet(dataset)
conv.fit(dataset["one_hot_train"],
         dataset["one_hot_label_train"],
         dataset["one_hot_validation"],
         dataset["one_hot_label_validation"],
         graphics=True)

print("Final accuracy:")
print(" " + str(
    conv.computeAccuracy(dataset["one_hot_validation"],
                         dataset["labels_validation"])))
print("Friends:")
conv.classify(dataset["one_hot_friends"])

# # F = np.zeros((4, 2, 2))
# # print(F[:, :, 0])
# # print(F[:, :, 0].shape)
# F[:, :, 0] = [[1, 2], [3, 4], [5, 6], [7, 8]]
# F[:, :, 0] = [[1, 2], [3, 4], [5, 6], [7, 8]]
# # print(F[:, :, 0])
# # print(F[:, :, 1])
# # print(F)
#
def testParam(modelFunName, activateFunName, datasetName, modelWidth,
              dropoutType, groupNum, keepProb, batchSize, perAverageEpoch):
    ###########################################
    # load data
    logDir = getDataSaveDir(datasetName, modelWidth, activateFunName,
                            dropoutType, groupNum, keepProb, perAverageEpoch,
                            batchSize)
    all_train_data, all_train_labels, test_data, test_labels = dataset.getDataset(
        datasetName, "./datasets")
    all_train_data = all_train_data.astype(np.float32)
    test_data = test_data.astype(np.float32)
    temp_train_data, temp_train_labels = utils.shuffData(
        all_train_data, all_train_labels)  # returns copied data
    validateDataLen = int(len(temp_train_data) * VALIDATE_RATE)
    validate_data = temp_train_data[:validateDataLen, :, :, :]
    validate_labels = temp_train_labels[:validateDataLen]
    part_train_data = temp_train_data[validateDataLen:, :, :, :]
    part_train_labels = temp_train_labels[validateDataLen:]
    utils.normalizeData(all_train_data)
    utils.normalizeData(part_train_data)
    utils.normalizeData(validate_data)
    utils.normalizeData(test_data)

    if isNeedAugment(datasetName):
        allTrainDataIterator = utils.AugmentDatasetLiterator(
            all_train_data, all_train_labels, EPOCHSPERCHECK, batchSize,
            isNeedFlip(datasetName))
        partTrainDataIterator = utils.AugmentDatasetLiterator(
            part_train_data, part_train_labels, EPOCHSPERCHECK, batchSize,
            isNeedFlip(datasetName))
    else:
        allTrainDataIterator = utils.DatasetLiterator(
            all_train_data, all_train_labels, EPOCHSPERCHECK, batchSize)
        partTrainDataIterator = utils.DatasetLiterator(
            part_train_data, part_train_labels, EPOCHSPERCHECK, batchSize)
    validateDataIterator = utils.DatasetLiterator(validate_data,
                                                  validate_labels, 1, batchSize)
    testDataIterator = utils.DatasetLiterator(test_data, test_labels, 1,
                                              batchSize)

    ################################################
    # build model
    imageShape = list(temp_train_data.shape)
    imageShape[0] = None
    imagePlaceholder = tf.placeholder(tf.float32, imageShape)
    labelPlaceholder = tf.placeholder(tf.int32, [None])
    isTrainPlaceholder = tf.placeholder(tf.bool)
    learningRatePlaceholder = tf.placeholder(tf.float32, name="learningRate")
    validateErrPlaceholder = tf.placeholder(tf.float32)

    modelFun = getModelFun(modelFunName)
    logits, combineOps, averageOps = modelFun(
        imagePlaceholder, modelWidth, dataset.getDatasetClassNum(datasetName),
        getActivateFun(activateFunName), dropoutType, groupNum, keepProb,
        isTrainPlaceholder)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labelPlaceholder,
                                                  logits=logits)
    predictions = tf.cast(tf.argmax(logits, axis=1), tf.int32)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(predictions, labelPlaceholder), tf.float32))
    optimizer = tf.train.AdamOptimizer(learning_rate=learningRatePlaceholder)
    trainOp = optimizer.minimize(loss)
    saver = tf.train.Saver(getSaveVariable())

    tf.summary.scalar("1_accurate_", accuracy)
    tf.summary.scalar("2_loss_", loss)
    tf.summary.scalar("3_learningRate_", learningRatePlaceholder)
    tf.summary.scalar("4_validateErr_", validateErrPlaceholder)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(logDir, graph=tf.get_default_graph())

    tensorMap = {
        "predictions": predictions,
        "isTrainPlaceholder": isTrainPlaceholder,
        "imagePlaceholder": imagePlaceholder,
        "labelPlaceholder": labelPlaceholder,
        "loss": loss,
        "accuracy": accuracy,
        "merged": merged,
        "trainOp": trainOp,
        "learningRatePlaceholder": learningRatePlaceholder,
        "train_writer": train_writer,
        "combineOps": combineOps,
        "averageOps": averageOps,
        "saver": saver,
        "validateErrPlaceholder": validateErrPlaceholder
    }
    ####################################################
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = (GPU_MEMORY_USE) / (
        PARALLEL_RANK)
    session = tf.Session(config=config)
    session.run([tf.global_variables_initializer()])
    modelPath = getModelSavePath(datasetName, modelWidth, activateFunName,
                                 dropoutType, groupNum, keepProb,
                                 perAverageEpoch, batchSize)
    errs = []
    supConfigs = []
    # globalAutoTuner = utils.GlobalLearningRateTuner(STARTLREANINGRATE)
    localAutoTuner = utils.LocalLearningRateTuner(STARTLREANINGRATE,
                                                  maxDontSave=7)
    autoTuners = [localAutoTuner]

    # find training parameters based on the validation data
    if USE_AUTO_TUNER:
        for autoTuner in autoTuners:
            session.run([tf.global_variables_initializer()])
            trainModel(session, partTrainDataIterator, validateDataIterator,
                       autoTuner, tensorMap, perAverageEpoch, modelPath, logDir)
            saver.restore(session, modelPath)  # load early-stop model
            errs.append(computErr(session, testDataIterator, tensorMap))
            if IS_RETRAIN_ON_ALL_TRAINSET:
                supConfigs.append((autoTuner.getFixTuner(),
                                   allTrainDataIterator, testDataIterator))
    else:
        fixTuner1 = utils.getFixLearningRateTuner([10, 10], [1e-3, 1e-4],
                                                  isEarlyStop=False)
        supConfigs.append((fixTuner1, allTrainDataIterator, testDataIterator))

    for tuner, trainIterator, validateIterator in supConfigs:
        session.run([tf.global_variables_initializer()])
        trainModel(session, trainIterator, validateIterator, tuner, tensorMap,
                   perAverageEpoch, modelPath, logDir)
        saver.restore(session, modelPath)  # load model
        errs.append(computErr(session, testDataIterator, tensorMap))

    session.close()
    if len(errs) == 0:
        raise Exception("config error")
    return [min(errs)]
# Standard imports this snippet relies on (added here; assumed from the full script)
import torch
import torchvision
import torchvision.datasets as dset
from torch.utils.data import DataLoader

from dataset import getDataset
from helpFunction import imshow, show_plot
from loss import ContrastiveLoss
from net import SiameseNetwork


# Config
class Config:
    training_dir = "./data/training/"
    train_batch_size = 64
    train_number_epochs = 50


# Load Train Dataset
train_dataset = dset.MNIST(root=Config.training_dir, train=True)
siamese_dataset = getDataset(train_dataset)

# Show Dataset Example
vis_dataloader = DataLoader(siamese_dataset,
                            shuffle=True,
                            num_workers=8,
                            batch_size=8)
dataiter = iter(vis_dataloader)
example_batch = next(dataiter)
concatenated = torch.cat((example_batch[0], example_batch[1]), 0)
imshow(torchvision.utils.make_grid(concatenated))
print(example_batch[2].numpy())

train_dataloader = DataLoader(siamese_dataset,
                              shuffle=True,
                              num_workers=8,
                              batch_size=Config.train_batch_size)
genres = [
    filename for filename in genres if os.path.isdir(slicesPath + filename)
]
nbClasses = len(genres)
# print(str(genres))

# Create model
model = createModel(nbClasses, sliceSize)

if "train" in args.mode:
    # Create or load new dataset
    train_X, train_y, validation_X, validation_y, genre_dict = getDataset(
        filesPerGenre,
        genres,
        sliceSize,
        validationRatio,
        testRatio,
        genre_dict,
        mode="train")

    # Define run id for graphs
    run_id = "MusicGenres - " + str(batchSize) + " " + ''.join(
        random.SystemRandom().choice(string.ascii_uppercase)
        for _ in range(10))

    # Train the model
    print("[+] Training the model...")
    model.fit(train_X,
              train_y,
              n_epoch=nbEpoch,
              batch_size=batchSize,
from sklearn.metrics import classification_report
from dataset import getDataset
import yaml

parser = argparse.ArgumentParser()
parser.add_argument('--config_file',
                    type=str,
                    required=True,
                    help='Path to yaml file.')

if __name__ == '__main__':
    args = parser.parse_args()
    model_dict = {'blockCNN': nw.blockCNN, 'extendedCNN': en.extendedCNN}
    with open(args.config_file, 'r') as f:
        config = yaml.load(f, yaml.SafeLoader)

    gd = getDataset(os.path.expanduser(config['data_path']), mode='Training')
    data_loader = DataLoader(gd, config['batch_size'], False)
    device = torch.device(config['device'])
    model = model_dict[config['model']](config['num_labels'],
                                        config['num_filters'],
                                        config['kernel_size'])
    model.to(device)
    loss = nn.CrossEntropyLoss()
    loss.to(device)
    opt = torch.optim.Adam(model.parameters(),
                           lr=float(config['learning_rate']))
    model.train()
    for ep in tqdm(range(config['epochs'])):
        for label, image in data_loader:
            opt.zero_grad()
            label = label.to(device).long()
            image = image.to(device)
tf.app.flags.DEFINE_float("keep_prob", 0.7, "dropout keep probability")
tf.app.flags.DEFINE_boolean("is_train", arg.test, "set to False for inference")
tf.app.flags.DEFINE_boolean(
    "is_scaled", False,
    "set to True to use the scaled dataset (test only)")
tf.app.flags.DEFINE_string("data_dir", "", "data directory")
tf.app.flags.DEFINE_string("train_dir",
                           "./train/" + arg.city + "/" + arg.name,
                           "training directory")
tf.app.flags.DEFINE_integer("inference_version", arg.inf,
                            "model version to use for inference")

FLAGS = tf.app.flags.FLAGS

if FLAGS.is_train:
    data = dataset.getDataset(FLAGS.city_name,
                              FLAGS.time_step_name,
                              batch_size=FLAGS.batch_size,
                              is_train=FLAGS.is_train,
                              date=FLAGS.current_test_date)
else:
    data = pred_dataset.getPredDataset(FLAGS.city_name,
                                       FLAGS.time_step_name,
                                       batch_size=48,
                                       date=FLAGS.current_test_date,
                                       need_scale=FLAGS.is_scaled)

dist_mat = data.get_dist_matrix()
(aq_stations, meo_stations, dist_dims) = dist_mat.shape

with tf.Session() as sess:
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
def testFullConnectedNNOnMnist():
    trainData, trainLabel, testData, testLabel = dataset.getDataset(
        "MNIST", "./datasets")
    trainData = trainData.astype(np.float32)
    testData = testData.astype(np.float32)
    utils.normalizeData(trainData)
    utils.normalizeData(testData)
    trainDataIterator = utils.DatasetLiterator(trainData, trainLabel, 50,
                                               BATCH_SIZE)
    testDataIterator = utils.DatasetLiterator(testData, testLabel, 1,
                                              BATCH_SIZE)

    imagePlaceholder = Placeholder("image", [-1, 28 * 28])
    labelPlaceholder = Placeholder("label", [-1, 10])
    bathSizePlaceholder = Placeholder("batch size", [1])
    learningRatePlaceholder = Placeholder("learning rate", [1])

    output = imagePlaceholder
    output = denseLayer(output, 128, "layer1")
    output = ReluNode("relu1", [output])
    output = denseLayer(output, 128, "layer2")
    output = ReluNode("relu2", [output])
    logits = denseLayer(output, 10, "layer3")
    predictLabel = softmax(logits, "softmax")
    loss = meanCrossEntropy(predictLabel, labelPlaceholder,
                            bathSizePlaceholder, "loss")
    gradientPairs = minimize(loss)
    trainOp = SGDNode("train op", gradientPairs, learningRatePlaceholder)

    step = 0
    learningRate = np.array([1e-2])
    bathSize = np.array([BATCH_SIZE])
    for image, label in trainDataIterator.getNextBatch():
        image = np.reshape(image, [-1, 28 * 28])
        label = getOneHotLabel(label, 10)
        feedDic = {
            imagePlaceholder.getName(): image,
            labelPlaceholder.getName(): label,
            bathSizePlaceholder.getName(): bathSize,
            learningRatePlaceholder.getName(): learningRate
        }
        setFeedDic(feedDic)
        trainOp.getValue()
        step += 1
        if step % 100 == 0:
            print("loss:" + str(loss.getValue()[0]))

    testCount = 0
    errorCount = 0
    for image, label in testDataIterator.getNextBatch():
        image = np.reshape(image, [-1, 28 * 28])
        feedDic = {imagePlaceholder.getName(): image}
        setFeedDic(feedDic)
        predict = predictLabel.getValue()
        predict = np.argmax(predict, -1)
        testCount += len(label)
        errorCount += sum((predict != label).astype(np.int32))
    print("\n\n")
    print("error rate:" + str(errorCount / testCount))
def main(experiment, logging, augmentation, dataset, model, metric, training):
    if experiment["reproducible"]:
        print('fix seed on')
        seed = 0
        random.seed(seed)  # augmentation
        np.random.seed(seed)  # numpy
        ia.seed(seed)  # imgaug library
        torch.manual_seed(seed)  # cpu
        torch.cuda.manual_seed(seed)  # gpu
        torch.cuda.manual_seed_all(seed)  # multi gpu
        torch.backends.cudnn.enabled = False  # cudnn library
        torch.backends.cudnn.deterministic = True

    ##################
    #    logging     #
    ##################
    LOG = Logger(**logging)
    LOG('print', name='config', values=json.dumps(config))

    ##################
    #    dataset     #
    ##################
    datasets = getDataset(**dataset, **augmentation)
    LOG('slack',
        name='dataset',
        values=[str(datasets['train']), str(datasets['val'])])

    ##################
    #     model      #
    ##################
    MODEL = getModel(**model)
    input_size = [augmentation['channel']] + augmentation['size']
    MODEL.modelSummary(input_size, LOG)

    ##################
    #     metric     #
    ##################
    metricParser = TypeParser(types={
        "IOU": M.IOU,
        "DICE": M.DICE,
        "accuracy": M.Accuracy,
        "f1": M.F1,
    })
    metrics = [metricParser(**m) for m in metric]

    ##################
    #    training    #
    ##################
    trainer = Trainer(model=MODEL, datasets=datasets, metrics=metrics, LOG=LOG)
    try:
        trainer.train(**config["training"])
    except Exception as e:
        LOG('slack', name='warning', values='abrupt end, {}'.format(e))
        LOG.finish()
        print('abrupt end, {}'.format(e))
        print(traceback.format_exc())

    infer = Inference(
        model=MODEL,
        datasets=datasets,
        LOG=LOG,
        metrics=metrics,
        visualizations=None,
    )
    infer()
    LOG.finish()
    cl_report = classification_report(y_true,
                                      y_pred,
                                      labels=list(range(7)),
                                      output_dict=True)
    pr = precision_score(y_true, y_pred, average='micro')
    print('Precision:', pr)
    return cl_report


if __name__ == '__main__':
    args = parser.parse_args()
    model_dict = {'blockCNN': nw.blockCNN, 'extendedCNN': en.extendedCNN}
    with open(args.config_file, 'r') as f:
        config = yaml.load(f, yaml.SafeLoader)

    public = getDataset(os.path.expanduser(config['data_path']), mode='Public')
    private = getDataset(os.path.expanduser(config['data_path']),
                         mode='Private')
    public_loader = DataLoader(public, config['batch_size'], False)
    private_loader = DataLoader(private, config['batch_size'], False)
    device = torch.device(config['device'])
    model = model_dict[config['model']](config['num_labels'],
                                        config['num_filters'],
                                        config['kernel_size'])
    model.to(device)

    # validation
    model.eval()
    public_res = evaluate(model, config['ckpt_path'], public_loader)
def testCNNOnCIAFR():
    trainData, trainLabel, testData, testLabel = dataset.getDataset(
        "SVHN", "./datasets")
    trainData = trainData.astype(np.float32)
    testData = testData.astype(np.float32)
    utils.normalizeData(trainData)
    utils.normalizeData(testData)
    trainDataIterator = utils.DatasetLiterator(trainData, trainLabel, 10,
                                               BATCH_SIZE)
    testDataIterator = utils.DatasetLiterator(testData, testLabel, 1,
                                              BATCH_SIZE)

    imagePlaceholder = Placeholder("image", [-1, 32, 32, 3])
    labelPlaceholder = Placeholder("label", [-1, 10])
    bathSizePlaceholder = Placeholder("batch size", [1])
    learningRatePlaceholder = Placeholder("learning rate", [1])

    output = imagePlaceholder
    output = convLayer(output, 3, 2, 32, "layer1")
    output = ReluNode("relu1", [output])
    output = convLayer(output, 3, 2, 64, "layer2")
    output = ReluNode("relu2", [output])
    output = convLayer(output, 3, 2, 128, "layer3")
    output = ReluNode("relu3", [output])
    print("last conv feature size:" + str(output.getShape()))
    avgPoolSize = max(output.getShape()[1], output.getShape()[2])
    output = avgPool(output, avgPoolSize, 1, "globalPool")
    output = ReshapeOp("reshape", [output, np.array([-1, 128])])
    logits = denseLayer(output, 10, "logits")
    predictLabel = softmax(logits, "softmax")
    loss = meanCrossEntropy(predictLabel, labelPlaceholder,
                            bathSizePlaceholder, "loss")
    gradientPairs = minimize(loss)
    trainOp = MomentumSGDNode("train op", gradientPairs,
                              learningRatePlaceholder,
                              ConstValueNode("moment", np.array([0.9])))

    step = 0
    learningRate = np.array([1e-2])
    bathSize = np.array([BATCH_SIZE])
    nodes = [
        node for node in getNodeByConstructSeq()
        if "const" not in node.getName() and "batch" not in node.getName()
    ]
    for image, label in trainDataIterator.getNextBatch():
        label = getOneHotLabel(label, 10)
        feedDic = {
            imagePlaceholder.getName(): image,
            labelPlaceholder.getName(): label,
            bathSizePlaceholder.getName(): bathSize,
            learningRatePlaceholder.getName(): learningRate
        }
        setFeedDic(feedDic)
        trainOp.getValue()
        # for node in nodes:
        #     name = node.getName()
        #     value = node.getValue()
        #     shape = value.shape
        #     pass
        step += 1
        print("step:" + str(step) + " loss:" + str(loss.getValue()[0]))

    testCount = 0
    errorCount = 0
    for image, label in testDataIterator.getNextBatch():
        feedDic = {imagePlaceholder.getName(): image}
        setFeedDic(feedDic)
        predict = predictLabel.getValue()
        predict = np.argmax(predict, -1)
        testCount += len(label)
        errorCount += sum((predict != label).astype(np.int32))
    print("\n\n")
    print("error rate:" + str(errorCount / testCount))
# Standard imports this fragment relies on (added here; assumed from the companion training script)
import torch
import torchvision.datasets as dset
from torch.autograd import Variable
from torch.utils.data import DataLoader
from dataset import getDataset

from helpFunction import plot_mnist
from helpFunction import imshow
from net import SiameseNetwork


# Config
class Config:
    testing_dir = "./data/testing/"


# Load Model
net = torch.load('./model')

# Load Test Dataset
dataset_test = dset.MNIST(root=Config.testing_dir, train=False)
siamese_dataset = getDataset(dataset_test, relables=True)
test_dataloader = DataLoader(siamese_dataset,
                             num_workers=6,
                             batch_size=1,
                             shuffle=True)
dataiter = iter(test_dataloader)

numpy_all = []
numpy_labels = []
correct_pre = 0

# Testing
for i in range(10000):
    x0, x1, label2, label0, label1 = next(dataiter)
    output1, output2 = net(
        Variable(x0).type(torch.FloatTensor).cuda(),
        Variable(x1).type(torch.FloatTensor).cuda())
def makeCrustIDT(args):
    rows, cols = getDatasetDims(args.region)
    lcds, elevds = getDataset(args.region)
    global crustIDT
    crustCoordsList = []
    crustValuesList = []
    crustlogger.info("making Crust IDT")
    # trying ten percent since one seemed too lame
    worldLatLong = getLatLongArray(lcds, (0, 0), (rows, cols), 1)
    crustCoordsList = [
        worldLatLong[randint(0, rows - 1) * cols + randint(0, cols - 1)]
        for elem in xrange(int(rows * cols * 0.01))
    ]
    crustValuesList = [uniform(1, 5) for elem in crustCoordsList]
    crustIDT = Invdisttree(array(crustCoordsList), array(crustValuesList))
def checkScale(args):
    "Checks that the given scale is valid for the given region. Returns scale and multiplier."
    fullScale = 1  # don't want higher resolution than reality!
    if isinstance(args.scale, list):
        oldscale = args.scale[0]
    else:
        oldscale = int(args.scale)
    lcds, elevds = getDataset(args.region)
    elevds = None
    lcperpixel = lcds.transforms[2][1]
    lcds = None
    scale = min(oldscale, lcperpixel)
    scale = max(scale, fullScale)
    if scale != oldscale:
        print "Warning: scale of %d for region %s is invalid -- changed to %d" % (oldscale, args.region, scale)
    mult = lcperpixel / scale
    args.scale = scale
    args.mult = mult
    return (scale, mult)
from pbar import PLogger
from autosaver import AutoSaver
from dataset import getDataset
from keras.models import Sequential, load_model
from keras.losses import categorical_crossentropy
import pickle
import os

FILENAME = 'model/model.h5'
EPOCHS_FILENAME = 'model/epochs.pkl'
TOTAL_EPOCHS = 844

x_train, x_test, y_train, y_test = getDataset()

class_weight = {
    0: 1.,
    1: 71.,
    2: 74.,
    3: 73.,
    4: 73.,
    5: 66.,
    6: 61.,
    7: 57.,
    8: 57.,
    9: 63.,
    10: 66.,
    11: 66.,
    12: 66.,
    13: 66.,
else:
    raise e

if sys.argv[1][0:2] == 'UC':
    channelId = sys.argv[1]
else:
    channelId = subprocess.check_output(
        ["node", "../download/pYoutubeChannelIdFromUsername.js", sys.argv[1]])
    channelId = channelId.strip().decode('utf-8')

print('predicting last event for channelId: ' + channelId)
subprocess.call(["node", "../download/pDownloadUserActivity.js", channelId])

X, y = dataset.getDataset([User('', channelId)])
if len(X) == 0:
    print('not enough history for user')
    sys.exit(0)

print('actual last event: ' + dataset.types[y[0]])
print('predicted last events...')
output = models[0].predict(X)
print('logistic regression: ' + dataset.types[output[0]])
output = models[1].predict(X)
print('support vector machine: ' + dataset.types[output[0]])
output = models[2].predict(X)
print('nearest neighbor: ' + dataset.types[output[0]])
def train(X, y, C_reg=1e2, k_neighbors=3):
    """Train three models and return them in a tuple
    (logistic regression, svm, nearest neighbors).
    """
    # use the passed-in hyperparameters (previously hard-coded as 1e2 and 3)
    mod_logreg = linear_model.LogisticRegression(C=C_reg)
    mod_svm = svm.SVC(C=C_reg)
    mod_neigh = neighbors.KNeighborsClassifier(n_neighbors=k_neighbors)
    mod_logreg.fit(X, y)
    mod_svm.fit(X, y)
    mod_neigh.fit(X, y)
    return (mod_logreg, mod_svm, mod_neigh)


if __name__ == '__main__':
    X, y = dataset.getDataset()
    examples = list(zip(X, y))
    numpy.random.shuffle(examples)
    testExampleCount = math.floor(0.30 * len(examples))
    testSet = examples[:testExampleCount]
    trainSet = examples[testExampleCount:]

    X, y = zip(*trainSet)
    models = train(X, y)

    X, y = zip(*testSet)
    accuracy = models[0].score(X, y)
    print('logistic regression accuracy: ', accuracy)
    accuracy = models[1].score(X, y)
    print('svm accuracy: ', accuracy)
    accuracy = models[2].score(X, y)
    print('nearest neighbor accuracy: ', accuracy)