Example No. 1
    def load_data(self):
        # Keep only the feature columns that come after the first len_info columns
        col = self.data_copy.columns.values[self.len_info:]
        t_set = np.array(self.training_set)
        v_set = np.array(self.val_set)
        training_frame = pd.DataFrame(t_set, index=None, columns=col)
        val_frame = pd.DataFrame(v_set, index=None, columns=col)

        train_dataset = datasets(training_frame)
        val_dataset = datasets(val_frame)
        self.train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                                        batch_size=self.bs,
                                                        shuffle=True)
        self.val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                                       batch_size=self.bs,
                                                       shuffle=True)
        return self.train_loader
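
The snippet assumes datasets is a torch.utils.data.Dataset subclass that wraps a DataFrame. A minimal sketch of such a wrapper (illustrative only; the real class, column layout, and label convention are not shown in this listing) could look like:

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset

class datasets(Dataset):
    """Minimal DataFrame-backed Dataset (sketch; last column assumed to be the label)."""

    def __init__(self, frame: pd.DataFrame):
        values = frame.to_numpy(dtype=np.float32)
        self.x = torch.from_numpy(values[:, :-1])
        self.y = torch.from_numpy(values[:, -1])

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]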
Example No. 2
def main():
    args = parse_args()

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'valid')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_cls_net')(config)

    dump_input = torch.rand(
        (1, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0]))
    logger.info(get_model_summary(model, dump_input))

    if config.TEST.MODEL_FILE:
        logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE))
        # model.load_state_dict(torch.load(config.TEST.MODEL_FILE))
        model.load_state_dict({
            k.replace('module.', ''): v
            for k, v in torch.load(config.TEST.MODEL_FILE)
            ['state_dict'].items()
        })

    else:
        model_state_file = os.path.join(final_output_dir,
                                        'final_state.pth.tar')
        logger.info('=> loading model from {}'.format(model_state_file))
        model.load_state_dict(torch.load(model_state_file))

    gpus = [0, 1]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    # Data loading code
    valdir = os.path.join(config.DATASET.ROOT, config.DATASET.TEST_SET)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    test_dataset = datasets(dataset_root='./Data/',
                            split='test',
                            size=config.MODEL.IMAGE_SIZE[0])

    valid_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=True,
        num_workers=config.WORKERS,
        pin_memory=True)

    # evaluate on validation set
    validate(config, valid_loader, model, criterion, final_output_dir,
             tb_log_dir, None)
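
The dict comprehension in the TEST.MODEL_FILE branch strips the 'module.' prefix that torch.nn.DataParallel adds to every parameter name when the wrapped model's state_dict is saved. A reusable helper doing the same cleanup (a sketch, not part of the original script) might be:

import torch

def load_clean_state_dict(model, checkpoint_path):
    """Load a checkpoint saved from a DataParallel-wrapped model into a bare model."""
    state = torch.load(checkpoint_path, map_location='cpu')
    # Some checkpoints nest the weights under a 'state_dict' key (assumption, as above)
    if isinstance(state, dict) and 'state_dict' in state:
        state = state['state_dict']
    model.load_state_dict({k.replace('module.', '', 1): v for k, v in state.items()})
    return model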
Example No. 3
    def __init__(self, bkps=[], mean=[], var=[], para_known=False):
        # Samples are appended here one at a time
        self.ts = []
        # Samples appended one at a time; updated after each detected change point
        self.ts_temp = []
        # Cumulative sums
        self.s_kt = []
        # Reverse cumulative sums
        self.s_kt_reverse = []
        # Generator for the PRI radar data
        self.data = datasets()
        # Detected change points (output)
        self.bkp_detect = []
        # Ground-truth change points
        self.bkps = bkps
        # Ground-truth means
        self.mean = mean
        # Ground-truth variances
        self.var = var
        # Counter for the intermediate list
        self.index = 0
        # Ground-truth mean for every individual sample
        self.average = []
        self.para_known = para_known
        j = 0
        for i in range(len(mean)):
            while j < bkps[i]:
                self.average.append(mean[i])
                j += 1
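
Assuming bkps holds cumulative change-point indices, which is what the final loop implies, the per-sample mean list can be built equivalently in one NumPy call; this is only a restatement of the loop above, not code from the original project:

import numpy as np

# One segment length per entry in mean, taken from the cumulative indices in bkps
segment_lengths = np.diff(np.concatenate(([0], bkps)))
average = np.repeat(mean, segment_lengths).tolist()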
Example No. 4
    def testrandomforest(self,dataset_name):
        
        datasetObj=datasets()
        dataset_files=datasetObj.get_datasetsFirstFilename(dataset_name)
        dataset_filestest=datasetObj.get_datasetsFirstFilenametest(dataset_name)
        # TODO: remove nsl-forest & predictions
        #bashCommand="$HADOOP_PREFIX/bin/hadoop fs -rmr /user/ubuntu/nsl-forest"
        bashCommand='curl -i -X DELETE "http://54.186.225.72:50070/webhdfs/v1/user/ubuntu/nsl-forest?op=DELETE&user.name=ubuntu&recursive=true"'
        process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        output, error = process.communicate()
        print ("output is testrandomforest" +output)
        print (error)

        #bashCommand="$HADOOP_PREFIX/bin/hadoop fs -rmr /user/ubuntu/predictions"
        bashCommand='curl -i -X DELETE "http://54.186.225.72:50070/webhdfs/v1/user/ubuntu/predictions?op=DELETE&user.name=ubuntu&recursive=true"'
        process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        output, error = process.communicate()
        print ("output is testrandomforest" +output)
        print (error)

        
        dataset_files=datasetObj.get_datasetsFirstFilename(dataset_name)
        bashCommand="$HADOOP_PREFIX/bin/hadoop jar $MAHOUT_HOME/examples/target/mahout-examples-1.0-SNAPSHOT-job.jar org.apache.mahout.classifier.df.mapreduce.BuildForest -Dmapred.max.split.size=1874231 -d /data/"+dataset_name+"/data/test/"+dataset_filestest[0]+" -ds /data/"+dataset_name+"/data/train/"+dataset_files[0]+".info -sl 5 -p -t 100 -o nsl-forest"
        process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        output, error = process.communicate()
        print ("output is testrandomforest" +output)
        print (error)
        return "randomforestTestDataset done"
Example No. 5
 def runrandomforest(self,dataset_name):
     
     datasetObj=datasets()
     dataset_files=datasetObj.get_datasetsFirstFilename(dataset_name)
     dataset_filestest=datasetObj.get_datasetsFirstFilenametest(dataset_name)
     bashCommand="$HADOOP_PREFIX/bin/hadoop jar $MAHOUT_HOME/examples/target/mahout-examples-1.0-SNAPSHOT-job.jar org.apache.mahout.classifier.df.mapreduce.TestForest -i /data/"+dataset_name+"/data/test/"+dataset_filestest[0]+" -ds /data/"+dataset_name+"/data/train/"+dataset_files[0]+".info -m nsl-forest -a -mr -o predictions"
     process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
     output, error = process.communicate()
     print ("output is runrandomforest" +output + "got it")
     print ("error is : "+error+" : got the error :)")
     result=error.split("Summary")[1]
     print "result is "+result
     new_result=result.split("\n")
     final_result="<br/>".join(new_result)
     return final_result
Example No. 6
    def __init__(self, net, dataset_path, save_path, isCuda=True):
        self.net = net
        self.dataset = DataLoader(datasets(dataset_path), batch_size=2500, shuffle=True, drop_last=True, num_workers=2)
        self.save_path = save_path
        self.isCuda = isCuda

        if isCuda:
            self.net.cuda()

        self.confidence_loss_function = nn.BCELoss()
        self.offset_loss_function = nn.MSELoss()
        self.landmarks_loss_function = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.net.parameters())

        if os.path.exists(save_path):
            self.net.load_state_dict(torch.load(save_path))
        else:
            self.net.apply(weight_init)
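
weight_init is applied only when no saved weights exist, and its definition is not part of this listing. A typical initializer of that shape (an assumption, not the project's actual weight_init) is:

import torch.nn as nn

def weight_init(m):
    """Initialise conv and linear layers; intended for use with net.apply(weight_init)."""
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.normal_(m.weight, mean=0.0, std=0.02)
        if m.bias is not None:
            nn.init.zeros_(m.bias)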
Example No. 7
import os

from numpy import random as rd 
import numpy as np

from dataset import datasets as datasets
import display

from ChangeFinder import CUsum    
from ChangeFinder import FSS



if __name__ == '__main__':
    data = datasets()
    signal, bkps, mean, var = data.PRI_Gauss_Jitter()

    # display.display_signal_score(signal)
    # detector = FSS(signal, bkps=[], mean=[], var=[], para_known=False, fixed_threshold=1000, fixed_size=50)
    # indicater = detector.fss_detection()
    detector = CUsum(bkps=[], mean=[], var=[], para_known=False)
    score = []
    for sig in signal:
        scor = detector.update(sig)
        score.append(scor)

    display.display_signal_score(signal, score, mode='PRI')



Example No. 8
import numpy as np

import ChangeFinder as CF
from ChangeFinder import CUsum
from ChangeFinder import FSS
import display
import dataset
import warnings
import time
warnings.filterwarnings('ignore')
##################################################################################################
####################################### Generate the dataset ####################################
##################################################################################################
dataset = dataset.datasets(1000, 10)

# Generate a jumping-mean dataset
# signal, bkps, outliers = dataset.jumping_mean_random(n_samples=1000, n_bkps=10)
# Generate a jumping-mean-and-variance dataset
# signal, bkps = dataset.jumping_mean_variance(n_samples=1000, n_bkp=10)
# Generate Gaussian-jittered PRI pulses
# signal, bkps, mean, var = dataset.PRI_Gauss_Jitter()
# Generate uniformly jittered PRI pulses
signal, bkps, lower_bound, upper_bound = dataset.PRI_norm_Jitter()
# Generate Rayleigh-jittered PRI pulses
# signal, bkps, scale = dataset.PRI_rayleigh_Jitter()

# Add missing pulses to the Gaussian-jittered PRI pulses
# PRI_miss = dataset.miss_PRI(signal, miss_ratio=0)
# Add spurious pulses to the Gaussian-jittered PRI pulses
# PRI_spur = dataset.add_spur_PRI(signal, para=0.0002, mode='pulse_ratio')    # add spurious pulses
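
To actually run change-point detection on the generated signal, the streaming CUsum loop from Example No. 7 applies directly; the constructor arguments below simply follow that example:

detector = CUsum(bkps=[], mean=[], var=[], para_known=False)
score = [detector.update(sig) for sig in signal]
display.display_signal_score(signal, score, mode='PRI')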
Example No. 9
import torch.nn as nn
from torch.utils.data import DataLoader
import os
import numpy as np
from net import trainNet
from dataset import datasets
import torch
'''Used to train the model'''

data = datasets(r"./datasets/train_img")
if os.path.exists("models/net.pth"):
    net = torch.load("models/net.pth")
else:
    net = trainNet().cuda()

train_data = DataLoader(data, batch_size=10, shuffle=True, drop_last=True)

loss_mse = nn.MSELoss()
loss_bce = nn.BCELoss()
optim = torch.optim.Adam(net.parameters())

for epoch in range(10):
    for i, (x, y) in enumerate(train_data):
        x = x.cuda()
        y = y.cuda()
        # Slice out the coordinates and the confidence
        coordinate = y[:, 0:4] / 224
        confidence = y[:, 4:5]
        coordinate_output, confidence_output = net(x)
        coordinate_loss = loss_mse(coordinate_output, coordinate)
        confidence_loss = loss_bce(confidence_output, confidence)
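
The listing stops right after the two losses are computed. The usual continuation (combine the losses, step the optimizer, and save the network each epoch, matching the torch.load at the top) would look roughly like the sketch below; the equal loss weighting and the per-epoch save are assumptions, not the original code:

        loss = coordinate_loss + confidence_loss

        optim.zero_grad()
        loss.backward()
        optim.step()

        if i % 10 == 0:
            print("epoch {} step {}: loss {:.4f}".format(epoch, i, loss.item()))

    # One full pass finished: persist the whole network, as loaded at the top of the script
    torch.save(net, "models/net.pth")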
Example No. 10
def main():
    args = parse_args()

    logger, final_output_dir, tb_log_dir = create_logger(
        config, args.cfg, 'train')

    logger.info(pprint.pformat(args))
    logger.info(pprint.pformat(config))

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.'+config.MODEL.NAME+'.get_cls_net')(
        config)

    dump_input = torch.rand(
        (1, 3, config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0])
    )
    logger.info(get_model_summary(model, dump_input))

    # copy model file
    this_dir = os.path.dirname(__file__)
    models_dst_dir = os.path.join(final_output_dir, 'models')
    if os.path.exists(models_dst_dir):
        shutil.rmtree(models_dst_dir)
    shutil.copytree(os.path.join(this_dir, '../lib/models'), models_dst_dir)

    writer_dict = {
        'writer': SummaryWriter(log_dir=tb_log_dir),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    gpus = [0,1]
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss().cuda()

    optimizer = get_optimizer(config, model)

    best_perf = 0.0
    best_model = False
    last_epoch = config.TRAIN.BEGIN_EPOCH
    if config.TRAIN.RESUME:
        model_state_file = os.path.join(final_output_dir,
                                        'checkpoint.pth.tar')
        if os.path.isfile(model_state_file):
            checkpoint = torch.load(model_state_file)
            last_epoch = checkpoint['epoch']
            best_perf = checkpoint['perf']
            model.module.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint (epoch {})"
                        .format(checkpoint['epoch']))
            best_model = True
            
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch-1
        )
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR,
            last_epoch-1
        )

    # Data loading code
    traindir = os.path.join(config.DATASET.ROOT, config.DATASET.TRAIN_SET)
    valdir = os.path.join(config.DATASET.ROOT, config.DATASET.TEST_SET)



    train_dataset = datasets(dataset_root='./Data/', split='train', size=config.MODEL.IMAGE_SIZE[0])
    test_dataset = datasets(dataset_root='./Data/', split='test', size=config.MODEL.IMAGE_SIZE[0])

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU*len(gpus),
        shuffle=True,
        num_workers=config.WORKERS,
        pin_memory=True
    )

    valid_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU*len(gpus),
        shuffle=True,
        num_workers=config.WORKERS,
        pin_memory=True
    )

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        lr_scheduler.step()
        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch,
              final_output_dir, tb_log_dir, writer_dict)
        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, model, criterion,
                                  final_output_dir, tb_log_dir, writer_dict)

        if perf_indicator > best_perf:
            best_perf = perf_indicator
            best_model = True
            # Save the weights of the new best model
            torch.save(model.module.state_dict(), './best-model.pth')
        else:
            best_model = False

        logger.info('=> saving checkpoint to {}'.format(final_output_dir))
        save_checkpoint({
            'epoch': epoch + 1,
            'model': config.MODEL.NAME,
            'state_dict': model.module.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }, best_model, final_output_dir, filename='checkpoint.pth.tar')

    final_model_state_file = os.path.join(final_output_dir,
                                          'final_state.pth.tar')
    logger.info('saving final model state to {}'.format(
        final_model_state_file))
    torch.save(model.module.state_dict(), final_model_state_file)
    writer_dict['writer'].close()
Example No. 11
'''
Created on 21 March 2018

@author: qimy
This part runs the test using the results of the softmax training.
'''

import tensorflow as tf
from dataset import datasets

test_text_dir = "D:/Users/qimy/softWare/eclipse/workspace/voiceasr-tensorflow/test_set"
model_dir = "D:/Users/qimy/softWare/eclipse/workspace/voiceasr-tensorflow"

data_sets = datasets()
data_sets.read_test_data(test_text_dir, True)

sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, [None, 5000])  # None is the (unspecified) number of samples; 5000 is the per-sample input dimension, i.e. the dictionary size
w = tf.Variable(tf.zeros([5000, 8]))  # Weight matrix: rows are the input dimension, columns the output dimension (here 8 classes)
b = tf.Variable(tf.zeros([8]))  # Bias of size 8, matching the output dimension
y = tf.nn.softmax(tf.matmul(x, w) + b)  # Network output: softmax over the logits tf.matmul(x, w) + b (tf.matmul is matrix multiplication)
y_ = tf.placeholder(tf.float32, [None, 8])  # Ground-truth labels, one-hot over the 8 classes

saver = tf.train.Saver()
saver.restore(sess, model_dir + "/model2/model.md")

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
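
The snippet ends after correct_prediction is defined. The standard way to finish this evaluation is to reduce it to an accuracy and run it on the test matrices; the attribute names data_sets.test_data and data_sets.test_labels below are assumptions about what read_test_data() fills in, not names confirmed by this listing:

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Hypothetical attribute names for the matrices loaded by read_test_data()
test_accuracy = sess.run(accuracy, feed_dict={x: data_sets.test_data,
                                              y_: data_sets.test_labels})
print("test accuracy: %.4f" % test_accuracy)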
Example No. 12
import subprocess
import requests
import json
from dataset import datasets
from fileFormatting import fileformatting
fileformattingObj=fileformatting()
datasetObj=datasets()

class algorithms:
    
    
    
    def trainlogistic(self,dataset_name,predictors,target):
        str_predictor=""
        count=0
        for i in predictors:
            count+=1
            str_predictor+=i+" "
        #datasetObj=datasets()
        dataset_files=datasetObj.get_datasetsFirstFilename(dataset_name)
        bashCommand="$MAHOUT_HOME/bin/mahout trainlogistic  --input ~/code/bdmaas/data/"+dataset_name+"/data/train/"+dataset_files[0]+"  --output ~/code/bdmaas/data/"+dataset_name+"/model  --target "+target+"   --categories 2  --predictors "+str_predictor+"   --types word  --features 20   --passes 100 --rate 50"
        process = subprocess.Popen(bashCommand, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        output, error = process.communicate()
        print ("output is "+output)
        print (error)
        return "trainlogistic done"
    
    def runlogistic(self,dataset_name):
        #datasetObj=datasets()
        dataset_files=datasetObj.get_datasetsFirstFilenametest(dataset_name)
        bashCommand="$MAHOUT_HOME/bin/mahout runlogistic  --input ~/code/bdmaas/data/"+dataset_name+"/data/test/"+dataset_files[0]+"  --model ~/code/bdmaas/data/"+dataset_name+"/model --auc --confusion"
if config.verbose:
    logging.basicConfig(
        level=logging.DEBUG,  # Log level for the file; everything at this level or above is written
        format='%(asctime)s %(filename)s : %(levelname)s %(message)s',  # Log record format
        datefmt='%Y-%m-%d %A %H:%M:%S',  # Timestamp format
        filename=os.path.join(os.getcwd(), config.log_name),  # Log file name
        filemode='w')  # Write mode: 'w' (overwrite) or 'a' (append)
batch_size = config.batch_size
num_epochs = config.num_epochs
learn_rate = config.learn_rate
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if config.display_plot:
    vis = Visualizer(config.visualize_win)

train_datasets = datasets(is_train=True)
train_loader = Data.DataLoader(train_datasets,
                               batch_size=batch_size,
                               shuffle=True)

val_datasets = datasets(is_train=False)
val_loader = Data.DataLoader(val_datasets,
                             batch_size=batch_size,
                             shuffle=False)

model1 = SENet(SEResNetBottleneck, [3, 4, 6, 3],
               groups=1,
               reduction=16,
               dropout_p=0.5,
               inplanes=64,
               input_3x3=False,