Beispiel #1
0
def featureSelection(path):
    """Walk *path* and build a DataSet of per-class document frequencies.

    Each leaf directory under *path* is treated as one class and every file
    inside it as one document.  For each class we count, per token, how many
    documents contain that token at least once (document frequency); tokens
    are lowercased and punctuation is skipped.

    :param path: root directory whose leaf directories are class folders
    :return: DataSet populated with one SNLPClass per leaf directory
    """
    dataset = DataSet()
    class_name = ''
    for root, dirs, files in os.walk(path):
        # a directory with no sub-directories is a class folder
        if dirs == []:
            if class_name != os.path.basename(root):
                class_name = os.path.basename(root)
                print(class_name)
                class_count = len(files)
                freq_map = {}
                for f in files:
                    # a set ensures each token counts once per document
                    temp_set = set()
                    with open(os.path.join(root, f), 'r') as fin:
                        # iterate the file directly instead of readlines()
                        for line in fin:
                            for token in wordpunct_tokenize(line):
                                if token not in punctuation:
                                    temp_set.add(token.lower())
                    for token in temp_set:
                        # dict.get replaces the removed dict.has_key idiom
                        freq_map[token] = freq_map.get(token, 0) + 1
                dataset.add_new_class(SNLPClass(class_name, freq_map, class_count))
    return dataset
Beispiel #2
0
 def add_source(self):
     """Ask the user for a data file, load it, and add it to the source list."""
     filename = tkFileDialog.askopenfilename()
     if not filename:
         # dialog was cancelled — nothing to load
         return
     new_set = DataSet()
     new_set.readFromFile(filename)
     self.datasets.append(new_set)
     self.sourcelist.insert(END, str(new_set))
Beispiel #3
0
def main():
    """Load each JSON file named on the command line, sanitize it, and print
    its 'difficulty' histogram (raw and binned)."""
    if len(sys.argv) < 2:
        print('Usage: python dataProcess.py [filenames]')
        return  # nothing to process without file arguments

    fnames = sys.argv[1:]
    for fname in fnames:
        # context manager closes the handle (previously leaked via bare open)
        with open(fname) as f:
            dataArr = json.loads(f.read())
        lowerDataArr = sanitizeDataDict(dataArr)

        ds = DataSet(lowerDataArr)

        classHist = ds.getHistRepr("difficulty")
        print(classHist)
        classHistBinned = BinnedDataDict(classHist)
        print(classHistBinned)
Beispiel #4
0
def main(argv = None):
	"""Entry point: load a training set plus its metadata file and print the
	entropy of the full subset.  Returns 1 on missing arguments, else 0."""
	argv = sys.argv if argv is None else argv

	if len(argv) < 3:
		print('Please specify a training set and its metadata file.')
		return 1

	training_set = DataSet(argv[1], argv[2])
	print('entropy: ' + str(training_set.get_subset().entropy()))

	return 0
import csv
import numpy as np
import sys
from HMM import *
from DataSet import *

# Load the data set named on the command line and fit a 16-state, 4-output
# HMM on its training state/output sequences.
filename = sys.argv[1]
dSet = DataSet(filename)
dSet.readFile(200, "train")  # NOTE(review): 200 presumably limits records read — confirm in DataSet.readFile
hmm = HMM(16, 4, dSet.trainState, dSet.trainOutput)
hmm.train()
# transforms
# Inputs: resize, color-jitter augmentation, tensor conversion, and
# normalization with the standard ImageNet channel statistics.
transform_input_image = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ColorJitter(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Targets: only resized; ToTensor stays disabled so label values are not
# rescaled to [0, 1].
transform_target_image = transforms.Compose([
    transforms.Resize((256, 256)),
    # transforms.ToTensor()
])

# Train/test splits; the root_* path variables are defined elsewhere in the file.
train_set = DataSet.ImageSegmentationDataset(root_train_txt, root_segm,
                                             root_images,
                                             transform_input_image,
                                             transform_target_image)

train_loader = DataLoader(train_set, batch_size=8, shuffle=True)

test_set = DataSet.ImageSegmentationDataset(root_test_txt, root_segm,
                                            root_images, transform_input_image,
                                            transform_target_image)

# NOTE(review): shuffle=True on the test loader is unusual — confirm intended.
test_loader = DataLoader(test_set, batch_size=8, shuffle=True)

#net = Network.UNet(in_channel=3, out_channel=21).cuda()
# Resume from a previously saved full-model checkpoint instead of training anew.
net = torch.load(
    '/home/freeaccess/PycharmProjects/VOCPascal/Last folder/val_loss_try_second60'
)
'''def one_hot(batch_idx, target, class_count):
import csv
from tkinter import *
from Musician import *
from DataSet import *

# Questionnaire GUI setup: read musicians from a CSV-style text file and
# find the most frequent genre.
options2 = []
musicians = []
RowCount = 0
with open('MusiciansFakeFile.txt', newline='') as inputfile:
    for row in csv.reader(inputfile):
        RowCount = RowCount + 1
        # columns 0-7 map positionally onto Musician's constructor
        musicians.append(
            Musician(row[0], row[1], row[2], row[3], row[4], row[5], row[6],
                     row[7]))

DS = DataSet(musicians)
# index of the genre with the highest count
FrequentGenre = DS.GenreCount.index(max(DS.GenreCount))

# answer to the current yes/no question, set by the button callbacks below
Qanswer = ""


def YesCallBack():
    """Record a 'Yes' answer (Tk button callback).

    Bug fix: declares Qanswer global — without it the assignment created a
    function-local variable and the module-level answer never changed.
    """
    global Qanswer
    Qanswer = "Yes"


def NoCallBack():
    """Record a 'No' answer (Tk button callback).

    Bug fix: declares Qanswer global — without it the assignment created a
    function-local variable and the module-level answer never changed.
    """
    global Qanswer
    Qanswer = "No"


# main window for the questionnaire GUI, fixed at 300x500 pixels
root = Tk()
root.geometry("300x500")
class Individual:
    """A genetic-algorithm individual whose genotype encodes a linear
    classifier over the shared case data set.

    The class attribute ``DataSet`` is shared by all individuals and supplies
    ``ListOfCases`` and ``Length`` for fitness evaluation.
    """

    DataSet = DataSet()  # shared labelled cases for every individual

    def Classification(self, Case):
        """Map *Case* to class 0, 1 or 2 from the six genotype genes.

        The genes act as coefficients of a linear score over the case's petal
        measurements; the absolute score is thresholded at .5 and 1.5.
        NOTE(review): a divisor gene decoding to 0 raises ZeroDivisionError —
        presumably Genotype.Invalid() screens such genotypes; confirm.
        """
        A = self.Genotype.Gene(0).BitString2Int()
        B = self.Genotype.Gene(1).BitString2Int()
        C = self.Genotype.Gene(2).BitString2Int()
        D = self.Genotype.Gene(3).BitString2Int()
        E = self.Genotype.Gene(4).BitString2Int()
        F = self.Genotype.Gene(5).BitString2Int()
        Number = (Case.PetalLength) * (A / B) + (Case.PetalWidth) * (C / D) + (
            E / F)
        if abs(Number) > 1.5:
            return 2
        elif abs(Number) > .5:
            return 1
        else:
            return 0

    def Fitness(self):
        """Return the number of correctly classified cases as the raw
        fitness; 0 for an invalid genotype."""
        if self.Genotype.Invalid():
            return 0
        CorrectnessCount = 0
        for Case in Individual.DataSet.ListOfCases:
            if self.Classification(Case) == Case.Classification:
                CorrectnessCount += 1
        return CorrectnessCount

    def WriteCorrectnessToFile(self):
        """Write each case's predicted class to Classifier1.txt and return
        the number of correct predictions (0 for an invalid genotype).

        Bug fix: the original called OutputFile.close() *after* the return
        statement, so it never ran and the handle leaked; a context manager
        now guarantees the file is closed.
        """
        if self.Genotype.Invalid():
            return 0
        CorrectnessCount = 0
        with open('Classifier1.txt', 'w') as OutputFile:
            for Case in Individual.DataSet.ListOfCases:
                OutputFile.write("%s\n" % (self.Classification(Case)))
                if self.Classification(Case) == Case.Classification:
                    CorrectnessCount += 1
        return CorrectnessCount

    def Solution(self):
        """Return the perfect fitness value: one point per case."""
        return Individual.DataSet.Length

    def FitnessAsPercentage(self, Fitness):
        """Convert a raw fitness into a percentage of the data set size,
        rounded to four decimal places."""
        CorrectnessFraction = Fitness / Individual.DataSet.Length
        return round(CorrectnessFraction * 100, 4)

    def __init__(self):
        # six genes, values in [-100, 100], encoded on 7 bits each
        self.Genotype = Genotype(6, -100, 100, 7)

    def OnePointCrossover(self, OtherIndividual):
        """Swap the bit-string tails of self and *OtherIndividual* at one
        random cut point; both genotypes are modified in place."""
        CrossoverIndex = randint(0, len(self.Genotype.BitString()))
        SelfLeftHalf = self.Genotype.BitString()[:CrossoverIndex]
        SelfRightHalf = self.Genotype.BitString()[CrossoverIndex:]
        OtherLeftHalf = OtherIndividual.Genotype.BitString()[:CrossoverIndex]
        OtherRightHalf = OtherIndividual.Genotype.BitString()[CrossoverIndex:]

        self.Genotype.SetBitString(SelfLeftHalf + OtherRightHalf)
        OtherIndividual.Genotype.SetBitString(OtherLeftHalf + SelfRightHalf)

    def TwoPointCrossover(self, OtherIndividual):
        """Exchange the middle bit-string segments between two random cut
        points; both genotypes are modified in place.

        Note the received middle segment is spliced to the *front* of the
        target genotype, matching the original implementation.
        """
        LeftCrossoverIndex = randint(0, len(self.Genotype.BitString()))
        RightCrossoverIndex = randint(LeftCrossoverIndex,
                                      len(self.Genotype.BitString()))
        SelfLeftThird = self.Genotype.BitString()[:LeftCrossoverIndex]
        SelfMiddleThird = self.Genotype.BitString(
        )[LeftCrossoverIndex:RightCrossoverIndex]
        SelfRightThird = self.Genotype.BitString()[RightCrossoverIndex:]
        OtherLeftThird = OtherIndividual.Genotype.BitString(
        )[:LeftCrossoverIndex]
        OtherMiddleThird = OtherIndividual.Genotype.BitString(
        )[LeftCrossoverIndex:RightCrossoverIndex]
        OtherRightThird = OtherIndividual.Genotype.BitString(
        )[RightCrossoverIndex:]

        self.Genotype.SetBitString(OtherMiddleThird + SelfLeftThird +
                                   SelfRightThird)
        OtherIndividual.Genotype.SetBitString(SelfMiddleThird +
                                              OtherLeftThird + OtherRightThird)

    def ApplyCrossover(self, OtherIndividual, NumberOfCrossoverPoints):
        """Apply one- or two-point crossover to self and *OtherIndividual*,
        editing both genotypes in place."""
        if NumberOfCrossoverPoints == 1:
            self.OnePointCrossover(OtherIndividual)
        else:
            self.TwoPointCrossover(OtherIndividual)

    def ApplyMutation(self):
        """Flip one randomly chosen bit of the genotype in place."""
        RandomIndex = randint(0, len(self.Genotype.BitString()) - 1)
        GenotypeAsList = list(self.Genotype.BitString())
        GenotypeAsList[RandomIndex] = (
            '0' if GenotypeAsList[RandomIndex] == '1' else '1')
        # str.join replaces the original character-by-character concatenation
        self.Genotype.SetBitString(''.join(GenotypeAsList))

    def ToString(self, NumberOfSpaces):
        """Return the genotype's bit string; *NumberOfSpaces* is accepted for
        interface compatibility but unused."""
        return self.Genotype.BitStringToString()
Beispiel #9
0
def main(args):
    """Train an embedding network with a metric-learning loss.

    Loads (or resumes) the model, builds an Adam optimizer with a reduced
    learning-rate multiplier for the pretrained backbone, and runs the
    training loop, checkpointing every args.save_step epochs.
    NOTE(review): uses pre-0.4 PyTorch idioms (Variable, loss.data[0]) —
    the whole file appears pinned to that API.
    """

    # save training logs under checkpoints/<log_dir>
    log_dir = os.path.join('checkpoints', args.log_dir)
    mkdir_if_missing(log_dir)

    # tee stdout into log.txt via the project's Logger
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        # load part of the model
        model_dict = model.state_dict()
        # print(model_dict)
        if args.net == 'bn':
            pretrained_dict = torch.load(
                'pretrained_models/bn_inception-239d2248.pth')
        else:
            pretrained_dict = torch.load(
                'pretrained_models/inception_v3_google-1a9a5a14.pth')

        # keep only pretrained weights whose names exist in this model
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in model_dict
        }

        model_dict.update(pretrained_dict)

        model.load_state_dict(model_dict)
    else:
        # resume model
        model = torch.load(args.r)

    model = model.cuda()

    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is save at %s' % log_dir)

    # fine tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))

    new_params = [p for p in model.parameters() if id(p) in new_param_ids]

    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [{
        'params': base_params,
        'lr_mult': 0.1
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]

    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()

    # sampler groups batch items by identity for the metric loss
    data = DataSet.create(args.data, root=None, test=False)
    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True,
        num_workers=args.nThreads)

    for epoch in range(args.start, args.epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()

            embed_feat = model(inputs)

            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            if args.orth > 0:
                # optional orthogonality regularisation on the weights
                loss = orth_reg(model, loss, cof=args.orth)
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')

        # inter_/dist_ap/dist_an report stats from the *last* batch only
        print(
            '[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
            % (epoch + 1, running_loss, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))
Beispiel #10
0
n[dot][email protected]
bajaj[dot][email protected]
'''

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import DataSet as ds
from LogisticRegression import LR

np.random.seed(1)  # reproducible dataset generation
plt.close('all')

# available synthetic dataset shapes
dtype = ['MOONS', 'GAUSSIANS', 'LINEAR', 'SINUSOIDAL', 'SPIRAL']

# 200 SINUSOIDAL samples with 0.05 noise, using the preset configuration
X, y, _ = ds.create_dataset(200, dtype[3], 0.05, varargin='PRESET')

print(X.shape, y.shape)

# standardise per row (features along axis 0, samples along axis 1)
means = np.mean(X, 1).reshape(X.shape[0], -1)
stds = np.std(X, 1).reshape(X.shape[0], -1)

X = (X - means) / stds

# logistic regression with degree-5 polynomial features and L2 penalty
Clf = LR(X, y, alpha=0.003, polyfit=True, degree=5, lambd=2)

# interactive side-by-side plot layout
plt.ion()
fig = plt.figure(figsize=(8, 4))
gs = GridSpec(1, 2)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
	'arsons', 'arsonsPerPop',
	'violentPerPop',
	'nonViolPerPop',
]


from DataSet import *
from PCA import *

#print("\n\nstd:",   oli.X.std())
#print("\n\nmean:",  oli.X.mean())
#print("\n\nrange:", oli.X.max()-oli.X.min())

#Oli.PCA(oli)

# Load the normalized crime data and strip non-numeric columns before PCA.
crime = DataSet(datafile='../data/normalized.csv')

crime = crime.drop(['state', 'communityname'])       # Drop strings
crime = crime.drop(['countyCode','communityCode']) # Drop nominals
crime = crime.drop_columns([
  'fold',
  'murders', 'murdPerPop',
  'rapes', 'rapesPerPop',
  'robberies', 'robbbPerPop',
# 'assaults', 'assaultPerPop',
  'burglaries', 'burglPerPop',
  'larcenies', 'larcPerPop',
  'autoTheft', 'autoTheftPerPop',
  'arsons', 'arsonsPerPop',
  'ViolentCrimesPerPop',
  'nonViolPerPop',
def main():
    """Load face images from data_path and print a separator banner."""
    # NOTE(review): the prompt below is commented out, so data_path must be
    # defined at module level or this raises NameError — confirm where
    # data_path is set.
    # data_path = raw_input("Enter path : ")
    DataSet.read_faces(data_path)
    print 2 * "\n*******************************************"
Beispiel #13
0
class GraphWindow(Toplevel):
    """Top-level window wrapping a GraphFrame with a Close button."""

    def __init__(self, parent, datasets=None):
        """Build the window.

        :param parent: parent Tk widget
        :param datasets: optional list of data sets to plot; defaults to an
            empty list.  A None sentinel replaces the original mutable
            default argument, which was shared across all instances.
        """
        Toplevel.__init__(self, parent)
        if datasets is None:
            datasets = []
        self.graph_frame = GraphFrame(self, datasets)
        Button(self.graph_frame, text="Close", fg="red",
               command=self.windowClosing).pack(side=RIGHT)
        self.graph_frame.pack(fill=BOTH, expand=1)
        # route the window-manager close button through our cleanup handler
        self.protocol("WM_DELETE_WINDOW", self.windowClosing)

    def windowClosing(self):
        """Release graph resources, then destroy the window."""
        self.graph_frame.graph.cleanup()
        self.destroy()

if __name__ == '__main__':

  # demo: a GraphFrame with one (initially empty) data set
  root = Tk()
  data = DataSet()

  GF = GraphFrame(root, [data])
  GF.pack(fill=BOTH, expand=1)

  Button(root, text='Clear', command=GF.graph.clear).pack(side=LEFT)
  Button(root, text='Redraw', command=GF.graph.replot).pack(side=LEFT)
  Button(root, text='Quit',   command=root.quit).pack(side=RIGHT)

  # optionally preload the data set from the last file argument
  if len(argv) > 1:
    data.readFromFile(argv[-1])

  root.mainloop()
Beispiel #14
0
def lowerbound(dataset_name, image_index, game_type, eta, tau):
    """Search for an adversarial example for one test input via a two-player game.

    :param dataset_name: name of the data set / pretrained network to load
    :param image_index: index of the test image to attack
    :param game_type: 'cooperative' (A* search) or 'competitive' (alpha-beta)
    :param eta: pair (distance-metric-name, budget), metric in {'L0','L1','L2'}
    :param tau: search granularity passed through to the game players
    """
    NN = NeuralNetwork(dataset_name)
    NN.load_network()
    print("Dataset is %s." % NN.data_set)
    NN.model.summary()

    dataset = DataSet(dataset_name, 'testing')
    image = dataset.get_input(image_index)
    (label, confidence) = NN.predict(image)
    label_str = NN.get_label(int(label))
    print(
        "Working on input with index %s, whose class is '%s' and the confidence is %s."
        % (image_index, label_str, confidence))
    print("The second player is being %s." % game_type)

    # save the original input for later visual comparison
    path = "%s_pic/idx_%s_label_[%s]_with_confidence_%s.png" % (
        dataset_name, image_index, label_str, confidence)
    NN.save_input(image, path)

    if game_type == 'cooperative':
        tic = time.time()
        cooperative = CooperativeAStar(dataset_name, image_index, image, NN,
                                       eta, tau)
        cooperative.play_game(image)
        if cooperative.ADVERSARY_FOUND is True:
            elapsed = time.time() - tic
            adversary = cooperative.ADVERSARY
            adv_label, adv_confidence = NN.predict(adversary)
            adv_label_str = NN.get_label(int(adv_label))

            print(
                "\nFound an adversary within pre-specified bounded computational resource. "
                "\nThe following is its information: ")
            print("difference between images: %s" %
                  (diffImage(image, adversary)))
            l2dist = l2Distance(image, adversary)
            l1dist = l1Distance(image, adversary)
            l0dist = l0Distance(image, adversary)
            percent = diffPercent(image, adversary)
            print("L2 distance %s" % l2dist)
            print("L1 distance %s" % l1dist)
            print("L0 distance %s" % l0dist)
            print("manipulated percentage distance %s" % percent)
            print("class is changed into '%s' with confidence %s\n" %
                  (adv_label_str, adv_confidence))

            path = "%s_pic/idx_%s_modified_into_[%s]_with_confidence_%s.png" % (
                dataset_name, image_index, adv_label_str, adv_confidence)
            NN.save_input(adversary, path)

            # dispatch table replaces the if/elif chain; bug fix: an
            # unrecognised metric previously left 'dist' unbound and crashed
            # with NameError when formatting the path below
            distances = {'L0': l0dist, 'L1': l1dist, 'L2': l2dist}
            if eta[0] in distances:
                dist = distances[eta[0]]
                path = "%s_pic/idx_%s_modified_diff_%s=%s_time=%s.png" % (
                    dataset_name, image_index, eta[0], dist, elapsed)
                NN.save_input(np.absolute(image - adversary), path)
            else:
                print("Unrecognised distance metric.")
        else:
            print("Adversarial distance exceeds distance budget.")

    elif game_type == 'competitive':
        competitive = CompetitiveAlphaBeta(image, NN, eta, tau)
        competitive.play_game(image)

    else:
        print("Unrecognised game type. Try 'cooperative' or 'competitive'.")
Beispiel #15
0
from Input import * 
from DataSet import *

if __name__ == "__main__":

	# hard-coded query: population data for two US states, 1991-2006
	data1 = "pop"
	data2 = "pop"
	country1 = "us"
	country2 = "us"
	state1 = "Florida"
	state2 = "New York"
	county1 = ""
	county2 = ""
	start = "1991"
	end = "2006"


	# Input bundles the query; makeDictionary(i) extracts series i as a dict
	uinput = Input(data1 ,data2, country1, country2, state1, state2, county1, county2, start, end)
	d1 = DataSet(uinput.makeDictionary(1))

	print d1.data

	d2 = DataSet(uinput.makeDictionary(2))

	print d2.data
	
	# correlation coefficient between the two series
	r = d1.calcR(d2)

	print r 
Beispiel #16
0
    val_loss /= len(test_loader)
    val_dice0 /= len(test_loader)
    val_dice1 /= len(test_loader)
    val_dice2 /= len(test_loader)
    val_dice3 /= len(test_loader)

    print('\nTest set: Average loss: {:.6f},\tdice0: {:.6f}\tdice1: {:.6f}\tdice2: {:.6f}\tdice3: {:.6f}\n'.format(
        val_loss, val_dice0, val_dice1, val_dice2, val_dice3))


if __name__ == '__main__':
    args = config.args
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # data info

    # evaluation split; no shuffling so results are deterministic per run
    test_set = DataSet(args.class_num,args.crop_size, args.resize_scale, args.dataset_path, mode='test')
    test_loader = DataLoader(dataset=test_set,batch_size=args.batch_size,num_workers=1, shuffle=False)

    # model info
    # model = UNet(1, [32, 48, 64, 96, 128], 3, net_mode='3d',conv_block=RecombinationBlock).to(device)
    # model.load_state_dict(torch.load('./output/{}/state.pkl'.format(args.save)))
    # model = torch.load(args.model_path).to(device)
    # print(model)
    # 3D U-Net with residual blocks, weights restored from a fixed checkpoint
    model = UNet(1, [32, 48, 64, 96, 128], args.class_num, net_mode='3d',conv_block=ResBlock).to(device)
    model.load_state_dict(torch.load('output/model-628-1.pth'))
    print(model)
    model.eval()  # disable dropout/batch-norm updates for evaluation
    test(model, test_loader)
    #img = sitk.ReadImage('/home/sxchongya/unet_pytorch/fixed/data/volume-5.nii.gz')
    #f = sitk.GetArrayFromImage(img)
    #f = torch.tensor(f)
Beispiel #17
0
def trainValidateSegmentation(args):
    '''
    Main function for training and validation
    :param args: global arguments
    :return: None
    '''
    # check if processed data file exists or not
    if not os.path.isfile(args.cached_data_file):
        dataLoad = ld.LoadData(args.data_dir, args.classes, args.cached_data_file)
        data = dataLoad.processData()
        if data is None:
            print('Error while pickling data. Please check.')
            exit(-1)
    else:
        data = pickle.load(open(args.cached_data_file, "rb"))

    q = args.q
    p = args.p
    # load the model (encoder only, or full encoder-decoder network)
    if not args.decoder:
        model = net.ESPNet_Encoder(args.classes, p=p, q=q)
        args.savedir = args.savedir + '_enc_' + str(p) + '_' + str(q) + '/'
    else:
        model = net.ESPNet(args.classes, p=p, q=q, encoderFile=args.pretrained)
        args.savedir = args.savedir + '_dec_' + str(p) + '_' + str(q) + '/'

    if args.onGPU:
        model = model.cuda()

    # create the directory if not exist
    if not os.path.exists(args.savedir):
        os.mkdir(args.savedir)

    if args.visualizeNet:
        # run one dummy forward pass to render the computation graph
        x = Variable(torch.randn(1, 3, args.inWidth, args.inHeight))

        if args.onGPU:
            x = x.cuda()

        y = model.forward(x)
        g = viz.make_dot(y)
        g.render(args.savedir + 'model.png', view=False)

    total_paramters = netParams(model)
    print('Total network parameters: ' + str(total_paramters))

    # define optimization criteria
    weight = torch.from_numpy(data['classWeights']) # convert the numpy array to torch
    if args.onGPU:
        weight = weight.cuda()

    criteria = CrossEntropyLoss2d(weight) #weight

    if args.onGPU:
        criteria = criteria.cuda()

    print('Data statistics')
    print(data['mean'], data['std'])
    print(data['classWeights'])

    #compose the data with transforms
    trainDataset_main = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1024, 512),
        myTransforms.RandomCropResize(32),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64).
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    trainDataset_scale1 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1536, 768), # 1536, 768
        myTransforms.RandomCropResize(100),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    trainDataset_scale2 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1280, 720), # 1536, 768
        myTransforms.RandomCropResize(100),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    trainDataset_scale3 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(768, 384),
        myTransforms.RandomCropResize(32),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    trainDataset_scale4 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(512, 256),
        #myTransforms.RandomCropResize(20),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64).
        myTransforms.ToTensor(args.scaleIn),
        #
    ])


    valDataset = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1024, 512),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])

    # since we training from scratch, we create data loaders at different scales
    # so that we can generate more augmented data and prevent the network from overfitting

    trainLoader = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_main),
        batch_size=args.batch_size + 2, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    trainLoader_scale1 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_scale1),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    trainLoader_scale2 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_scale2),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    trainLoader_scale3 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_scale3),
        batch_size=args.batch_size + 4, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    trainLoader_scale4 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'], transform=trainDataset_scale4),
        batch_size=args.batch_size + 4, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    valLoader = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['valIm'], data['valAnnot'], transform=valDataset),
        batch_size=args.batch_size + 4, shuffle=False, num_workers=args.num_workers, pin_memory=True)

    if args.onGPU:
        cudnn.benchmark = True

    start_epoch = 0

    if args.resume:
        if os.path.isfile(args.resumeLoc):
            # bug fix: report the checkpoint *path* (args.resumeLoc, the value
            # actually tested above), not the resume flag, in these messages
            print("=> loading checkpoint '{}'".format(args.resumeLoc))
            checkpoint = torch.load(args.resumeLoc)
            start_epoch = checkpoint['epoch']
            #args.lr = checkpoint['lr']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                .format(args.resumeLoc, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resumeLoc))
    

    logFileLoc = args.savedir + args.logFile
    if os.path.isfile(logFileLoc):
        logger = open(logFileLoc, 'a')
    else:
        logger = open(logFileLoc, 'w')
        logger.write("Parameters: %s" % (str(total_paramters)))
        # bug fix: closing parenthesis was missing in the last column header
        logger.write("\n%s\t%s\t%s\t%s\t%s\t" % ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)', 'mIOU (val)'))
    logger.flush()

    optimizer = torch.optim.Adam(model.parameters(), args.lr, (0.9, 0.999), eps=1e-08, weight_decay=5e-4)
    # we step the loss by 2 after step size is reached
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step_loss, gamma=0.5)


    for epoch in range(start_epoch, args.max_epochs):

        # NOTE(review): scheduler.step(epoch) before the optimizer steps is the
        # pre-1.1 PyTorch ordering; newer versions expect step() after training
        scheduler.step(epoch)
        lr = 0
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
        print("Learning rate: " +  str(lr))

        # train for one epoch
        # We consider 1 epoch with all the training data (at different scales)
        train(args, trainLoader_scale1, model, criteria, optimizer, epoch)
        train(args, trainLoader_scale2, model, criteria, optimizer, epoch)
        train(args, trainLoader_scale4, model, criteria, optimizer, epoch)
        train(args, trainLoader_scale3, model, criteria, optimizer, epoch)
        lossTr, overall_acc_tr, per_class_acc_tr, per_class_iu_tr, mIOU_tr = train(args, trainLoader, model, criteria, optimizer, epoch)

        # evaluate on validation set
        lossVal, overall_acc_val, per_class_acc_val, per_class_iu_val, mIOU_val = val(args, valLoader, model, criteria)
        
            
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': str(model),
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lossTr': lossTr,
            'lossVal': lossVal,
            'iouTr': mIOU_tr,
            'iouVal': mIOU_val,
            'lr': lr
        }, args.savedir + 'checkpoint.pth.tar')

        #save the model also
        model_file_name = args.savedir + '/model_' + str(epoch + 1) + '.pth'
        torch.save(model.state_dict(), model_file_name)

        

        with open(args.savedir + 'acc_' + str(epoch) + '.txt', 'w') as log:
            log.write("\nEpoch: %d\t Overall Acc (Tr): %.4f\t Overall Acc (Val): %.4f\t mIOU (Tr): %.4f\t mIOU (Val): %.4f" % (epoch, overall_acc_tr, overall_acc_val, mIOU_tr, mIOU_val))
            log.write('\n')
            log.write('Per Class Training Acc: ' + str(per_class_acc_tr))
            log.write('\n')
            log.write('Per Class Validation Acc: ' + str(per_class_acc_val))
            log.write('\n')
            log.write('Per Class Training mIOU: ' + str(per_class_iu_tr))
            log.write('\n')
            log.write('Per Class Validation mIOU: ' + str(per_class_iu_val))

        logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.7f" % (epoch, lossTr, lossVal, mIOU_tr, mIOU_val, lr))
        logger.flush()
        print("Epoch : " + str(epoch) + ' Details')
        print("\nEpoch No.: %d\tTrain Loss = %.4f\tVal Loss = %.4f\t mIOU(tr) = %.4f\t mIOU(val) = %.4f" % (epoch, lossTr, lossVal, mIOU_tr, mIOU_val))
    logger.close()
Beispiel #18
0
from sklearn import preprocessing

# Standardise the feature vectors (zero mean, unit variance).
scaler = preprocessing.StandardScaler().fit(training_vectors)
scaled_training_vectors = scaler.transform(training_vectors)

# NOTE(review): a *second* scaler is fitted on the test data; normally the
# training scaler should transform the test set too — confirm this is intended.
test_scaler = preprocessing.StandardScaler().fit(test_vectors)
scaled_test_vectors = test_scaler.transform(test_vectors)
################################################################################

################################################################################
# use (my) DataSet class to provide 'next_batch' functionality to TensorFlow
# Also changes labels to 'one-hot' 2D arrays

import DataSet

training_dataset = DataSet.load_dataset(scaled_training_vectors,
                                        training_labels)
test_dataset = DataSet.load_dataset(scaled_test_vectors, test_labels)
################################################################################

################################################################################
# Solve with (Google) TensorFlow
import tensorflow as tf

# Create the model
# 7 elements in each feature vector, 9 possible facies

# single linear layer: logits y = xW + b
x = tf.placeholder(tf.float32, [None, 7])
W = tf.Variable(tf.zeros([7, 9]))
b = tf.Variable(tf.zeros([9]))
y = tf.matmul(x, W) + b
Beispiel #19
0
import DataSet
from config import *

# XOR truth table: network inputs and expected outputs
DS_INPUT = np.array(([0, 0], [1, 0], [0, 1], [1, 1]), dtype=np.float64)
DS_OUTPUT = np.array(([0], [1], [1], [0]), dtype=np.float64)
DATA_SET = DataSet.DataSet(DS_INPUT, DS_OUTPUT)


def test_primes():
    # Train a neural network on DATA_SET (the XOR table above) and report the
    # output/error for one sample; optionally draws the network.
    # NOTE(review): despite the name "test_primes", the data trained on here is
    # the XOR dataset — confirm the intended name/data pairing.
    print DATA_SET
    config = simple_config
    # get setup
    neural_network, trainer = create_training_setup(config, (1, 2), (1, 1))
    # start training
    print_title("test primes")
    trainer.train(neural_network, DATA_SET)
    # test
    test_sample = DATA_SET.getSample()
    neural_network.feed(test_sample.inputs)
    print "output: "
    print neural_network.get_output()
    print "expected: "
    # NOTE(review): `outMatrix` is not defined in this file's visible scope —
    # presumably from `config` star-import; verify it exists or print
    # test_sample.outputs instead.
    print outMatrix
    print "error: ", trainer.get_average_error(neural_network,
                                               test_sample.outputs)

    # visual output
    if PLOT_NEURAL_NET:
        network = Visualizer.DrawNN([DEFAULT_INPUT_DIM[0]] +
                                    [config[0]] * config[1] +
                                    [DEFAULT_OUTPUT_DIM[0]])
Beispiel #20
0
        gestureNames.append(totalGestureNames[i])
    gestureNames.append('no gesture')
    
     
    
    
    
    
    
    #read datasets and add them to dataStep
    trainSets = []
    randTestSets = []
    dataStep = []
    
    for fileName in inputFiles:
        ind, t  = DataSet.createData(fileName, inputGestures,usedGestures)
        dataStep.append((ind,t))
        
    segs = splitBySignals(dataStep)

    #if desired shuffle and refraction the gestures
    if(shuffle):
        dataStep = shuffleDataStep(dataStep, nFolds)
    
    
    #if desired stretch testset
    newDataStep = []
    for ind, t in dataStep:
        
        indSets = []
        tSets = []
Beispiel #21
0
# import dataio as dio
import thinkstats2 as ts2
import thinkplot as tplt
import DataSet
import pdb
from random import choice

# Load a pickled survey dataset and inspect two randomly chosen variables.
# Python 2 script (print statements; `choice(dict.keys())` requires py2,
# where keys() returns a list).
H155 = DataSet.DataSet('h155.pkl')

df = H155.df
print df.shape

# NOTE(review): the comparison value is missing below — this line is a
# SyntaxError as written. Fill in the intended INSCOPE filter value.
dfr = df[df['INSCOPE'] == ]
print dfr.shape

c1 = choice(H155.varnames.keys())
c2 = choice(H155.varnames.keys())

print c1, H155.varnames[c1]
print c2, H155.varnames[c2]
Beispiel #22
0
def load_patterns(ds_paths, ds_files):
    """Build a DataSet from *ds_paths* and populate it from the CSV *ds_files*.

    Returns the populated ``ds.DataSet`` instance.
    """
    patterns = ds.DataSet(ds_paths)
    patterns.read_csv_file(ds_files)
    return patterns
Beispiel #23
0
    'larcPerPop',
    #	'autoTheft', 'autoTheftPerPop',
    'arsons',
    'arsonsPerPop',
    'violentPerPop',
    'nonViolPerPop',
]

from DataSet import *
from PCA import *

import matplotlib.pyplot as plt

# Build the dataset: use 'State' as the class column, drop the columns listed
# above, fill missing values with column means, and normalize features.
dataset = DataSet(data,
                  names,
                  class_column='State',
                  drop_columns=drop_columns,
                  fix_missing=FixMissing.FILLMEAN,
                  rescale=Rescale.NORMALIZE)

# NOTE(review): this `y` is immediately overwritten by `dataset.y[0:TO]` below
# and appears to be dead code.
y = np.mat(np.zeros((len(names), 1)))

# Work on only the first TO rows of the data.
TO = 50
X = dataset.X.values[0:TO, :]
y = dataset.y[0:TO]
N, M = X.shape
classNames = dataset.classNames
# NOTE(review): attribute names sliced by the ROW limit TO — presumably the
# intent was all attribute names; confirm.
attributeNames = dataset.attributeNames[0:TO]

# exercise 7.2.4

from pylab import *
Beispiel #24
0
def Model2Feature(data,
                  net,
                  checkpoint,
                  root=None,
                  nThreads=16,
                  batch_size=100,
                  pool_feature=False,
                  **kargs):
    """Extract gallery/query features from a dataset with a checkpointed model.

    Loads weights from *checkpoint* into architecture *net*, runs the model
    (DataParallel, CUDA) over the 'test' split of dataset *data*, and returns
    (gallery_feature, gallery_labels, query_feature, query_labels).  For the
    'shop'/'jd_test' datasets gallery and query are separate loaders;
    otherwise a single test loader is used and gallery == query.
    """
    dataset_name = data
    model = models.create(net, pretrained=False, normalized=True)
    # resume = load_checkpoint(ckp_path)
    resume = checkpoint

    model.load_state_dict(resume['state_dict'])
    model.eval()
    model = torch.nn.DataParallel(model).cuda()
    # `data` is rebound from the dataset name (str) to the dataset object.
    data = DataSet.create(name=data, root=root, set_name='test')

    if dataset_name in ['shop', 'jd_test']:
        gallery_loader = torch.utils.data.DataLoader(data.gallery,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     drop_last=False,
                                                     pin_memory=True,
                                                     num_workers=nThreads)

        query_loader = torch.utils.data.DataLoader(data.query,
                                                   batch_size=batch_size,
                                                   shuffle=False,
                                                   drop_last=False,
                                                   pin_memory=True,
                                                   num_workers=nThreads)

        gallery_feature, gallery_labels = extract_features(
            model,
            gallery_loader,
            print_freq=1e5,
            metric=None,
            pool_feature=pool_feature)
        query_feature, query_labels = extract_features(
            model,
            query_loader,
            print_freq=1e5,
            metric=None,
            pool_feature=pool_feature)

    else:  #here

        print('using else')
        # NOTE(review): shuffle=True and drop_last=True on an EVALUATION
        # loader silently drops up to batch_size-1 samples — confirm this is
        # intended for the metrics computed downstream.
        data_loader = torch.utils.data.DataLoader(data.test,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  drop_last=True,
                                                  pin_memory=True,
                                                  num_workers=nThreads)

        features, labels = extract_features(model,
                                            data_loader,
                                            pool_feature=pool_feature)

        # chained assignment: gallery and query refer to the SAME objects here
        gallery_feature, gallery_labels = query_feature, query_labels = features, labels
    return gallery_feature, gallery_labels, query_feature, query_labels
# Optimizer over all model parameters: SGD with momentum and weight decay.
param_groups = model.parameters()
learn_rate = args.lr
# optimizer = optim.Adam(param_groups, lr=learn_rate,
#                        weight_decay=args.weight_decay)
optimizer = optim.SGD(param_groups,
                      lr=learn_rate,
                      momentum=0.9,
                      weight_decay=0.00005)

#get train_loader
# mxnet-pretrained models expect raw-pixel mean subtraction; torchvision
# models expect ImageNet mean/std on [0,1] tensors.
# NOTE(review): `normalize` is not used anywhere in this visible span —
# confirm it is consumed by the dataset/transforms elsewhere.
if 'mxnet' in args.net:
    normalize = transforms.Normalize(mean=[123, 117, 104], std=[1, 1, 1])
else:
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
data = DataSet.create(args.data, root=None, test=False)
# Identity-balanced sampling: each batch contains num_instances per identity.
train_loader = torch.utils.data.DataLoader(
    data.train,
    batch_size=args.BatchSize,
    sampler=RandomIdentitySampler(data.train,
                                  num_instances=args.num_instances),
    drop_last=False,
    num_workers=args.nThreads)


def adjust_learning_rate(opt_, epoch_, num_epochs):
    """Sets the learning rate to the initial LR decayed by 1000 at last epochs"""
    if epoch_ > (num_epochs - args.step):
        lr = args.lr * \
             (0.01 ** ((epoch_ + args.step - num_epochs) / float(args.step)))
        for param_group in opt_.param_groups:
Beispiel #26
0
                    help='path of train data')
parser.add_argument('--epoch',
                    default=5,
                    type=int,
                    help='number of train epoches')
parser.add_argument('--lr',
                    default=1e-3,
                    type=float,
                    help='initial learning rate for Adam')
args = parser.parse_args()

# Only conversion to tensor; no augmentation or normalization.
data_transform = transforms.Compose([transforms.ToTensor()])

# The whole dataset: paired normal/blurry images.
dataset = DataSet.imageDataset(args.normal_data,
                               args.blurry_data,
                               transform=data_transform)

# Splitting the dataset into train/validation loaders.
train_loader, validation_loader = prepareLoaders(
    dataset,
    shuffle_dataset=True,
    batch_size=args.batch_size,
)

# Saving directory
save_dir = os.path.join('models', args.model)

# Create the saving directory if not exists.
# NOTE(review): os.mkdir fails if the parent 'models' directory is missing;
# os.makedirs(save_dir, exist_ok=True) would be more robust.
if not os.path.exists(save_dir):
    os.mkdir(save_dir)
Beispiel #27
0
    filesource = 'Haberman.txt'
    fileAttr = [
        'Age', 'Patient year of operation',
        'Number of positive axillary nodes detected', ' Survival status'
    ]
    posClassification = 3
    fileClass = ['1', '2']

elif index == 4:
    filesource = 'MammographicMasses.txt'
    fileAttr = ['BI-RADS', 'Age', 'Shape', 'Margin', 'Density', ' Severity']
    posClassification = 5
    fileClass = ['0', '1']

#chiamo il metodo che fa il parsing del file .txt del dataset scelto
fileDataset = DataSet.setDataSet(filesource, fileAttr, posClassification,
                                 fileClass)


#rimuove valori nel dataset con probabilita uniforme (funziona bene con i valori di p 0.1, 0.2, 0.5)
def removeValAttrWithProb(dataset, p):
    """Blank out attribute values in *dataset* with probability *p*.

    Each cell of ``dataset.examples`` is independently set to ``None`` with
    probability ``p`` (0 <= p <= 1).  The dataset is modified in place and
    also returned, matching the original interface.

    Fix: the original drew ``random.randint(1, 1/p)`` and removed the value
    on a draw of 1.  On Python 3 that raises ``ValueError`` because ``1/p``
    is a float; the equivalent ``random.random() < p`` test is used instead,
    which also handles p == 0 (remove nothing) instead of dividing by zero.
    """
    for row in dataset.examples:
        for j in range(len(row)):
            # Remove this cell with probability p.
            if random.random() < p:
                row[j] = None
    return dataset


#creo il validation set per il 10 fold cross validation
def testing(fileDataset, number):
Beispiel #28
0
def main(args):
    """Fine-tune a metric-learning model and save periodic checkpoints.

    Creates (or resumes) the network named by ``args.net``, optionally
    freezes BatchNorm, builds an optimizer where the pre-trained backbone
    starts frozen (lr_mult 0.0, raised to 0.1 after epoch 1) while the new
    classifier trains at full rate, then runs the training loop and writes
    checkpoints under ``args.save_dir``.
    """
    # s_ = time.time()

    save_dir = args.save_dir
    mkdir_if_missing(save_dir)

    # Mirror stdout into a persistent log file in the save directory.
    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)

    # for vgg and densenet
    # NOTE(review): `model_dict` is never used afterwards in this function.
    if args.resume is None:
        model_dict = model.state_dict()

    else:
        # resume model: restore weights and the epoch to continue from
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameter is 1/10
    new_param_ids = set(map(id, model.module.classifier.parameters()))

    new_params = [
        p for p in model.module.parameters() if id(p) in new_param_ids
    ]

    base_params = [
        p for p in model.module.parameters() if id(p) not in new_param_ids
    ]

    # Backbone starts fully frozen (lr_mult 0.0); see epoch == 1 below.
    param_groups = [{
        'params': base_params,
        'lr_mult': 0.0
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]

    print('initial model is save at %s' % save_dir)

    if args.optim == 'sgd':
        optimizer = torch.optim.SGD(param_groups,
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
    elif args.optim == 'adam':
        optimizer = torch.optim.Adam(param_groups,
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)

    else:
        raise ValueError('Unsupported optimizer type')

    criterion = losses.create(args.loss,
                              margin=args.margin,
                              alpha=args.alpha,
                              base=args.loss_base).cuda()

    # Decor_loss = losses.create('decor').cuda()
    data = DataSet.create(args.data,
                          ratio=args.ratio,
                          width=args.width,
                          origin_width=args.origin_width,
                          root=args.data_root)

    # Identity-balanced batches: num_instances samples per identity.
    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train,
                                          num_instances=args.num_instances),
        drop_last=True,
        pin_memory=True,
        num_workers=args.nThreads)

    # save the train information

    for epoch in range(start, args.epochs):

        train(epoch=epoch,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              train_loader=train_loader,
              args=args)

        # After warm-up, unfreeze the backbone at 1/10 of the classifier LR.
        if epoch == 1:
            optimizer.param_groups[0]['lr_mult'] = 0.1

        # Checkpoint at epoch 0 and every save_step epochs thereafter.
        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            },
                            is_best=False,
                            fpath=osp.join(
                                args.save_dir,
                                'ckp_ep' + str(epoch + 1) + '.pth.tar'))
def cross_val(args):
    """Evaluate per-allele MHC-binding models by averaging 5 fold checkpoints.

    For each peptide length (10, then 9) and each allele in the corresponding
    list, loads the pickled encoded data, runs all five fold models over the
    full set, averages their outputs, binarizes predictions at the IC50=500
    threshold, and logs the resulting AUC plus per-sample predictions.
    """

    torch.set_default_tensor_type('torch.DoubleTensor')

    # Alleles with 9-mer models.
    allele_list_9 = [
        'HLA-A*02:01', 'HLA-A*03:01', 'HLA-A*11:01', 'HLA-A*02:03',
        'HLA-B*15:01', 'HLA-A*31:01', 'HLA-A*01:01', 'HLA-B*07:02',
        'HLA-A*26:01', 'HLA-A*02:06', 'HLA-A*68:02', 'HLA-B*08:01',
        'HLA-B*58:01', 'HLA-B*40:01', 'HLA-B*27:05', 'HLA-A*30:01',
        'HLA-A*69:01', 'HLA-B*57:01', 'HLA-B*35:01', 'HLA-A*02:02',
        'HLA-A*24:02', 'HLA-B*18:01', 'HLA-B*51:01', 'HLA-A*29:02',
        'HLA-A*68:01', 'HLA-A*33:01', 'HLA-A*23:01'
    ]

    # Alleles with 10-mer models.
    allele_list_10 = [
        'HLA-A*02:01', 'HLA-A*03:01', 'HLA-A*11:01', 'HLA-A*68:01',
        'HLA-A*31:01', 'HLA-A*02:06', 'HLA-A*68:02', 'HLA-A*02:03',
        'HLA-A*33:01', 'HLA-A*02:02'
    ]

    if not os.path.exists(args.savedir):
        os.mkdir(args.savedir)

    logFileLoc = args.savedir + os.sep + args.testFile

    # NOTE(review): both branches write the same header, so appending to an
    # existing file duplicates the header line each run — likely only the
    # 'w' branch should write it.
    if os.path.isfile(logFileLoc):
        logger = open(logFileLoc, 'a')
        logger.write("%s\t%s\t\t\t%s\n" % ('Length', 'Allele', 'AUC'))
        logger.flush()
    else:
        logger = open(logFileLoc, 'w')
        logger.write("%s\t%s\t\t\t%s\n" % ('Length', 'Allele', 'AUC'))
        logger.flush()

    for length in [10, 9]:

        if length == 9:
            allele_list = allele_list_9
        elif length == 10:
            allele_list = allele_list_10
        else:
            print("Invalid Length")
            exit(0)

        for allele in allele_list:  #[9,10]

            model_dir = args.savedir + os.sep + 'best_model' + os.sep + allele
            if not os.path.isdir(model_dir):
                os.makedirs(model_dir)

            # Pickle files are named with '*' -> '.' and ':' -> '_'.
            data_dict = pickle.load(
                open(
                    args.data_dir + os.sep + 'pickle_' + str(length) +
                    '_binary' + os.sep +
                    allele.replace('*', '.').replace(':', '_') + '.p', 'rb'))

            print('test on allele: ' + data_dict['allele'])
            if not length == data_dict['sequ_length']:
                print('length error')
                exit()

            encode_channel = data_dict['channel_encode']
            meas = data_dict['label']
            # bind = []
            # for i in meas:
            #     i = (-1) * math.log10(i);
            #     bind.append(i)
            sequ, label = encode_channel, meas

            # Skip alleles with too few samples to evaluate.
            if (len(sequ) > 5):

                sequ_ori, label_ori = sequ, label

                output_list = []
                label_list = []

                # NOTE(review): shuffle=True on an evaluation loader — if each
                # fold's iteration order differs, the element-wise summing of
                # fold outputs below would misalign samples; confirm val()
                # iterates in a fixed order.
                test_data_load = torch.utils.data.DataLoader(
                    myDataLoader.MyDataset(sequ_ori, label_ori),
                    batch_size=args.batch_size,
                    shuffle=True,
                    num_workers=args.num_workers,
                    pin_memory=True)

                model = net.ResNetC1()

                if args.onGPU == True:
                    #model = torch.nn.DataParallel(model, device_ids=[0,1,2,3]).cuda()
                    model = model.cuda()

                criteria = MSELoss()

                if args.onGPU == True:
                    criteria = criteria.cuda()

                output_sum, label = [], []

                # Accumulate predictions from all 5 fold checkpoints.
                for fold_num in range(1, 6):

                    best_model_dict = torch.load(model_dir + os.sep + allele +
                                                 '_' + str(length) + '_' +
                                                 str(fold_num) + '.pth')
                    model.load_state_dict(best_model_dict)
                    _, _, output, label = val(args, test_data_load, model,
                                              criteria)

                    if not output_sum:
                        output_sum.extend(output)
                    else:
                        output_sum = [
                            output_sum[i] + output[i]
                            for i in range(len(output_sum))
                        ]

                # Ensemble average over the 5 folds.
                final_out = [output_sum[i] / 5 for i in range(len(output_sum))]
                output_list.extend(final_out)
                label_list.extend(label)

                # Outputs are -log10(IC50); convert back to IC50 nM.
                IC_output_list = [
                    math.pow(10, (-1) * value) for value in output_list
                ]
                #IC_label_list = [math.pow(10, (-1) * value) for value in label_list]

                # Binder if predicted IC50 < 500 nM (standard threshold).
                bi_output_list = [
                    1 if ic < 500 else 0 for ic in IC_output_list
                ]
                #bi_label_list = [1 if ic < 500 else 0 for ic in IC_label_list]

                #pearson = pearsonr(IC_output_list, IC_label_list)
                auc = roc_auc_score(label_list, bi_output_list)
                #srcc = spearmanr(IC_output_list, IC_label_list)

                logger.write("%s\t%s\t\t%.4f\n" % (length, allele, auc))
                logger.flush()

                # Append per-sample (label, prediction) pairs for this allele.
                prediction = args.savedir + os.sep + args.predict
                if os.path.exists(prediction):
                    append_write = 'a'  # append if already exists
                else:
                    append_write = 'w'

                true_value = open(prediction, append_write)
                true_value.write("%s\n" % (allele))
                for i in range(len(output_list)):
                    true_value.write("%.4f\t%.4f\n" %
                                     (label_list[i], output_list[i]))
                true_value.flush()

    logger.close()
def load_patterns(ds_paths='/home/adriano/Projects/ANNDispersionRelation/ann_training/2d/square/te/tests_new_db/16_interpolated_points/',
                  ds_files='dr_te_pc_dataset.csv'):
    """Load dispersion-relation pattern data into a DataSet.

    The directory and CSV file name were previously hard-coded; they are now
    keyword parameters defaulting to the original values, so existing
    ``load_patterns()`` callers are unaffected while other datasets can be
    loaded without editing this function.
    """
    dataSet = ds.DataSet(ds_paths)
    dataSet.read_csv_file(ds_files)
    return dataSet
Beispiel #31
0
def trainModel(expDir='null', ii=0):
    """Train a 2-conv + 2-FC MNIST network with scheduled dropout (TF1).

    Reads hyperparameters from ``expDir + 'input_configuration'``, trains for
    the configured number of epochs while the dropout keep-probabilities
    follow a schedule selected by ``mode``, periodically evaluates
    train/test/validation accuracy, and pickles the accuracy history to
    ``expDir + str(ii) + 'accuracies.pkl'``.

    NOTE(review): Python 2 only — uses ``file()``, ``cPickle``, and relies on
    integer division for batch counts (e.g. ``num_test_batches`` is used
    un-cast in ``range()`` near the end).
    """
    data_ = open('../data_dir.txt', 'r')
    datasets_dir = data_.readline().split()[0]
    mnist = DataSet.read_data_sets(data_dir=datasets_dir)

    config = ConfigParser()
    config.read(expDir + 'input_configuration')

    mode = config.get('MAIN_PARAMETER_SETTING', 'mode')
    l_rate = config.getfloat('MAIN_PARAMETER_SETTING', 'learning_rate')
    momentum = config.getfloat('MAIN_PARAMETER_SETTING', 'momentum')
    gamma = config.getfloat('MAIN_PARAMETER_SETTING', 'gamma')
    p_input = config.getfloat('MAIN_PARAMETER_SETTING', 'p_input')
    p_conv = config.getfloat('MAIN_PARAMETER_SETTING', 'p_conv')
    p_fc = config.getfloat('MAIN_PARAMETER_SETTING', 'p_fc')
    noise = config.getfloat('MAIN_PARAMETER_SETTING', 'noise')
    numepochs = config.getint('MAIN_PARAMETER_SETTING', 'training_epochs')

    # Dropout keep-probability schedule as a function of iteration x.
    # NOTE(review): if `mode` matches none of these, `_prob` is undefined and
    # the first use below raises NameError — consider an explicit error.
    if mode == 'scheduled_dropout':

        # decays from 1.0 down to p
        def _prob(x, gamma, p):
            return (1. - p) * np.exp(-gamma * x) + p
    elif mode == 'ann_dropout':

        # anneals from p up to 1.0
        def _prob(x, gamma, p):
            return -(1. - p) * np.exp(-gamma * x) + 1
    elif mode == 'regular_dropout':

        # constant keep-probability
        def _prob(x, gamma, p):
            return p

    sess = tf.InteractiveSession()
    #(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
    #config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.4)))

    # Inputs: flattened 28x28 images and one-hot labels for 10 classes.
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])

    # DROPOUT
    # placeholder for the probability that a neuron's output is kept during dropout
    # keep_prob will be give to feed_dict to control the dropout rate
    keep_prob_input = tf.placeholder(tf.float32)
    keep_prob_conv = tf.placeholder(tf.float32)
    keep_prob_fc = tf.placeholder(tf.float32)

    # FIRST CONV LAYER
    #dim_conv1 = int(96/p_conv)
    dim_conv1 = 32
    # The convolutional layer computes 64 features for each 5x5 patch.
    # Its weight tensor has a shape of [5, 5, 3, 64] [5x5 patch, input channels, output channels]
    W_conv1 = weight_variable([5, 5, 1, dim_conv1], noise)
    #  bias vector with a component for each output channel
    b_conv1 = bias_variable([dim_conv1], noise)
    #To apply the layer, we first reshape x to a 4d tensor
    # Second and third dimensions correspond to image width and height
    # Final dimension corresponding to the number of color channels.
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    x_image_drop = tf.nn.dropout(x_image, keep_prob_input)
    # convolve x_image with the weight tensor, add the bias, apply ReLU
    h_conv1 = tf.nn.relu(conv2d(x_image_drop, W_conv1) + b_conv1)
    # finally max pool
    h_pool1 = max_pool_2x2(h_conv1)  # now the image is 14*14
    h_pool1_drop = tf.nn.dropout(h_pool1, keep_prob_conv)

    # SECOND CONV LAYER
    # Initialize variables
    #dim_conv2 = int(128/p_conv)
    dim_conv2 = 48
    W_conv2 = weight_variable([5, 5, dim_conv1, dim_conv2], noise)
    b_conv2 = bias_variable([dim_conv2], noise)
    # Contruct the graph
    h_conv2 = tf.nn.relu(conv2d(h_pool1_drop, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)  # now the image is 7*7
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * dim_conv2])
    h_pool2_flat_drop = tf.nn.dropout(h_pool2_flat, keep_prob_conv)

    #~ ## THIRD CONV LAYER
    #~ # Initialize variables
    #~ #dim_conv3 = int(256/p_fc)
    #~ dim_conv3 = 256
    #~ W_conv3 = weight_variable([5, 5, dim_conv2, dim_conv3], noise)
    #~ b_conv3 = bias_variable([dim_conv3],noise)
    #~ # Contruct the graph
    #~ h_conv3 = tf.nn.relu(conv2d(h_pool2_drop, W_conv3) + b_conv3)
    #~ h_pool3 = max_pool_3x3(h_conv3) # now the image is 4*4?

    #~ h_pool3_flat = tf.reshape(h_pool3, [-1, 4 * 4 * dim_conv3])
    #~ h_pool3_flat_drop = tf.nn.dropout(h_pool3_flat, keep_prob_conv)

    # DENSE LAYER 1
    #DIM_1 = int(2048/p_fc)
    DIM_1 = 2048
    W_fc1 = weight_variable([7 * 7 * dim_conv2, DIM_1], noise)
    b_fc1 = bias_variable([DIM_1], noise)
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat_drop, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob_fc)

    # DENSE LAYER 2
    #DIM_2 = int(2048/p_fc)
    DIM_2 = 1024
    W_fc2 = weight_variable([DIM_1, DIM_2], noise)
    b_fc2 = bias_variable([DIM_2], noise)
    h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob_fc)

    # READOUT LAYER
    W_out = weight_variable([DIM_2, 10], noise)
    b_out = bias_variable([10], noise)
    y_conv = tf.matmul(h_fc2_drop, W_out) + b_out

    # Loss Function for evaluation (i.e. compare with actual labels)
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))

    # the actual operation on the graph
    train_step = tf.train.AdamOptimizer(l_rate,
                                        beta1=momentum).minimize(cross_entropy)
    #train_step = tf.train.GradientDescentOptimizer(1e-4,beta1=0.999).minimize(cross_entropy)

    # EVALUATE THE MODEL
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    #correct_prediction = tf.equal(tf.nn.top_k(y_conv,2)[1], tf.nn.top_k(y_,2)[1])
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    #AP  = sparse_average_precision_at_k(y_conv, tf.cast(y_, tf.int64), 1)
    #mAP =  tf.reduce_mean(tf.cast(AP, tf.float32))

    #################################################

    # Run the session to initialize variables
    sess.run(tf.global_variables_initializer())
    #sess.run(tf.local_variables_initializer())

    # Keep some history
    accTrSet = []
    accTeSet = []
    accValSet = []
    Xentr = []
    gammaValues = []

    # Some training params
    TRAIN_EVAL = True
    TEST_EVAL = True
    VALID = True
    batchsize = 128
    eval_batchsize = 200

    num_train_batches = mnist.train.labels.shape[0] / batchsize
    numiter = numepochs * num_train_batches
    num_test_batches = mnist.test.labels.shape[0] / eval_batchsize
    #num_valid_batches = mnist.validation.labels.shape[0] / eval_batchsize
    print("Epochs: %d \t Training batches: %d \t Iterations: %d \t Mode: %s"\
                    %(numepochs, num_train_batches, numiter, mode))

    start_time = time.time()
    ## TRAINING ITERATIONS
    for i in range(int(numiter)):

        # Dropout probabilities for this iteration
        _prob_input = _prob(i, gamma, p_input)
        _prob_conv = _prob(i, gamma, p_conv)
        _prob_fc = _prob(i, gamma, p_fc)
        gammaValues.append(_prob_fc)

        ###################################################
        # calculate accuracies and cost every 500 iterations
        if i % 100 == 0 and i != 0:
            ##############################################
            # calculate TRAIN  accuracy on the SINGLE BATCH
            #train_accuracy, xentropy = sess.run((accuracy, cross_entropy),
            #feed_dict={x:batch[0], y_: batch[1],
            #keep_prob_input: 1.0, keep_prob_conv: 1.0, keep_prob_fc: 1.0}) # no dropout
            #accTrSet.append(train_accuracy)
            #Xentr.append(xentropy)

            ##############################################
            # calculate TRAINING accuracy on the whole training set
            train_accuracy = 0.
            xentropy = 0.
            if TRAIN_EVAL:
                for j in range(
                        int(num_train_batches)):  # Must be done batchwise
                    batch = mnist.train.next_batch(batchsize)
                    t_a, x_e = sess.run(
                        (accuracy, cross_entropy),
                        feed_dict={
                            x: batch[0],
                            y_: batch[1],
                            keep_prob_input: 1.0,
                            keep_prob_conv: 1.0,
                            keep_prob_fc: 1.0
                        })  # no dropout

                    train_accuracy += t_a
                    xentropy += x_e
                train_accuracy = train_accuracy / num_train_batches
                xentropy = xentropy / num_train_batches
                accTrSet.append(train_accuracy)
                Xentr.append(xentropy)

            ##############################################
            # calculate TEST accuracy on the whole test set
            test_accuracy = 0.
            if TEST_EVAL:
                for j in range(
                        int(num_test_batches)):  # Must be done batchwise
                    batch = mnist.test.next_batch(eval_batchsize)
                    test_accuracy += accuracy.eval(
                        feed_dict={
                            x: batch[0],
                            y_: batch[1],
                            keep_prob_input: 1.0,
                            keep_prob_conv: 1.0,
                            keep_prob_fc: 1.0
                        })  # no dropout
                test_accuracy = test_accuracy / num_test_batches
                accTeSet.append(test_accuracy)

            ##############################################
            # Perform validation for early stopping
            valid_accuracy = 0.
            if VALID:
                # calculate VALIDATION accuracy on the whole validation set
                valid_accuracy = accuracy.eval(
                    feed_dict={
                        x: mnist.validation.images,
                        y_: mnist.validation.labels,
                        keep_prob_input: 1.0,
                        keep_prob_conv: 1.0,
                        keep_prob_fc: 1.0
                    })  # no dropout
                accValSet.append(valid_accuracy)
                #print("Droput prob: %f"%(prob))

                ## Early stopping
                #if len(accValSet)>5 and accValSet[-1]<accValSet[-2]:
                #break

            duration = time.time() - start_time
            start_time = time.time()
            print("step %d: \t cross entropy: %f \t training accuracy: %f \t test accuracy: %f \t valid accuracy: %f \t prob: %f \t time: %f"\
                            %(i,  xentropy, train_accuracy, test_accuracy, valid_accuracy, _prob_fc, duration))

        ## The actual training step
        # SCHEDULING DROPOUT: no droput at first, tends to 0.5 as iterations increase
        batch = mnist.train.next_batch(batchsize)

        train_step.run(
            feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob_input: _prob_input,  #0.9
                keep_prob_conv: _prob_conv,  #0.75
                keep_prob_fc: _prob_fc
            })  #0.5  # CHANGE HERE
    #!# End of training iterations #

    # Finally test on the test set #
    ## Testing on small gpus must be done batch-wise to avoid OOM
    test_accuracy = 0
    for j in range(num_test_batches):
        batch = mnist.test.next_batch(batchsize)
        test_accuracy += accuracy.eval(
            feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob_input: 1.0,
                keep_prob_conv: 1.0,
                keep_prob_fc: 1.0
            })  # no dropout

    print("test accuracy: %g" % (test_accuracy / num_test_batches))

    # Persist the accuracy/loss history for later plotting (py2 `file`).
    f = file(expDir + str(ii) + 'accuracies.pkl', 'w')
    cPickle.dump((accTrSet, accValSet, accTeSet, Xentr),
                 f,
                 protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()

    sess.close()
Beispiel #32
0
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import DataSet

# Load features/labels, hold out 20% for testing (fixed seed for
# reproducibility), fit a 5-NN classifier and report test accuracy.
X, y = DataSet.dataset()

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

knn = KNeighborsClassifier(n_neighbors=5, algorithm='kd_tree')

knn.fit(X_train, y_train)

# Message text is Chinese for "model accuracy".
print('模型的准确度:{}'.format(knn.score(X_test, y_test)))
Beispiel #33
0
def recommend(user, train, w, k, n):
    """Recommend up to *n* items for *user* using item-based similarity.

    For each item in the user's history (``train[user]``), look at its *k*
    most similar items in the similarity matrix *w*, skip items already in
    the history, and record the similarity as the candidate's score (a later
    history item that lists the same candidate overwrites the earlier score).
    Returns a dict of the *n* highest-scoring candidates.
    """
    history = train[user]
    by_score_desc = lambda kv: kv[1]
    scores = {}
    for item in history:
        top_neighbours = sorted(w[item].items(), key=by_score_desc,
                                reverse=True)[:k]
        for candidate, similarity in top_neighbours:
            if candidate in history:
                continue
            # Interest weight is fixed at 1.0, so the score is the similarity.
            scores[candidate] = similarity * 1.0
    best = sorted(scores.items(), key=by_score_desc, reverse=True)[:n]
    return dict(best)


if __name__ == '__main__':
    # Build the item-similarity model from the training split and evaluate
    # top-10 recommendations (k=10 neighbours) on the test split.
    dataset = DataSet.openfile('ratings.csv')
    train, test = DataSet.splitdata_d(dataset)
    # NOTE: `itemsimliarity` (sic) is the external helper's actual name.
    w = itemsimliarity(train)
    re = {}
    for user in test.keys():
        re[user] = recommend(user, train, w, 10, 10)

    # Standard recommender metrics: recall, precision, coverage, popularity.
    x = Evaluating.recall(test, re)
    y = Evaluating.precision(test, re)
    z = Evaluating.coverage(train, test, re)
    a = Evaluating.popularity(train, re)
    print(x)
    print(y)
    print(z)
    print(a)
Beispiel #34
0
import pdb
from DataSet import *
from AeModel import *

# Build, train and evaluate an autoencoder over the default DataSet.
data_set: DataSet = DataSet()
ae_model: AeModel = AeModel(data_set)

ae_model.define_my_model()
ae_model.compile_the_model()
ae_model.train_the_model()
ae_model.evaluate_the_model()
Beispiel #35
0
# Whether to evaluate on the test split (1, default) or the train split.
parser.add_argument('-test',
                    type=int,
                    default=1,
                    help='evaluation on test set or train set')

args = parser.parse_args()

# model = inception_v3(dropout=0.5)
# Load the fully serialized model from the checkpoint path given via -r
# and move it to the GPU.
model = torch.load(args.r)
model = model.cuda()

# Derive a run name from the checkpoint filename by stripping its last 10
# characters -- presumably a step/extension suffix; TODO confirm the naming
# convention of the checkpoint files.
temp = args.r.split('/')
name = temp[-1][:-10]
if args.test == 1:
    # Evaluate on the test split; keep ordering deterministic (no shuffle,
    # no dropped remainder) so features line up with the dataset.
    data = DataSet.create(args.data, train=False)
    data_loader = torch.utils.data.DataLoader(data.test,
                                              batch_size=128,
                                              shuffle=False,
                                              drop_last=False)
else:
    # Otherwise evaluate on the training split with the same loader settings.
    data = DataSet.create(args.data, test=False)
    data_loader = torch.utils.data.DataLoader(data.train,
                                              batch_size=128,
                                              shuffle=False,
                                              drop_last=False)

# Run the network over the whole loader and collect embeddings + labels.
features, labels = extract_features(model,
                                    data_loader,
                                    print_freq=1e5,
                                    metric=None)
				value = self.getBranch(value, branch)

			return self.classify(example, nodeAt)

		else:
			if nodeAt is not None: return nodeAt.getLabel()
			return None


if __name__=="__main__":
	import DataSet

	f = "/Users/ducrix/Documents/Research/Python/data/ml/test_weather.gla"
	#f = "/Users/ducrix/Documents/Research/Python/data/ml/test_genders.gla"
	#f = "/Users/ducrix/Documents/Research/Python/data/ml/test_cars.gla"
	#f = "/Users/ducrix/Documents/Research/Python/data/ml/test_words.gla"

	ds = DataSet.DataSet(f)

	train, test = ds.getTrainTestSet(.1)

	dt = DT(train, ds.getAttributes(), 2)
	#print dt.data[:]
	#print dt.classify(test[0])
	t = dt.test(test)
	p = [tt[0]==tt[1] for tt in t]
	print p, t
	print (1.*p.count(True))/len(p)
	#print len(train), [t.getValues() for t in train]
	#print len(test), [t.getValues() for t in test]
Beispiel #37
0
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import DataSet

data, label = DataSet.dataset()

num_classes = 8

label = np.eye(num_classes)[label.reshape(-1).astype(np.int8)]

X_train, X_test, y_train, y_test = train_test_split(data,
                                                    label,
                                                    random_state=0,
                                                    test_size=0.2)

num_features = 408
num_h1 = 100
num_h2 = 100
learning_rate = 0.001

X = tf.placeholder(tf.float32, shape=(None, num_features))
y = tf.placeholder(tf.float32, shape=(None, num_classes))

weights = {
    'weight_h1':
    tf.Variable(tf.truncated_normal((num_features, num_h1), stddev=0.1)),
    'weight_h2':
    tf.Variable(tf.truncated_normal((num_h1, num_h2), stddev=0.1)),
    'weight_ouput':
Beispiel #38
0
def main(argv):
    """Train a Convolutional Pose Machine (CPM) on the configured dataset.

    Builds train/valid data generators from FLAGS, constructs the CPM graph,
    optionally restores pretrained weights (.pkl file or TF checkpoint), then
    runs the training loop with periodic validation, checkpointing and
    intermediate result images written to FLAGS.result_dir.

    :param argv: unused; present for command-line entry-point compatibility.
    :return: None
    """
    #os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    """ Create dirs for saving models and logs
    """
    # Directories are keyed by the current date/hour so each run gets its own.
    model_path_suffix = datetime.datetime.now().strftime('%Y_%m_%d_%H')
    model_save_dir = os.path.join('models', 'weights', model_path_suffix)
    train_log_save_dir = os.path.join('models', 'logs', model_path_suffix,
                                      'train')
    test_log_save_dir = os.path.join('models', 'logs', model_path_suffix,
                                     'test')
    os.system('mkdir -p {}'.format(model_save_dir))
    os.system('mkdir -p {}'.format(train_log_save_dir))
    os.system('mkdir -p {}'.format(test_log_save_dir))
    """ Create data generator
    """
    # Two generators with identical preprocessing; only the sample_set differs.
    data_generator = DataSet(FLAGS.train_img_dir,
                             FLAGS.batch_size,
                             FLAGS.input_size,
                             FLAGS.heatmap_size,
                             FLAGS.normalize_img,
                             FLAGS.category,
                             FLAGS.joint_gaussian_variance,
                             FLAGS.num_of_joints,
                             FLAGS.center_radius,
                             sample_set='train').data_generator
    data_generator_eval = DataSet(FLAGS.val_img_dir,
                                  FLAGS.batch_size,
                                  FLAGS.input_size,
                                  FLAGS.heatmap_size,
                                  FLAGS.normalize_img,
                                  FLAGS.category,
                                  FLAGS.joint_gaussian_variance,
                                  FLAGS.num_of_joints,
                                  FLAGS.center_radius,
                                  sample_set='valid').data_generator
    """ Build network graph
    """
    model = cpm_model.CPM_Model(input_size=FLAGS.input_size,
                                heatmap_size=FLAGS.heatmap_size,
                                stages=FLAGS.cpm_stages,
                                joints=FLAGS.num_of_joints,
                                img_type=FLAGS.color_channel,
                                is_training=True)
    model.build_loss(FLAGS.init_lr,
                     FLAGS.lr_decay_rate,
                     FLAGS.lr_decay_step,
                     optimizer='Adam')
    print('=====Model Build=====\n')

    merged_summary = tf.summary.merge_all()
    """ Training
    """
    #device_count = {'GPU': 0} if FLAGS.use_gpu else {'GPU': 0}
    with tf.Session() as sess:
        # Create tensorboard
        train_writer = tf.summary.FileWriter(train_log_save_dir, sess.graph)
        test_writer = tf.summary.FileWriter(test_log_save_dir, sess.graph)

        # Create model saver
        saver = tf.train.Saver(max_to_keep=None)

        # Init all vars
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        #Restore pretrained weights
        if FLAGS.pretrained_model != '':
            if FLAGS.pretrained_model.endswith('.pkl'):
                # Pickled weight dict: load layer-by-layer in finetune mode.
                model.load_weights_from_file(FLAGS.pretrained_model,
                                             sess,
                                             finetune=True)

                # Check weights
                for variable in tf.trainable_variables():
                    with tf.variable_scope('', reuse=True):
                        var = tf.get_variable(variable.name.split(':0')[0])
                        print(variable.name, np.mean(sess.run(var)))

            else:
                # Otherwise treat it as a TF checkpoint name.
                # NOTE(review): the checkpoint directory is hard-coded to
                # 'models/weights/2018_05_19_12' -- confirm this is intended.
                saver.restore(
                    sess,
                    os.path.join('models/weights/2018_05_19_12',
                                 FLAGS.pretrained_model))

                # check weights
                for variable in tf.trainable_variables():
                    with tf.variable_scope('', reuse=True):
                        var = tf.get_variable(variable.name.split(':0')[0])
                        print(variable.name, np.mean(sess.run(var)))

        for training_itr in range(FLAGS.training_iters):
            t1 = time.time()

            # Read one batch data
            batch_x_np, batch_gt_heatmap_np, batch_cmap = next(data_generator)

            # Forward and update weights
            stage_losses_np, total_loss_np, _, summaries, current_lr, \
            stage_heatmap_np, global_step = sess.run([model.stage_loss,
                                                      model.total_loss,
                                                      model.train_op,
                                                      merged_summary,
                                                      model.cur_lr,
                                                      model.stage_heatmap,
                                                      model.global_step
                                                      ],
                                                     feed_dict={model.input_images: batch_x_np,
                                                                model.gt_hmap_placeholder: batch_gt_heatmap_np,
                                                                model.cmap_placeholder: batch_cmap})

            # Show training info
            print_current_training_stats(global_step, current_lr,
                                         stage_losses_np, total_loss_np,
                                         time.time() - t1)

            # Write logs
            train_writer.add_summary(summaries, global_step)
            # Draw intermediate results
            if not os.path.exists(FLAGS.result_dir):
                os.makedirs(FLAGS.result_dir)
            # Undo the [-0.5, 0.5] normalization for visualization.
            show_img = (batch_x_np[0] + 0.5) * 256
            # Predicted joints overlay ...
            img_save, joint_coord_set = visualize_result(
                show_img, stage_heatmap_np, FLAGS.num_of_joints,
                FLAGS.heatmap_size, FLAGS.joint_color_code)
            cv2.imwrite(
                FLAGS.result_dir + '/result' + str(training_itr) + '.jpg',
                img_save)
            # ... and the ground-truth overlay, for side-by-side inspection.
            hm = np.expand_dims(batch_gt_heatmap_np, axis=0)
            img_save, joint_coord_set = visualize_result(
                show_img, hm, FLAGS.num_of_joints, FLAGS.heatmap_size,
                FLAGS.joint_color_code)
            cv2.imwrite(
                FLAGS.result_dir + '/label' + str(training_itr) + '.jpg',
                img_save)
            # Draw intermediate results
            # if (global_step + 1) % 10 == 0:
            # if FLAGS.color_channel == 'GRAY':
            # demo_img = np.repeat(batch_x_np[0], 3, axis=2)
            # if FLAGS.normalize_img:
            # demo_img += 0.5
            # else:
            # demo_img += 128.0
            # demo_img /= 255.0
            # elif FLAGS.color_channel == 'RGB':
            # if FLAGS.normalize_img:
            # demo_img = batch_x_np[0] + 0.5
            # else:
            # demo_img += 128.0
            # demo_img /= 255.0
            # else:
            # raise ValueError('Non support image type.')

            # demo_stage_heatmaps = []
            # for stage in range(FLAGS.cpm_stages):
            # demo_stage_heatmap = stage_heatmap_np[stage][0, :, :, 0:FLAGS.num_of_joints].reshape(
            # (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints))
            # demo_stage_heatmap = cv2.resize(demo_stage_heatmap, (FLAGS.input_size, FLAGS.input_size))
            # demo_stage_heatmap = np.amax(demo_stage_heatmap, axis=2)
            # demo_stage_heatmap = np.reshape(demo_stage_heatmap, (FLAGS.input_size, FLAGS.input_size, 1))
            # demo_stage_heatmap = np.repeat(demo_stage_heatmap, 3, axis=2)
            # demo_stage_heatmaps.append(demo_stage_heatmap)

            # demo_gt_heatmap = batch_gt_heatmap_np[0, :, :, 0:FLAGS.num_of_joints].reshape(
            # (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints))
            # demo_gt_heatmap = cv2.resize(demo_gt_heatmap, (FLAGS.input_size, FLAGS.input_size))
            # demo_gt_heatmap = np.amax(demo_gt_heatmap, axis=2)
            # demo_gt_heatmap = np.reshape(demo_gt_heatmap, (FLAGS.input_size, FLAGS.input_size, 1))
            # demo_gt_heatmap = np.repeat(demo_gt_heatmap, 3, axis=2)

            # if FLAGS.cpm_stages > 4:
            # upper_img = np.concatenate((demo_stage_heatmaps[0], demo_stage_heatmaps[1], demo_stage_heatmaps[2]),
            # axis=1)
            # if FLAGS.normalize_img:
            # blend_img = 0.5 * demo_img + 0.5 * demo_gt_heatmap
            # else:
            # blend_img = 0.5 * demo_img / 255.0 + 0.5 * demo_gt_heatmap
            # lower_img = np.concatenate((demo_stage_heatmaps[FLAGS.cpm_stages - 1], demo_gt_heatmap, blend_img),
            # axis=1)
            # demo_img = np.concatenate((upper_img, lower_img), axis=0)
            # cv2.imshow('current heatmap', (demo_img * 255).astype(np.uint8))
            # cv2.waitKey(1000)
            # else:
            # upper_img = np.concatenate((demo_stage_heatmaps[FLAGS.cpm_stages - 1], demo_gt_heatmap, demo_img),
            # axis=1)
            # cv2.imshow('current heatmap', (upper_img * 255).astype(np.uint8))
            # cv2.waitKey(1000)

            # Periodic validation: average the loss over 10 eval batches.
            if (global_step + 1) % FLAGS.validation_iters == 0:
                mean_val_loss = 0
                cnt = 0

                while cnt < 10:
                    batch_x_np, batch_gt_heatmap_np, batch_cmap = next(
                        data_generator_eval)

                    total_loss_np, summaries = sess.run(
                        [model.total_loss, merged_summary],
                        feed_dict={
                            model.input_images: batch_x_np,
                            model.gt_hmap_placeholder: batch_gt_heatmap_np,
                            model.cmap_placeholder: batch_cmap
                        })
                    mean_val_loss += total_loss_np
                    cnt += 1

                print('\nValidation loss: {:>7.2f}\n'.format(mean_val_loss /
                                                             cnt))
                test_writer.add_summary(summaries, global_step)

            # Save models
            if (global_step + 1) % FLAGS.model_save_iters == 0:
                saver.save(sess=sess,
                           save_path=model_save_dir + '/' +
                           FLAGS.network_def.split('.py')[0],
                           global_step=(global_step + 1))
                print('\nModel checkpoint saved...\n')

            # Finish training
            if global_step == FLAGS.training_iters:
                break
    print('Training done.')
Beispiel #39
0
def train(path_to_train, data_frame, pretrained_weights, save_dir, batch_size,
          shape, lr, val_ratio, epochs):
    """Fine-tune the pretrained discriminant model on the protein dataset.

    Splits ``data_frame`` into train/validation by ``val_ratio``, trains only
    the last two layers, logs to TensorBoard, checkpoints the best model by
    validation loss, and writes loss/F1 history plus a summary plot.

    :param path_to_train: directory containing the training images.
    :param data_frame: pandas DataFrame describing samples and labels.
    :param pretrained_weights: weights used to initialise the model.
    :param save_dir: pathlib.Path under which models/logs/plots are written.
    :param batch_size: generator batch size.
    :param shape: (height, width) of the single-channel input images.
    :param lr: Adam learning rate.
    :param val_ratio: fraction of samples held out for validation.
    :param epochs: number of training epochs.
    :return: 1 on completion.
    """
    model = create_discriminant_model((shape[0], shape[1], 1),
                                      pretrained_weights)
    # Freeze every layer except the last two so only the head is fine-tuned.
    # BUG FIX: the Keras attribute is `trainable`; the original assigned
    # `is_trainable`, which Keras ignores, so nothing was actually frozen.
    for layer in model.layers[:-2]:
        layer.trainable = False
    model.compile(
        # loss=[weighted_binary_crossentropy],
        loss='binary_crossentropy',
        # optimizer=SGD(lr=1e-4,momentum=0.9),
        optimizer=Adam(lr=lr),
        metrics=['acc', f1])
    model.summary()
    # Keep only the checkpoint with the lowest validation loss seen so far.
    checkpoint = ModelCheckpoint(str(
        save_dir.joinpath('next_base.model%f.epoch{epoch:02d}' % lr)),
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='min',
                                 period=1)
    #lr_sched = step_decay_schedule(initial_lr=1e-3, step_size=epochs, min_lr=1e-4)

    use_multiprocessing = False  # DO NOT COMBINE MULTIPROCESSING WITH CACHE!
    workers = 1  # DO NOT COMBINE MULTIPROCESSING WITH CACHE!

    # Randomly sample validation rows without replacement; record their paths
    # so the exact split can be reproduced later.
    val_sample_num = np.floor(len(data_frame) * val_ratio).astype(int)
    val_indices = np.random.choice(range(0, len(data_frame)),
                                   val_sample_num,
                                   replace=False)
    val_data = data_frame.iloc[val_indices, ]
    pathsVal, labelsVal = DataSet.getValidationDataset(path_to_train, val_data,
                                                       False)
    pd.DataFrame([pathsVal
                  ]).to_csv(str(save_dir.joinpath("validation_paths.csv")),
                            mode='w',
                            header=False,
                            index=False)

    train_indices = np.setdiff1d(range(0, len(data_frame)),
                                 val_indices)  # still ascending (unshuffled) here
    train_indices = np.random.permutation(train_indices)
    train_data = data_frame.iloc[train_indices, ]
    pathsTrain, labelsTrain = DataSet.getTrainDataset(path_to_train,
                                                      train_data, False)
    pd.DataFrame([pathsTrain
                  ]).to_csv(str(save_dir.joinpath("train_paths.csv")),
                            mode='w',
                            header=False,
                            index=False)

    print(pathsTrain.shape, labelsTrain.shape, pathsVal.shape, labelsVal.shape)
    # Augment + shuffle the training stream; keep validation deterministic.
    tg = ProteinDataGenerator(pathsTrain,
                              labelsTrain,
                              batch_size,
                              shape,
                              is_mask=False,
                              use_cache=True,
                              augment=True,
                              shuffle=True)
    vg = ProteinDataGenerator(pathsVal,
                              labelsVal,
                              batch_size,
                              shape,
                              is_mask=False,
                              use_cache=True,
                              shuffle=False)

    tb = TensorBoard(log_dir=str(save_dir.joinpath('tb_logs')),
                     histogram_freq=0,
                     batch_size=batch_size)

    hist = model.fit_generator(tg,
                               steps_per_epoch=len(tg),
                               validation_data=vg,
                               validation_steps=8,
                               epochs=epochs,
                               use_multiprocessing=use_multiprocessing,
                               workers=workers,
                               verbose=1,
                               callbacks=[tb, checkpoint])

    loss_list = hist.history["loss"]
    val_loss_list = hist.history["val_loss"]
    f1_list = hist.history["f1"]
    val_f1_list = hist.history["val_f1"]
    #histories.append(hist)
    # BUG FIX: the original wrote to the undefined name `res_dir` (NameError);
    # the results file belongs under `save_dir` like every other artifact.
    pd.DataFrame([loss_list, val_loss_list, f1_list, val_f1_list],
                 index=['loss', 'val_loss', 'f1',
                        'val_f1']).to_csv(str(save_dir.joinpath("results.csv")),
                                          mode='w',
                                          header=False,
                                          index=True)

    # BUG FIX: the x-axis was hard-coded to np.linspace(1, 50, 50), which
    # raises a shape mismatch whenever epochs != 50; derive it from the
    # actual history length instead.
    epoch_axis = np.arange(1, len(loss_list) + 1)
    fig, ax = plt.subplots(1, 2, figsize=(15, 5))
    ax[0].set_title('loss')
    ax[0].plot(epoch_axis, loss_list, label="Train loss")
    ax[0].plot(epoch_axis, val_loss_list, label="Validation loss")
    ax[1].set_title('acc')  # NOTE(review): curves are F1, title says 'acc' -- confirm
    ax[1].plot(epoch_axis, f1_list, label="Train F1")
    ax[1].plot(epoch_axis, val_f1_list, label="Validation F1")
    ax[0].legend()
    ax[1].legend()
    plt.savefig(str(save_dir.joinpath('fcnn_model_Adam%f.png' % lr)))

    return 1
Beispiel #40
0
def main(argv):
    """Run a GA to find the best sort/feature chromosome on a train file,
    then optionally apply that chromosome to a test file.

    Usage: python MainGA --input=train.txt --test=test.txt

    NOTE(review): the `argv` parameter is ignored -- options are parsed from
    sys.argv[1:] directly. Also, the short-option spec "i:t" declares -t as
    taking no argument while the long option "test=" requires one; confirm
    which is intended.
    """
    global TrainSet
    global TestSet

    # getopt specs: -i/--input=<trainfile>, -t/--test=<testfile>
    letters = "i:t"
    keywords = ["input=", "test="]
    trainfile = ""
    testfile = ""

    # run the algorithm by: python MainGA --input=train.txt --test=test.txt
    try:
        opts, arg = getopt.getopt(sys.argv[1:], letters, keywords)
    except getopt.GetoptError:
        print "GetoptError: -i <trainfile>"
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-i", "--input"):
            trainfile = arg
        if opt in ("-t", "--test"):
            testfile = arg

    if trainfile:
        # Load the training data and record the GA hyper-parameters on it.
        trainF = open(trainfile, "r")
        TrainSet = DataSet(False, trainF, iB, sB)
        trainF.close()
        TrainSet.set_dataset_filename(str(trainfile))
        TrainSet.set_generations(GENERATIONS)
        TrainSet.set_pop_size(POPULATION)
        TrainSet.set_mutation_rate(MUTATION_RATE)
        genome = G1DList.G1DList(SIZE_OF_CHROMOSOMES)
        # * 2 to ascending and descending sorts, otherwise you'll have just ascending order
        genome.setParams(rangemin=1, rangemax=(TrainSet.FeaturesNum) * 2 * sB)
        genome.evaluator.set(eval_func)  # change here if you want a different fitness function
        ga = GSimpleGA.GSimpleGA(genome)
        ga.setGenerations(GENERATIONS)  # changes the # of generations(default 100)
        ga.setPopulationSize(POPULATION)  # changes the # of individuals(default 80)
        # ga.setMutationRate(MUTATION_RATE)   # --> use it when you want to change the Mutation Rate (default 0.02)
        # ga.setCrossoverRate(CROSS_OVER_RATE) # --> use it when you want to change the Crossover Rate (default 0.8)
        # ga.setMultiProcessing(True) # --> please read this: http://pyevolve.sourceforge.net/wordpress/?p=843

        # Run the GA, printing statistics every 10 generations.
        ga.evolve(freq_stats=10)

        chromosome = ga.bestIndividual().getInternalList()  # the chromosome selected by GA
        # print chromosome ---> you can print the chromosome to see what was selected
        TrainSet.set_best_individual(chromosome)
        TrainSet.sort_dataset_by_chromosome(chromosome)
        TrainSet.write_scores(isTrain=True)

    else:
        sys.exit("GA_Algorithm: A train file is required.")

    if testfile:
        # Evaluate the train-selected chromosome on the held-out test file.
        testF = open(testfile, "r")
        TestSet = DataSet(True, testF, iB, sB)
        testF.close()
        TestSet.set_dataset_filename(str(testfile))
        TestSet.set_generations(GENERATIONS)
        TestSet.set_pop_size(POPULATION)
        TestSet.set_mutation_rate(MUTATION_RATE)
        TestSet.isTest = True
        TestSet.sort_dataset_by_chromosome(
            TrainSet.get_best_individual()
        )  # order the testset with chromosome found by GA with trainset
        TestSet.write_scores()
Beispiel #41
0
# Restore the most recent checkpoint from MODEL_DIR if one exists;
# otherwise continue with freshly initialised variables.
checkpoint = tf.train.get_checkpoint_state(MODEL_DIR)
if checkpoint and checkpoint.model_checkpoint_path:
    saver.restore(session, checkpoint.model_checkpoint_path)
    print("Loaded checkpoint: {}".format(checkpoint.model_checkpoint_path))
else:
    print("Unable to load checkpoint")

# Global batch counter used to number checkpoints and log points.
counter = 0

print(len(DataSet.TRAIN_DATASET.images))

# Save an initial checkpoint (step 0) before training begins.
saver.save(session, os.path.join(MODEL_DIR, "network"), global_step=counter)

for epoch in range(3):
    print(epoch)
    for images, labels in DataSet.iter_batches(50):
        counter += 1
        if counter % 100 == 0:
            print(counter)

            acc, summ = session.run([model.accuracy, summary], feed_dict = {
                model.input_var: images,
                model.corr_labels: labels,
                model.keep_prob: 1.0
            })

            writer.add_summary(summ, counter)
            print("iteration {}, training accuracy {}".format(counter, acc))

        session.run([model.train], feed_dict = {
            model.input_var: images,