def featureSelection(path): dataset = DataSet() class_name='' TrainSet=[] for root, dirs,files in os.walk(path): #print root if dirs==[]: if class_name != os.path.basename(root): class_name = os.path.basename(root) print class_name class_count = len(files) freq_map={} for f in files: temp_set = set() #print class_name , " <--> " ,f with open (os.path.join(root,f),'r') as fin: lines = fin.readlines() for line in lines: for token in wordpunct_tokenize(line): if token not in punctuation: temp_set.add(token.lower()) for token in temp_set: if freq_map.has_key(token): freq_map[token] = freq_map[token] + 1 else: freq_map[token]=1 dataset.add_new_class(SNLPClass(class_name, freq_map,class_count)) return dataset
def add_source(self):
    """Ask the user for a data file; on selection, load it into a fresh
    DataSet, remember it, and show it in the source listbox."""
    chosen = tkFileDialog.askopenfilename()
    if not chosen:
        # Dialog was cancelled — nothing to load.
        return
    loaded = DataSet()
    loaded.readFromFile(chosen)
    self.datasets.append(loaded)
    self.sourcelist.insert(END, str(loaded))
def main(): if len(sys.argv) < 2: print 'Usage: python dataProcess.py [filenames]' fnames = sys.argv[1:] for fname in fnames: f = open(fname) dataArr = json.loads(f.read()) lowerDataArr = sanitizeDataDict(dataArr) ds = DataSet(lowerDataArr) classHist = ds.getHistRepr("difficulty") print classHist classHistBinned = BinnedDataDict(classHist) print classHistBinned
def main(argv = None): if argv is None: argv = sys.argv if len(argv) < 3: print 'Please specify a training set and its metadata file.' return 1 dt = DataSet(argv[1], argv[2]) #print dt.attributes #print dt.get_subset([1,2]).get_values('winner') print 'entropy: ' + str(dt.get_subset().entropy()) #print dt.data['winner'] #print dt.entropy() #print dt.unclassified #print sys.getsizeof(dt.data['winner']['dataPoints']) #print dt.data['winner']['dataPoints'] return 0
# Train a hidden Markov model on sequences read from the file given on the
# command line.
import csv
import numpy as np
import sys
from HMM import *
from DataSet import *

# First CLI argument is the path to the training data file.
filename = sys.argv[1]
dSet = DataSet(filename)
# presumably 200 is the number of records/sequences to read — TODO confirm
# against DataSet.readFile.
dSet.readFile(200, "train")
# 16 hidden states, 4 output symbols — TODO confirm against the HMM class.
hmm = HMM(16, 4, dSet.trainState, dSet.trainOutput)
hmm.train()
# transforms transform_input_image = transforms.Compose([ transforms.Resize((256, 256)), transforms.ColorJitter(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) transform_target_image = transforms.Compose([ transforms.Resize((256, 256)), # transforms.ToTensor() ]) train_set = DataSet.ImageSegmentationDataset(root_train_txt, root_segm, root_images, transform_input_image, transform_target_image) train_loader = DataLoader(train_set, batch_size=8, shuffle=True) test_set = DataSet.ImageSegmentationDataset(root_test_txt, root_segm, root_images, transform_input_image, transform_target_image) test_loader = DataLoader(test_set, batch_size=8, shuffle=True) #net = Network.UNet(in_channel=3, out_channel=21).cuda() net = torch.load( '/home/freeaccess/PycharmProjects/VOCPascal/Last folder/val_loss_try_second60' ) '''def one_hot(batch_idx, target, class_count):
import csv
from tkinter import *
from Musician import *
from DataSet import *

options2 = []
musicians = []
RowCount = 0
# Load one Musician per CSV row (8 fields each).
with open('MusiciansFakeFile.txt', newline='') as inputfile:
    for row in csv.reader(inputfile):
        RowCount = RowCount + 1
        musicians.append(
            Musician(row[0], row[1], row[2], row[3], row[4], row[5], row[6],
                     row[7]))

DS = DataSet(musicians)
# Index of the most frequent genre in the dataset's genre histogram.
FrequentGenre = DS.GenreCount.index(max(DS.GenreCount))
Qanswer = ""


def YesCallBack():
    # BUG FIX: without 'global', the assignment created a function-local
    # variable and the module-level Qanswer was never updated.
    global Qanswer
    Qanswer = "Yes"


def NoCallBack():
    # BUG FIX: same as YesCallBack — update the module-level answer.
    global Qanswer
    Qanswer = "No"


root = Tk()
root.geometry("300x500")
class Individual:
    """One candidate classifier in a genetic algorithm: six 7-bit genes whose
    decoded integers form a linear scoring rule over a flower case."""

    # Case set shared by every individual; evaluated once at class creation.
    DataSet = DataSet()

    def Classification(self, Case):
        """Classify one case into class 0, 1 or 2.

        Score = PetalLength*(A/B) + PetalWidth*(C/D) + E/F where A..F are the
        decoded gene values; the absolute score is bucketed at 0.5 and 1.5.
        """
        A = self.Genotype.Gene(0).BitString2Int()
        B = self.Genotype.Gene(1).BitString2Int()
        C = self.Genotype.Gene(2).BitString2Int()
        D = self.Genotype.Gene(3).BitString2Int()
        E = self.Genotype.Gene(4).BitString2Int()
        F = self.Genotype.Gene(5).BitString2Int()
        Number = (Case.PetalLength) * (A / B) + (Case.PetalWidth) * (C / D) + (
            E / F)
        if abs(Number) > 1.5:
            return 2
        elif abs(Number) > .5:
            return 1
        else:
            return 0

    def Fitness(self):
        """Return the raw fitness: number of correctly classified cases
        (0 for an invalid genotype)."""
        if self.Genotype.Invalid():
            return 0
        CorrectnessCount = 0
        for Case in Individual.DataSet.ListOfCases:
            if self.Classification(Case) == Case.Classification:
                CorrectnessCount += 1
        return CorrectnessCount

    def WriteCorrectnessToFile(self):
        """Write every case's predicted class to Classifier1.txt and return
        the number of correct classifications (0 for an invalid genotype)."""
        if self.Genotype.Invalid():
            return 0
        CorrectnessCount = 0
        # BUG FIX: the original called OutputFile.close() AFTER 'return', so
        # the file handle was never closed; 'with' guarantees release.
        with open('Classifier1.txt', 'w') as OutputFile:
            for Case in Individual.DataSet.ListOfCases:
                # Classify once per case instead of twice (deterministic for
                # a fixed genotype, so the output is unchanged).
                Predicted = self.Classification(Case)
                OutputFile.write("%s\n" % (Predicted,))
                if Predicted == Case.Classification:
                    CorrectnessCount += 1
        return CorrectnessCount

    def Solution(self):
        """Return the perfect score: the total number of cases."""
        return Individual.DataSet.Length

    def FitnessAsPercentage(self, Fitness):
        """Convert a raw fitness into a percentage of the case count."""
        CorrectnessFraction = Fitness / Individual.DataSet.Length
        return round(CorrectnessFraction * 100, 4)

    def __init__(self):
        # 6 genes, value range [-100, 100], 7 bits each — TODO confirm
        # against the Genotype constructor.
        self.Genotype = Genotype(6, -100, 100, 7)

    def OnePointCrossover(self, OtherIndividual):
        """Swap bitstring tails with OtherIndividual at a random cut point
        (mutates both genotypes in place)."""
        CrossoverIndex = randint(0, len(self.Genotype.BitString()))
        SelfLeftHalf = self.Genotype.BitString()[:CrossoverIndex]
        SelfRightHalf = self.Genotype.BitString()[CrossoverIndex:]
        OtherLeftHalf = OtherIndividual.Genotype.BitString()[:CrossoverIndex]
        OtherRightHalf = OtherIndividual.Genotype.BitString()[CrossoverIndex:]
        self.Genotype.SetBitString(SelfLeftHalf + OtherRightHalf)
        OtherIndividual.Genotype.SetBitString(OtherLeftHalf + SelfRightHalf)

    def TwoPointCrossover(self, OtherIndividual):
        """Exchange the middle bitstring segment with OtherIndividual between
        two random cut points (mutates both genotypes in place)."""
        LeftCrossoverIndex = randint(0, len(self.Genotype.BitString()))
        RightCrossoverIndex = randint(LeftCrossoverIndex,
                                      len(self.Genotype.BitString()))
        SelfLeftThird = self.Genotype.BitString()[:LeftCrossoverIndex]
        SelfMiddleThird = self.Genotype.BitString(
        )[LeftCrossoverIndex:RightCrossoverIndex]
        SelfRightThird = self.Genotype.BitString()[RightCrossoverIndex:]
        OtherLeftThird = OtherIndividual.Genotype.BitString(
        )[:LeftCrossoverIndex]
        OtherMiddleThird = OtherIndividual.Genotype.BitString(
        )[LeftCrossoverIndex:RightCrossoverIndex]
        OtherRightThird = OtherIndividual.Genotype.BitString(
        )[RightCrossoverIndex:]
        # NOTE(review): the swapped middle is placed at the FRONT of the new
        # bitstring (not between the outer thirds). The original debug prints
        # used the same layout, so this appears intentional — confirm.
        self.Genotype.SetBitString(OtherMiddleThird + SelfLeftThird +
                                   SelfRightThird)
        OtherIndividual.Genotype.SetBitString(SelfMiddleThird + OtherLeftThird +
                                              OtherRightThird)

    def ApplyCrossover(self, OtherIndividual, NumberOfCrossoverPoints):
        """Apply one- or two-point crossover to self and OtherIndividual,
        editing both genotypes in place."""
        if NumberOfCrossoverPoints == 1:
            self.OnePointCrossover(OtherIndividual)
        else:
            self.TwoPointCrossover(OtherIndividual)

    def ApplyMutation(self):
        """Flip one randomly chosen bit of the genotype in place."""
        RandomIndex = randint(0, len(self.Genotype.BitString()) - 1)
        GenotypeAsList = list(self.Genotype.BitString())
        if GenotypeAsList[RandomIndex] == '1':
            GenotypeAsList[RandomIndex] = '0'
        else:
            GenotypeAsList[RandomIndex] = '1'
        NewGenotype = ''
        for Character in GenotypeAsList:
            NewGenotype += Character
        self.Genotype.SetBitString(NewGenotype)

    def ToString(self, NumberOfSpaces):
        # NOTE(review): NumberOfSpaces is accepted but unused — kept for
        # interface compatibility with callers.
        return self.Genotype.BitStringToString()
def main(args):
    """Train a metric-learning embedding model; logs and checkpoints go under
    checkpoints/<args.log_dir>."""
    # Save the training log (stdout is redirected into log.txt).
    log_dir = os.path.join('checkpoints', args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)
    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        # Load only the pretrained weights whose names exist in this model
        # (the embedding head keeps its fresh initialization).
        model_dict = model.state_dict()
        if args.net == 'bn':
            pretrained_dict = torch.load(
                'pretrained_models/bn_inception-239d2248.pth')
        else:
            pretrained_dict = torch.load(
                'pretrained_models/inception_v3_google-1a9a5a14.pth')
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in model_dict
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
    else:
        # Resume a full model object from the given path.
        model = torch.load(args.r)
    model = model.cuda()
    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is save at %s' % log_dir)
    # Fine-tune: pretrained backbone parameters get 1/10 of the learning
    # rate; the new embedding head gets the full rate.
    new_param_ids = set(map(id, model.Embed.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [{
        'params': base_params,
        'lr_mult': 0.1
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]
    optimizer = torch.optim.Adam(param_groups,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()
    data = DataSet.create(args.data, root=None, test=False)
    # Identity-balanced sampling: each batch draws num_instances per identity.
    train_loader = torch.utils.data.DataLoader(
        data.train,
        batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train,
                                      num_instances=args.num_instances),
        drop_last=True,
        num_workers=args.nThreads)
    for epoch in range(args.start, args.epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable (legacy pre-0.4 PyTorch API)
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()
            optimizer.zero_grad()
            embed_feat = model(inputs)
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            # Optional orthogonality regularization on the weights.
            if args.orth > 0:
                loss = orth_reg(model, loss, cof=args.orth)
            loss.backward()
            optimizer.step()
            # NOTE(review): loss.data[0] is the legacy (<0.4) scalar access;
            # on modern PyTorch this would be loss.item() — confirm the
            # project's pinned torch version before changing.
            running_loss += loss.data[0]
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')
        # Per-epoch summary: accumulated loss plus the last batch's stats.
        print(
            '[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
            % (epoch + 1, running_loss, inter_, dist_ap, dist_an))
        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))
n[dot][email protected]
bajaj[dot][email protected]
'''
# NOTE(review): the lines above close a module docstring that begins before
# this excerpt.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

import DataSet as ds
from LogisticRegression import LR

# Fixed seed for reproducible dataset generation and training.
np.random.seed(1)
plt.close('all')

dtype = ['MOONS', 'GAUSSIANS', 'LINEAR', 'SINUSOIDAL', 'SPIRAL']
# 200 samples of the SINUSOIDAL dataset (dtype[3]) with noise 0.05.
X, y, _ = ds.create_dataset(200, dtype[3], 0.05, varargin='PRESET')
print(X.shape, y.shape)
# Standardize along axis 1 — presumably features are rows and samples are
# columns here; TODO confirm against DataSet.create_dataset.
means = np.mean(X, 1).reshape(X.shape[0], -1)
stds = np.std(X, 1).reshape(X.shape[0], -1)
X = (X - means) / stds
# Polynomial (degree-5) logistic regression with L2 strength lambd=2.
Clf = LR(X, y, alpha=0.003, polyfit=True, degree=5, lambd=2)
# Two side-by-side axes for interactive plotting.
plt.ion()
fig = plt.figure(figsize=(8, 4))
gs = GridSpec(1, 2)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
'arsons', 'arsonsPerPop', 'violentPerPop', 'nonViolPerPop', ] from DataSet import * from PCA import * #print("\n\nstd:", oli.X.std()) #print("\n\nmean:", oli.X.mean()) #print("\n\nrange:", oli.X.max()-oli.X.min()) #Oli.PCA(oli) crime = DataSet(datafile='../data/normalized.csv') crime = crime.drop(['state', 'communityname']) # Drop strings crime = crime.drop(['countyCode','communityCode']) # Drop nominals crime = crime.drop_columns([ 'fold', 'murders', 'murdPerPop', 'rapes', 'rapesPerPop', 'robberies', 'robbbPerPop', # 'assaults', 'assaultPerPop', 'burglaries', 'burglPerPop', 'larcenies', 'larcPerPop', 'autoTheft', 'autoTheftPerPop', 'arsons', 'arsonsPerPop', 'ViolentCrimesPerPop', 'nonViolPerPop',
def main():
    # data_path = raw_input("Enter path : ")
    # NOTE(review): data_path is used below but its assignment is commented
    # out — unless data_path is defined at module scope (not visible in this
    # excerpt), this raises NameError. Confirm and restore the prompt above
    # if needed.
    DataSet.read_faces(data_path)
    # Print two separator banners (string repetition, Python 2 print).
    print 2 * "\n*******************************************"
class GraphWindow(Toplevel):
    """Top-level window that hosts a GraphFrame plus a Close button and
    cleans the graph up when the window is closed."""

    def __init__(self, parent, datasets=None):
        """:param parent: Tk parent widget.
        :param datasets: optional list of DataSet objects to plot.

        BUG FIX: the original default was a mutable [] shared across every
        GraphWindow instance; None is used as the sentinel instead.
        """
        Toplevel.__init__(self, parent)
        if datasets is None:
            datasets = []
        self.graph_frame = GraphFrame(self, datasets)
        Button(self.graph_frame, text="Close", fg="red",
               command=self.windowClosing).pack(side=RIGHT)
        self.graph_frame.pack(fill=BOTH, expand=1)
        # Route the window-manager close button through our cleanup.
        self.protocol("WM_DELETE_WINDOW", self.windowClosing)

    def windowClosing(self):
        # Let the graph release its resources before destroying the window.
        self.graph_frame.graph.cleanup()
        self.destroy()


if __name__ == '__main__':
    # Stand-alone demo: one empty DataSet (optionally filled from the file
    # named on the command line) with Clear/Redraw/Quit controls.
    root = Tk()
    data = DataSet()
    GF = GraphFrame(root, [data])
    GF.pack(fill=BOTH, expand=1)
    Button(root, text='Clear', command=GF.graph.clear).pack(side=LEFT)
    Button(root, text='Redraw', command=GF.graph.replot).pack(side=LEFT)
    Button(root, text='Quit', command=root.quit).pack(side=RIGHT)
    if len(argv) > 1:
        data.readFromFile(argv[-1])
    root.mainloop()
def lowerbound(dataset_name, image_index, game_type, eta, tau):
    """Search for an adversarial example for one test image.

    :param dataset_name: name of the dataset to load the network/test set for.
    :param image_index: index of the input image in the test set.
    :param game_type: 'cooperative' (A*) or 'competitive' (alpha-beta).
    :param eta: distance budget; eta[0] is the metric name ('L0'/'L1'/'L2').
    :param tau: search granularity parameter passed to the game players.
    """
    NN = NeuralNetwork(dataset_name)
    NN.load_network()
    print("Dataset is %s." % NN.data_set)
    NN.model.summary()
    dataset = DataSet(dataset_name, 'testing')
    image = dataset.get_input(image_index)
    (label, confidence) = NN.predict(image)
    label_str = NN.get_label(int(label))
    print(
        "Working on input with index %s, whose class is '%s' and the confidence is %s."
        % (image_index, label_str, confidence))
    print("The second player is being %s." % game_type)
    # Save the original input for later visual comparison.
    path = "%s_pic/idx_%s_label_[%s]_with_confidence_%s.png" % (
        dataset_name, image_index, label_str, confidence)
    NN.save_input(image, path)
    if game_type == 'cooperative':
        tic = time.time()
        cooperative = CooperativeAStar(dataset_name, image_index, image, NN,
                                       eta, tau)
        cooperative.play_game(image)
        if cooperative.ADVERSARY_FOUND is True:
            elapsed = time.time() - tic
            adversary = cooperative.ADVERSARY
            adv_label, adv_confidence = NN.predict(adversary)
            adv_label_str = NN.get_label(int(adv_label))
            print(
                "\nFound an adversary within pre-specified bounded computational resource. "
                "\nThe following is its information: ")
            print("difference between images: %s" %
                  (diffImage(image, adversary)))
            # Report the adversary under several distance metrics.
            l2dist = l2Distance(image, adversary)
            l1dist = l1Distance(image, adversary)
            l0dist = l0Distance(image, adversary)
            percent = diffPercent(image, adversary)
            print("L2 distance %s" % l2dist)
            print("L1 distance %s" % l1dist)
            print("L0 distance %s" % l0dist)
            print("manipulated percentage distance %s" % percent)
            print("class is changed into '%s' with confidence %s\n" %
                  (adv_label_str, adv_confidence))
            path = "%s_pic/idx_%s_modified_into_[%s]_with_confidence_%s.png" % (
                dataset_name, image_index, adv_label_str, adv_confidence)
            NN.save_input(adversary, path)
            # Pick the distance matching the requested metric in eta[0].
            if eta[0] == 'L0':
                dist = l0dist
            elif eta[0] == 'L1':
                dist = l1dist
            elif eta[0] == 'L2':
                dist = l2dist
            else:
                # NOTE(review): on an unrecognised metric, 'dist' is left
                # unbound and the formatting below raises NameError — confirm
                # eta[0] is always one of L0/L1/L2.
                print("Unrecognised distance metric.")
            # Save the pixel-wise difference image, annotated with the
            # distance and elapsed search time.
            path = "%s_pic/idx_%s_modified_diff_%s=%s_time=%s.png" % (
                dataset_name, image_index, eta[0], dist, elapsed)
            NN.save_input(np.absolute(image - adversary), path)
        else:
            print("Adversarial distance exceeds distance budget.")
    elif game_type == 'competitive':
        competitive = CompetitiveAlphaBeta(image, NN, eta, tau)
        competitive.play_game(image)
    else:
        print("Unrecognised game type. Try 'cooperative' or 'competitive'.")
from Input import * from DataSet import * if __name__ == "__main__": data1 = "pop" data2 = "pop" country1 = "us" country2 = "us" state1 = "Florida" state2 = "New York" county1 = "" county2 = "" start = "1991" end = "2006" uinput = Input(data1 ,data2, country1, country2, state1, state2, county1, county2, start, end) d1 = DataSet(uinput.makeDictionary(1)) print d1.data d2 = DataSet(uinput.makeDictionary(2)) print d2.data r = d1.calcR(d2) print r
val_loss /= len(test_loader) val_dice0 /= len(test_loader) val_dice1 /= len(test_loader) val_dice2 /= len(test_loader) val_dice3 /= len(test_loader) print('\nTest set: Average loss: {:.6f},\tdice0: {:.6f}\tdice1: {:.6f}\tdice2: {:.6f}\tdice3: {:.6f}\n'.format( val_loss, val_dice0, val_dice1, val_dice2, val_dice3)) if __name__ == '__main__': args = config.args device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # data info test_set = DataSet(args.class_num,args.crop_size, args.resize_scale, args.dataset_path, mode='test') test_loader = DataLoader(dataset=test_set,batch_size=args.batch_size,num_workers=1, shuffle=False) # model info # model = UNet(1, [32, 48, 64, 96, 128], 3, net_mode='3d',conv_block=RecombinationBlock).to(device) # model.load_state_dict(torch.load('./output/{}/state.pkl'.format(args.save))) # model = torch.load(args.model_path).to(device) # print(model) model = UNet(1, [32, 48, 64, 96, 128], args.class_num, net_mode='3d',conv_block=ResBlock).to(device) model.load_state_dict(torch.load('output/model-628-1.pth')) print(model) model.eval() test(model, test_loader) #img = sitk.ReadImage('/home/sxchongya/unet_pytorch/fixed/data/volume-5.nii.gz') #f = sitk.GetArrayFromImage(img) #f = torch.tensor(f)
def trainValidateSegmentation(args):
    '''
    Main function for training and validation of ESPNet segmentation models.

    Trains at several image scales per epoch, validates once per epoch, and
    writes checkpoints, per-epoch accuracy files and a tab-separated log.
    :param args: global arguments
    :return: None
    '''
    # check if processed data file exists or not; otherwise build and pickle it
    if not os.path.isfile(args.cached_data_file):
        dataLoad = ld.LoadData(args.data_dir, args.classes,
                               args.cached_data_file)
        data = dataLoad.processData()
        if data is None:
            print('Error while pickling data. Please check.')
            exit(-1)
    else:
        data = pickle.load(open(args.cached_data_file, "rb"))
    q = args.q
    p = args.p
    # load the model: encoder only, or full encoder-decoder network
    if not args.decoder:
        model = net.ESPNet_Encoder(args.classes, p=p, q=q)
        args.savedir = args.savedir + '_enc_' + str(p) + '_' + str(q) + '/'
    else:
        model = net.ESPNet(args.classes, p=p, q=q, encoderFile=args.pretrained)
        args.savedir = args.savedir + '_dec_' + str(p) + '_' + str(q) + '/'
    if args.onGPU:
        model = model.cuda()
    # create the directory if not exist
    if not os.path.exists(args.savedir):
        os.mkdir(args.savedir)
    # Optionally render the computation graph of one forward pass.
    if args.visualizeNet:
        x = Variable(torch.randn(1, 3, args.inWidth, args.inHeight))
        if args.onGPU:
            x = x.cuda()
        y = model.forward(x)
        g = viz.make_dot(y)
        g.render(args.savedir + 'model.png', view=False)
    total_paramters = netParams(model)
    print('Total network parameters: ' + str(total_paramters))
    # define optimization criteria: class-weighted 2D cross entropy
    weight = torch.from_numpy(
        data['classWeights'])  # convert the numpy array to torch
    if args.onGPU:
        weight = weight.cuda()
    criteria = CrossEntropyLoss2d(weight)  # weight
    if args.onGPU:
        criteria = criteria.cuda()
    print('Data statistics')
    print(data['mean'], data['std'])
    print(data['classWeights'])
    # compose the data with transforms: one augmentation pipeline per scale
    trainDataset_main = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1024, 512),
        myTransforms.RandomCropResize(32),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64).
        myTransforms.ToTensor(args.scaleIn),
        #
    ])
    trainDataset_scale1 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1536, 768),  # 1536, 768
        myTransforms.RandomCropResize(100),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])
    trainDataset_scale2 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1280, 720),  # 1536, 768
        myTransforms.RandomCropResize(100),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])
    trainDataset_scale3 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(768, 384),
        myTransforms.RandomCropResize(32),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])
    trainDataset_scale4 = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(512, 256),
        #myTransforms.RandomCropResize(20),
        myTransforms.RandomFlip(),
        #myTransforms.RandomCrop(64).
        myTransforms.ToTensor(args.scaleIn),
        #
    ])
    valDataset = myTransforms.Compose([
        myTransforms.Normalize(mean=data['mean'], std=data['std']),
        myTransforms.Scale(1024, 512),
        myTransforms.ToTensor(args.scaleIn),
        #
    ])
    # since we training from scratch, we create data loaders at different scales
    # so that we can generate more augmented data and prevent the network from overfitting
    # (smaller scales allow larger batch sizes).
    trainLoader = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'],
                               transform=trainDataset_main),
        batch_size=args.batch_size + 2, shuffle=True,
        num_workers=args.num_workers, pin_memory=True)
    trainLoader_scale1 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'],
                               transform=trainDataset_scale1),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.num_workers, pin_memory=True)
    trainLoader_scale2 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'],
                               transform=trainDataset_scale2),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.num_workers, pin_memory=True)
    trainLoader_scale3 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'],
                               transform=trainDataset_scale3),
        batch_size=args.batch_size + 4, shuffle=True,
        num_workers=args.num_workers, pin_memory=True)
    trainLoader_scale4 = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['trainIm'], data['trainAnnot'],
                               transform=trainDataset_scale4),
        batch_size=args.batch_size + 4, shuffle=True,
        num_workers=args.num_workers, pin_memory=True)
    valLoader = torch.utils.data.DataLoader(
        myDataLoader.MyDataset(data['valIm'], data['valAnnot'],
                               transform=valDataset),
        batch_size=args.batch_size + 4, shuffle=False,
        num_workers=args.num_workers, pin_memory=True)
    if args.onGPU:
        cudnn.benchmark = True
    start_epoch = 0
    # Optionally resume weights and epoch counter from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resumeLoc):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resumeLoc)
            start_epoch = checkpoint['epoch']
            #args.lr = checkpoint['lr']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    # Append to an existing log, or create one with a header row.
    logFileLoc = args.savedir + args.logFile
    if os.path.isfile(logFileLoc):
        logger = open(logFileLoc, 'a')
    else:
        logger = open(logFileLoc, 'w')
        logger.write("Parameters: %s" % (str(total_paramters)))
        logger.write("\n%s\t%s\t%s\t%s\t%s\t" %
                     ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)',
                      'mIOU (val'))
    logger.flush()
    optimizer = torch.optim.Adam(model.parameters(), args.lr, (0.9, 0.999),
                                 eps=1e-08, weight_decay=5e-4)
    # we step the loss by 2 after step size is reached (halve LR via StepLR)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=args.step_loss,
                                                gamma=0.5)
    for epoch in range(start_epoch, args.max_epochs):
        scheduler.step(epoch)
        lr = 0
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
        print("Learning rate: " + str(lr))
        # train for one epoch
        # We consider 1 epoch with all the training data (at different scales)
        train(args, trainLoader_scale1, model, criteria, optimizer, epoch)
        train(args, trainLoader_scale2, model, criteria, optimizer, epoch)
        train(args, trainLoader_scale4, model, criteria, optimizer, epoch)
        train(args, trainLoader_scale3, model, criteria, optimizer, epoch)
        # The main-scale pass supplies the metrics that are logged below.
        lossTr, overall_acc_tr, per_class_acc_tr, per_class_iu_tr, mIOU_tr = train(
            args, trainLoader, model, criteria, optimizer, epoch)
        # evaluate on validation set
        lossVal, overall_acc_val, per_class_acc_val, per_class_iu_val, mIOU_val = val(
            args, valLoader, model, criteria)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': str(model),
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lossTr': lossTr,
            'lossVal': lossVal,
            'iouTr': mIOU_tr,
            'iouVal': mIOU_val,
            'lr': lr
        }, args.savedir + 'checkpoint.pth.tar')
        #save the model also
        model_file_name = args.savedir + '/model_' + str(epoch + 1) + '.pth'
        torch.save(model.state_dict(), model_file_name)
        # Per-epoch accuracy breakdown written to its own text file.
        with open(args.savedir + 'acc_' + str(epoch) + '.txt', 'w') as log:
            log.write(
                "\nEpoch: %d\t Overall Acc (Tr): %.4f\t Overall Acc (Val): %.4f\t mIOU (Tr): %.4f\t mIOU (Val): %.4f"
                % (epoch, overall_acc_tr, overall_acc_val, mIOU_tr, mIOU_val))
            log.write('\n')
            log.write('Per Class Training Acc: ' + str(per_class_acc_tr))
            log.write('\n')
            log.write('Per Class Validation Acc: ' + str(per_class_acc_val))
            log.write('\n')
            log.write('Per Class Training mIOU: ' + str(per_class_iu_tr))
            log.write('\n')
            log.write('Per Class Validation mIOU: ' + str(per_class_iu_val))
        logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.7f" %
                     (epoch, lossTr, lossVal, mIOU_tr, mIOU_val, lr))
        logger.flush()
        print("Epoch : " + str(epoch) + ' Details')
        print(
            "\nEpoch No.: %d\tTrain Loss = %.4f\tVal Loss = %.4f\t mIOU(tr) = %.4f\t mIOU(val) = %.4f"
            % (epoch, lossTr, lossVal, mIOU_tr, mIOU_val))
    logger.close()
from sklearn import preprocessing

# Standardize features to zero mean / unit variance using TRAINING statistics.
scaler = preprocessing.StandardScaler().fit(training_vectors)
scaled_training_vectors = scaler.transform(training_vectors)
# Kept only for backward compatibility with any later use of this name.
test_scaler = preprocessing.StandardScaler().fit(test_vectors)
# BUG FIX: the test set was previously transformed with a scaler fitted on
# the test set itself — that leaks test statistics and applies a transform
# inconsistent with training. Use the training-set scaler instead.
scaled_test_vectors = scaler.transform(test_vectors)

################################################################################
################################################################################
# use (my) DataSet class to provide 'next_batch' functionality to TensorFlow
# Also changes labels to 'one-hot' 2D arrays
import DataSet

training_dataset = DataSet.load_dataset(scaled_training_vectors,
                                        training_labels)
test_dataset = DataSet.load_dataset(scaled_test_vectors, test_labels)

################################################################################
################################################################################
# Solve with (Google) TensorFlow
import tensorflow as tf

# Create the model (a single linear layer / multinomial logistic regression)
# 7 elements in each feature vector, 9 possible facies
x = tf.placeholder(tf.float32, [None, 7])
W = tf.Variable(tf.zeros([7, 9]))
b = tf.Variable(tf.zeros([9]))
y = tf.matmul(x, W) + b
import DataSet
from config import *

# XOR truth table: [0,0]->0, [1,0]->1, [0,1]->1, [1,1]->0.
# NOTE(review): despite the function name below, this trains on XOR data.
DS_INPUT = np.array(([0, 0], [1, 0], [0, 1], [1, 1]), dtype=np.float64)
DS_OUTPUT = np.array(([0], [1], [1], [0]), dtype=np.float64)
DATA_SET = DataSet.DataSet(DS_INPUT, DS_OUTPUT)


def test_primes():
    # Train a network on DATA_SET and print its output on one sample.
    print DATA_SET
    config = simple_config
    # get setup
    neural_network, trainer = create_training_setup(config, (1, 2), (1, 1))
    # start training
    print_title("test primes")
    trainer.train(neural_network, DATA_SET)
    # test
    test_sample = DATA_SET.getSample()
    neural_network.feed(test_sample.inputs)
    print "output: "
    print neural_network.get_output()
    print "expected: "
    # NOTE(review): outMatrix is not defined in this excerpt — presumably it
    # comes from the 'config' star-import; verify (as must np and Visualizer
    # below, and DEFAULT_INPUT_DIM/DEFAULT_OUTPUT_DIM/PLOT_NEURAL_NET).
    print outMatrix
    print "error: ", trainer.get_average_error(neural_network,
                                               test_sample.outputs)
    # visual output
    if PLOT_NEURAL_NET:
        network = Visualizer.DrawNN([DEFAULT_INPUT_DIM[0]] +
                                    [config[0]] * config[1] +
                                    [DEFAULT_OUTPUT_DIM[0]])
gestureNames.append(totalGestureNames[i]) gestureNames.append('no gesture') #read datasets and add them to dataStep trainSets = [] randTestSets = [] dataStep = [] for fileName in inputFiles: ind, t = DataSet.createData(fileName, inputGestures,usedGestures) dataStep.append((ind,t)) segs = splitBySignals(dataStep) #if desired shuffle and refraction the gestures if(shuffle): dataStep = shuffleDataStep(dataStep, nFolds) #if desired stretch testset newDataStep = [] for ind, t in dataStep: indSets = [] tSets = []
# import dataio as dio import thinkstats2 as ts2 import thinkplot as tplt import DataSet import pdb from random import choice H155 = DataSet.DataSet('h155.pkl') df = H155.df print df.shape dfr = df[df['INSCOPE'] == ] print dfr.shape c1 = choice(H155.varnames.keys()) c2 = choice(H155.varnames.keys()) print c1, H155.varnames[c1] print c2, H155.varnames[c2]
def load_patterns(ds_paths, ds_files):
    """Create a DataSet for the given paths and populate it from the CSV
    file spec before returning it."""
    patterns = ds.DataSet(ds_paths)
    patterns.read_csv_file(ds_files)
    return patterns
'larcPerPop', # 'autoTheft', 'autoTheftPerPop', 'arsons', 'arsonsPerPop', 'violentPerPop', 'nonViolPerPop', ] from DataSet import * from PCA import * import matplotlib.pyplot as plt dataset = DataSet(data, names, class_column='State', drop_columns=drop_columns, fix_missing=FixMissing.FILLMEAN, rescale=Rescale.NORMALIZE) y = np.mat(np.zeros((len(names), 1))) TO = 50 X = dataset.X.values[0:TO, :] y = dataset.y[0:TO] N, M = X.shape classNames = dataset.classNames attributeNames = dataset.attributeNames[0:TO] # exercise 7.2.4 from pylab import *
def Model2Feature(data, net, checkpoint, root=None, nThreads=16, batch_size=100,
                  pool_feature=False, **kargs):
    """Load a model from a checkpoint and extract gallery/query features for
    the named test dataset.

    Returns (gallery_feature, gallery_labels, query_feature, query_labels);
    for datasets without a gallery/query split both pairs are the same
    objects.
    """
    dataset_name = data
    model = models.create(net, pretrained=False, normalized=True)
    # resume = load_checkpoint(ckp_path)
    resume = checkpoint
    model.load_state_dict(resume['state_dict'])
    model.eval()
    model = torch.nn.DataParallel(model).cuda()
    data = DataSet.create(name=data, root=root, set_name='test')
    if dataset_name in ['shop', 'jd_test']:
        # Retrieval datasets with an explicit gallery/query split.
        gallery_loader = torch.utils.data.DataLoader(
            data.gallery, batch_size=batch_size, shuffle=False,
            drop_last=False, pin_memory=True, num_workers=nThreads)
        query_loader = torch.utils.data.DataLoader(
            data.query, batch_size=batch_size, shuffle=False,
            drop_last=False, pin_memory=True, num_workers=nThreads)
        gallery_feature, gallery_labels = extract_features(
            model, gallery_loader, print_freq=1e5, metric=None,
            pool_feature=pool_feature)
        query_feature, query_labels = extract_features(
            model, query_loader, print_freq=1e5, metric=None,
            pool_feature=pool_feature)
    else:  # here
        print('using else')
        # NOTE(review): shuffle=True and drop_last=True on a test loader
        # discard up to batch_size-1 samples and randomize order — confirm
        # this is intended for evaluation.
        data_loader = torch.utils.data.DataLoader(
            data.test, batch_size=batch_size, shuffle=True, drop_last=True,
            pin_memory=True, num_workers=nThreads)
        features, labels = extract_features(model, data_loader,
                                            pool_feature=pool_feature)
        # identical? — gallery and query deliberately alias the same tensors
        # when there is no split.
        gallery_feature, gallery_labels = query_feature, query_labels = features, labels
    return gallery_feature, gallery_labels, query_feature, query_labels
param_groups = model.parameters() learn_rate = args.lr # optimizer = optim.Adam(param_groups, lr=learn_rate, # weight_decay=args.weight_decay) optimizer = optim.SGD(param_groups, lr=learn_rate, momentum=0.9, weight_decay=0.00005) #get train_loader if 'mxnet' in args.net: normalize = transforms.Normalize(mean=[123, 117, 104], std=[1, 1, 1]) else: normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) data = DataSet.create(args.data, root=None, test=False) train_loader = torch.utils.data.DataLoader( data.train, batch_size=args.BatchSize, sampler=RandomIdentitySampler(data.train, num_instances=args.num_instances), drop_last=False, num_workers=args.nThreads) def adjust_learning_rate(opt_, epoch_, num_epochs): """Sets the learning rate to the initial LR decayed by 1000 at last epochs""" if epoch_ > (num_epochs - args.step): lr = args.lr * \ (0.01 ** ((epoch_ + args.step - num_epochs) / float(args.step))) for param_group in opt_.param_groups:
help='path of train data') parser.add_argument('--epoch', default=5, type=int, help='number of train epoches') parser.add_argument('--lr', default=1e-3, type=float, help='initial learning rate for Adam') args = parser.parse_args() data_transform = transforms.Compose([transforms.ToTensor()]) # The whole dataset dataset = DataSet.imageDataset(args.normal_data, args.blurry_data, transform=data_transform) # Spliting the dataset train_loader, validation_loader = prepareLoaders( dataset, shuffle_dataset=True, batch_size=args.batch_size, ) # Saving directory save_dir = os.path.join('models', args.model) # Create the saving directory if not exists if not os.path.exists(save_dir): os.mkdir(save_dir)
filesource = 'Haberman.txt' fileAttr = [ 'Age', 'Patient year of operation', 'Number of positive axillary nodes detected', ' Survival status' ] posClassification = 3 fileClass = ['1', '2'] elif index == 4: filesource = 'MammographicMasses.txt' fileAttr = ['BI-RADS', 'Age', 'Shape', 'Margin', 'Density', ' Severity'] posClassification = 5 fileClass = ['0', '1'] #chiamo il metodo che fa il parsing del file .txt del dataset scelto fileDataset = DataSet.setDataSet(filesource, fileAttr, posClassification, fileClass) #rimuove valori nel dataset con probabilita uniforme (funziona bene con i valori di p 0.1, 0.2, 0.5) def removeValAttrWithProb(dataset, p): a = 1 / p for n in range(len(dataset.examples)): for j in range(len(dataset.examples[n])): i = random.randint(1, a) if i == 1: dataset.examples[n][j] = None return dataset #creo il validation set per il 10 fold cross validation def testing(fileDataset, number):
def main(args):
    """Train a deep metric-learning model: build the network, optimizer, loss and
    identity-sampled train loader from `args`, then run the epoch loop, saving
    checkpoints every args.save_step epochs."""
    # s_ = time.time()
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    # mirror stdout into a persistent log file in the save directory
    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)

    # for vgg and densenet
    if args.resume is None:
        model_dict = model.state_dict()
    else:
        # resume model from a saved checkpoint (restores weights and start epoch)
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameter is 1/10
    # (base params start at lr_mult 0.0 and are raised to 0.1 after epoch 1 below)
    new_param_ids = set(map(id, model.module.classifier.parameters()))
    new_params = [p for p in model.module.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.module.parameters() if id(p) not in new_param_ids]
    param_groups = [{
        'params': base_params,
        'lr_mult': 0.0
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]

    print('initial model is save at %s' % save_dir)

    if args.optim == 'sgd':
        optimizer = torch.optim.SGD(param_groups, args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
    elif args.optim == 'adam':
        optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                     weight_decay=args.weight_decay)
    else:
        raise ValueError('Unsupported optimizer type')

    criterion = losses.create(args.loss, margin=args.margin,
                              alpha=args.alpha, base=args.loss_base).cuda()
    # Decor_loss = losses.create('decor').cuda()

    data = DataSet.create(args.data, ratio=args.ratio, width=args.width,
                          origin_width=args.origin_width, root=args.data_root)
    # identity-balanced batches: num_instances samples per identity
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    # save the train information
    for epoch in range(start, args.epochs):
        train(epoch=epoch, model=model, criterion=criterion,
              optimizer=optimizer, train_loader=train_loader, args=args)
        # after the first epoch, let the pretrained backbone learn at 1/10 rate
        if epoch == 1:
            optimizer.param_groups[0]['lr_mult'] = 0.1

        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
               fpath=osp.join(args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))
def cross_val(args):
    """Evaluate 5-fold-ensembled ResNet models for MHC peptide-binding prediction.

    For each allele (9-mer and 10-mer lists), loads the pickled encoded dataset,
    averages the predictions of the five fold models, converts log-scale outputs
    back to IC50, binarizes at IC50 < 500, and logs the resulting AUC plus the
    raw (label, output) pairs to files under args.savedir.
    """
    torch.set_default_tensor_type('torch.DoubleTensor')

    # alleles with 9-residue peptides
    allele_list_9 = [
        'HLA-A*02:01', 'HLA-A*03:01', 'HLA-A*11:01', 'HLA-A*02:03',
        'HLA-B*15:01', 'HLA-A*31:01', 'HLA-A*01:01', 'HLA-B*07:02',
        'HLA-A*26:01', 'HLA-A*02:06', 'HLA-A*68:02', 'HLA-B*08:01',
        'HLA-B*58:01', 'HLA-B*40:01', 'HLA-B*27:05', 'HLA-A*30:01',
        'HLA-A*69:01', 'HLA-B*57:01', 'HLA-B*35:01', 'HLA-A*02:02',
        'HLA-A*24:02', 'HLA-B*18:01', 'HLA-B*51:01', 'HLA-A*29:02',
        'HLA-A*68:01', 'HLA-A*33:01', 'HLA-A*23:01'
    ]
    # alleles with 10-residue peptides
    allele_list_10 = [
        'HLA-A*02:01', 'HLA-A*03:01', 'HLA-A*11:01', 'HLA-A*68:01',
        'HLA-A*31:01', 'HLA-A*02:06', 'HLA-A*68:02', 'HLA-A*02:03',
        'HLA-A*33:01', 'HLA-A*02:02'
    ]

    if not os.path.exists(args.savedir):
        os.mkdir(args.savedir)

    # append to an existing log, otherwise create it; header is written either way
    logFileLoc = args.savedir + os.sep + args.testFile
    if os.path.isfile(logFileLoc):
        logger = open(logFileLoc, 'a')
        logger.write("%s\t%s\t\t\t%s\n" % ('Length', 'Allele', 'AUC'))
        logger.flush()
    else:
        logger = open(logFileLoc, 'w')
        logger.write("%s\t%s\t\t\t%s\n" % ('Length', 'Allele', 'AUC'))
        logger.flush()

    for length in [10, 9]:
        if length == 9:
            allele_list = allele_list_9
        elif length == 10:
            allele_list = allele_list_10
        else:
            print("Invalid Length")
            exit(0)

        for allele in allele_list:  #[9,10]
            model_dir = args.savedir + os.sep + 'best_model' + os.sep + allele
            if not os.path.isdir(model_dir):
                os.makedirs(model_dir)

            # pickle files are named with '*'->'.' and ':'->'_' substitutions
            data_dict = pickle.load(
                open(args.data_dir + os.sep + 'pickle_' + str(length) + '_binary' +
                     os.sep + allele.replace('*', '.').replace(':', '_') + '.p', 'rb'))
            print('test on allele: ' + data_dict['allele'])
            # sanity-check that the pickle matches the expected peptide length
            if not length == data_dict['sequ_length']:
                print('length error')
                exit()

            encode_channel = data_dict['channel_encode']
            meas = data_dict['label']
            # bind = []
            # for i in meas:
            #     i = (-1) * math.log10(i);
            #     bind.append(i)
            sequ, label = encode_channel, meas

            # skip alleles with too few samples to evaluate
            if (len(sequ) > 5):
                sequ_ori, label_ori = sequ, label
                output_list = []
                label_list = []
                test_data_load = torch.utils.data.DataLoader(
                    myDataLoader.MyDataset(sequ_ori, label_ori),
                    batch_size=args.batch_size, shuffle=True,
                    num_workers=args.num_workers, pin_memory=True)

                model = net.ResNetC1()
                if args.onGPU == True:
                    #model = torch.nn.DataParallel(model, device_ids=[0,1,2,3]).cuda()
                    model = model.cuda()
                criteria = MSELoss()
                if args.onGPU == True:
                    criteria = criteria.cuda()

                # ensemble: sum predictions of the 5 fold models, then average
                output_sum, label = [], []
                for fold_num in range(1, 6):
                    best_model_dict = torch.load(
                        model_dir + os.sep + allele + '_' + str(length) + '_' +
                        str(fold_num) + '.pth')
                    model.load_state_dict(best_model_dict)
                    _, _, output, label = val(args, test_data_load, model, criteria)
                    if not output_sum:
                        output_sum.extend(output)
                    else:
                        output_sum = [
                            output_sum[i] + output[i] for i in range(len(output_sum))
                        ]
                final_out = [output_sum[i] / 5 for i in range(len(output_sum))]
                output_list.extend(final_out)
                label_list.extend(label)

                # model outputs are on a -log10(IC50) scale; invert back to IC50
                IC_output_list = [math.pow(10, (-1) * value) for value in output_list]
                #IC_label_list = [math.pow(10, (-1) * value) for value in label_list]
                # binder/non-binder threshold at IC50 < 500 nM
                bi_output_list = [1 if ic < 500 else 0 for ic in IC_output_list]
                #bi_label_list = [1 if ic < 500 else 0 for ic in IC_label_list]
                #pearson = pearsonr(IC_output_list, IC_label_list)
                auc = roc_auc_score(label_list, bi_output_list)
                #srcc = spearmanr(IC_output_list, IC_label_list)
                logger.write("%s\t%s\t\t%.4f\n" % (length, allele, auc))
                logger.flush()

                # dump raw (label, prediction) pairs, appending across alleles
                prediction = args.savedir + os.sep + args.predict
                if os.path.exists(prediction):
                    append_write = 'a'  # append if already exists
                else:
                    append_write = 'w'
                true_value = open(prediction, append_write)
                true_value.write("%s\n" % (allele))
                for i in range(len(output_list)):
                    true_value.write("%.4f\t%.4f\n" % (label_list[i], output_list[i]))
                true_value.flush()
    logger.close()
def load_patterns():
    """Load the 2D square-lattice TE dispersion-relation training patterns.

    Returns the populated DataSet read from the interpolated 16-point CSV.
    """
    patterns = ds.DataSet(
        '/home/adriano/Projects/ANNDispersionRelation/ann_training/2d/square/te/tests_new_db/16_interpolated_points/'
    )
    patterns.read_csv_file('dr_te_pc_dataset.csv')
    return patterns
def trainModel(expDir='null', ii=0):
    """Train a small CNN on MNIST with a scheduled/annealed/constant dropout rate.

    Reads hyper-parameters from `expDir + 'input_configuration'`, builds a
    2-conv + 2-fc TensorFlow (v1) graph, trains with Adam, periodically logs
    train/test/validation accuracy, and pickles the accuracy histories to
    `expDir + str(ii) + 'accuracies.pkl'`.

    NOTE(review): this is Python 2 code — it uses `file(...)` and `cPickle`,
    and relies on `/` being integer division for the batch counts; running
    under Python 3 would break the final `range(num_test_batches)` loop.
    """
    # dataset directory is read from the first token of ../data_dir.txt
    data_ = open('../data_dir.txt', 'r')
    datasets_dir = data_.readline().split()[0]
    mnist = DataSet.read_data_sets(data_dir=datasets_dir)

    # hyper-parameters from the experiment's configuration file
    config = ConfigParser()
    config.read(expDir + 'input_configuration')
    mode = config.get('MAIN_PARAMETER_SETTING', 'mode')
    l_rate = config.getfloat('MAIN_PARAMETER_SETTING', 'learning_rate')
    momentum = config.getfloat('MAIN_PARAMETER_SETTING', 'momentum')
    gamma = config.getfloat('MAIN_PARAMETER_SETTING', 'gamma')
    p_input = config.getfloat('MAIN_PARAMETER_SETTING', 'p_input')
    p_conv = config.getfloat('MAIN_PARAMETER_SETTING', 'p_conv')
    p_fc = config.getfloat('MAIN_PARAMETER_SETTING', 'p_fc')
    noise = config.getfloat('MAIN_PARAMETER_SETTING', 'noise')
    numepochs = config.getint('MAIN_PARAMETER_SETTING', 'training_epochs')

    # _prob(x, gamma, p) gives the keep-probability at iteration x:
    # scheduled: decays from 1 toward p; annealed: grows from p toward 1;
    # regular: constant p.
    if mode == 'scheduled_dropout':
        def _prob(x, gamma, p):
            return (1. - p) * np.exp(-gamma * x) + p
    elif mode == 'ann_dropout':
        def _prob(x, gamma, p):
            return -(1. - p) * np.exp(-gamma * x) + 1
    elif mode == 'regular_dropout':
        def _prob(x, gamma, p):
            return p

    sess = tf.InteractiveSession()
    #(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
    #config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.4)))

    # flattened 28x28 grayscale images and one-hot 10-class labels
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])

    # DROPOUT
    # placeholder for the probability that a neuron's output is kept during dropout
    # keep_prob will be give to feed_dict to control the dropout rate
    keep_prob_input = tf.placeholder(tf.float32)
    keep_prob_conv = tf.placeholder(tf.float32)
    keep_prob_fc = tf.placeholder(tf.float32)

    # FIRST CONV LAYER
    #dim_conv1 = int(96/p_conv)
    dim_conv1 = 32
    # weight tensor shape is [5, 5, in_channels, out_channels] (5x5 patch)
    W_conv1 = weight_variable([5, 5, 1, dim_conv1], noise)
    # bias vector with a component for each output channel
    b_conv1 = bias_variable([dim_conv1], noise)
    # reshape x to a 4d tensor: [batch, width, height, channels]
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    x_image_drop = tf.nn.dropout(x_image, keep_prob_input)
    # convolve x_image with the weight tensor, add the bias, apply ReLU
    h_conv1 = tf.nn.relu(conv2d(x_image_drop, W_conv1) + b_conv1)
    # finally max pool
    h_pool1 = max_pool_2x2(h_conv1)  # now the image is 14*14
    h_pool1_drop = tf.nn.dropout(h_pool1, keep_prob_conv)

    # SECOND CONV LAYER
    #dim_conv2 = int(128/p_conv)
    dim_conv2 = 48
    W_conv2 = weight_variable([5, 5, dim_conv1, dim_conv2], noise)
    b_conv2 = bias_variable([dim_conv2], noise)
    h_conv2 = tf.nn.relu(conv2d(h_pool1_drop, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)  # now the image is 7*7
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * dim_conv2])
    h_pool2_flat_drop = tf.nn.dropout(h_pool2_flat, keep_prob_conv)

    #~ ## THIRD CONV LAYER
    #~ # Initialize variables
    #~ #dim_conv3 = int(256/p_fc)
    #~ dim_conv3 = 256
    #~ W_conv3 = weight_variable([5, 5, dim_conv2, dim_conv3], noise)
    #~ b_conv3 = bias_variable([dim_conv3],noise)
    #~ # Contruct the graph
    #~ h_conv3 = tf.nn.relu(conv2d(h_pool2_drop, W_conv3) + b_conv3)
    #~ h_pool3 = max_pool_3x3(h_conv3) # now the image is 4*4?
    #~ h_pool3_flat = tf.reshape(h_pool3, [-1, 4 * 4 * dim_conv3])
    #~ h_pool3_flat_drop = tf.nn.dropout(h_pool3_flat, keep_prob_conv)

    # DENSE LAYER 1
    #DIM_1 = int(2048/p_fc)
    DIM_1 = 2048
    W_fc1 = weight_variable([7 * 7 * dim_conv2, DIM_1], noise)
    b_fc1 = bias_variable([DIM_1], noise)
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat_drop, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob_fc)

    # DENSE LAYER 2
    #DIM_2 = int(2048/p_fc)
    DIM_2 = 1024
    W_fc2 = weight_variable([DIM_1, DIM_2], noise)
    b_fc2 = bias_variable([DIM_2], noise)
    h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob_fc)

    # READOUT LAYER
    W_out = weight_variable([DIM_2, 10], noise)
    b_out = bias_variable([10], noise)
    y_conv = tf.matmul(h_fc2_drop, W_out) + b_out

    # Loss Function for evaluation (i.e. compare with actual labels)
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    # the actual operation on the graph
    train_step = tf.train.AdamOptimizer(l_rate, beta1=momentum).minimize(cross_entropy)
    #train_step = tf.train.GradientDescentOptimizer(1e-4,beta1=0.999).minimize(cross_entropy)

    # EVALUATE THE MODEL
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    #correct_prediction = tf.equal(tf.nn.top_k(y_conv,2)[1], tf.nn.top_k(y_,2)[1])
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    #AP = sparse_average_precision_at_k(y_conv, tf.cast(y_, tf.int64), 1)
    #mAP = tf.reduce_mean(tf.cast(AP, tf.float32))

    #################################################
    # Run the session to initialize variables
    sess.run(tf.global_variables_initializer())
    #sess.run(tf.local_variables_initializer())

    # Keep some history
    accTrSet = []
    accTeSet = []
    accValSet = []
    Xentr = []
    gammaValues = []

    # Some training params
    TRAIN_EVAL = True
    TEST_EVAL = True
    VALID = True
    batchsize = 128
    eval_batchsize = 200
    # Py2 integer division — batch counts are ints here
    num_train_batches = mnist.train.labels.shape[0] / batchsize
    numiter = numepochs * num_train_batches
    num_test_batches = mnist.test.labels.shape[0] / eval_batchsize
    #num_valid_batches = mnist.validation.labels.shape[0] / eval_batchsize
    print("Epochs: %d \t Training batches: %d \t Iterations: %d \t Mode: %s"\
        %(numepochs, num_train_batches, numiter, mode))

    start_time = time.time()
    ## TRAINING ITERATIONS
    for i in range(int(numiter)):
        # Dropout probabilities for this iteration
        _prob_input = _prob(i, gamma, p_input)
        _prob_conv = _prob(i, gamma, p_conv)
        _prob_fc = _prob(i, gamma, p_fc)
        gammaValues.append(_prob_fc)

        ###################################################
        # calculate accuracies and cost every 500 iterations
        if i % 100 == 0 and i != 0:
            ##############################################
            # calculate TRAIN accuracy on the SINGLE BATCH
            #train_accuracy, xentropy = sess.run((accuracy, cross_entropy),
            #feed_dict={x:batch[0], y_: batch[1],
            #keep_prob_input: 1.0, keep_prob_conv: 1.0, keep_prob_fc: 1.0}) # no dropout
            #accTrSet.append(train_accuracy)
            #Xentr.append(xentropy)

            ##############################################
            # calculate TRAINING accuracy on the whole training set
            train_accuracy = 0.
            xentropy = 0.
            if TRAIN_EVAL:
                for j in range(int(num_train_batches)):  # Must be done batchwise
                    batch = mnist.train.next_batch(batchsize)
                    t_a, x_e = sess.run(
                        (accuracy, cross_entropy),
                        feed_dict={
                            x: batch[0],
                            y_: batch[1],
                            keep_prob_input: 1.0,
                            keep_prob_conv: 1.0,
                            keep_prob_fc: 1.0
                        })  # no dropout
                    train_accuracy += t_a
                    xentropy += x_e
                train_accuracy = train_accuracy / num_train_batches
                xentropy = xentropy / num_train_batches
                accTrSet.append(train_accuracy)
                Xentr.append(xentropy)

            ##############################################
            # calculate TEST accuracy on the whole test set
            test_accuracy = 0.
            if TEST_EVAL:
                for j in range(int(num_test_batches)):  # Must be done batchwise
                    batch = mnist.test.next_batch(eval_batchsize)
                    test_accuracy += accuracy.eval(
                        feed_dict={
                            x: batch[0],
                            y_: batch[1],
                            keep_prob_input: 1.0,
                            keep_prob_conv: 1.0,
                            keep_prob_fc: 1.0
                        })  # no dropout
                test_accuracy = test_accuracy / num_test_batches
                accTeSet.append(test_accuracy)

            ##############################################
            # Perform validation for early stopping
            valid_accuracy = 0.
            if VALID:
                # calculate VALIDATION accuracy on the whole validation set
                valid_accuracy = accuracy.eval(
                    feed_dict={
                        x: mnist.validation.images,
                        y_: mnist.validation.labels,
                        keep_prob_input: 1.0,
                        keep_prob_conv: 1.0,
                        keep_prob_fc: 1.0
                    })  # no dropout
                accValSet.append(valid_accuracy)
                #print("Droput prob: %f"%(prob))
                ## Early stopping
                #if len(accValSet)>5 and accValSet[-1]<accValSet[-2]:
                #break

            duration = time.time() - start_time
            start_time = time.time()
            print("step %d: \t cross entropy: %f \t training accuracy: %f \t test accuracy: %f \t valid accuracy: %f \t prob: %f \t time: %f"\
                %(i, xentropy, train_accuracy, test_accuracy, valid_accuracy, _prob_fc, duration))

        ## The actual training step
        # SCHEDULING DROPOUT: no droput at first, tends to 0.5 as iterations increase
        batch = mnist.train.next_batch(batchsize)
        train_step.run(
            feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob_input: _prob_input,  #0.9
                keep_prob_conv: _prob_conv,  #0.75
                keep_prob_fc: _prob_fc
            })  #0.5    # CHANGE HERE

    #!# End of training iterations #

    # Finally test on the test set #
    ## Testing on small gpus must be done batch-wise to avoid OOM
    test_accuracy = 0
    for j in range(num_test_batches):
        batch = mnist.test.next_batch(batchsize)
        test_accuracy += accuracy.eval(
            feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob_input: 1.0,
                keep_prob_conv: 1.0,
                keep_prob_fc: 1.0
            })  # no dropout
    print("test accuracy: %g" % (test_accuracy / num_test_batches))

    # persist accuracy histories for later plotting (Py2 file()/cPickle)
    f = file(expDir + str(ii) + 'accuracies.pkl', 'w')
    cPickle.dump((accTrSet, accValSet, accTeSet, Xentr), f,
                 protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
    sess.close()
# k-nearest-neighbour classification demo over the project's DataSet module.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import DataSet

# Load the feature matrix and label vector.
features, targets = DataSet.dataset()

# Hold out 20% of the samples for evaluation (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42)

# 5-NN classifier backed by a k-d tree index.
classifier = KNeighborsClassifier(n_neighbors=5, algorithm='kd_tree')
classifier.fit(X_train, y_train)

print('模型的准确度:{}'.format(classifier.score(X_test, y_test)))
def recommend(user, train, w, k, n):
    """Item-based collaborative-filtering recommendation for one user.

    Args:
        user: key into `train` identifying the user.
        train: dict mapping user -> iterable of items the user interacted with.
        w: item-similarity matrix, dict mapping item -> {item: similarity}.
        k: number of most-similar neighbours considered per interacted item.
        n: number of recommendations to return.

    Returns:
        dict mapping item -> score, the top-n recommended items.
    """
    rank = dict()
    rui = 1.0  # implicit feedback: every interacted item counts as rating 1
    interacted = train[user]
    for i in interacted:
        # k most similar items to i, by descending similarity
        for j, wij in sorted(w[i].items(), key=lambda x: x[1], reverse=True)[0:k]:
            if j in interacted:
                continue  # never recommend something the user already has
            # BUG FIX: accumulate the contribution of every interacted item
            # (sum over i of w[i][j] * r[u][i]); the original assignment
            # overwrote the score with only the last item's similarity.
            rank[j] = rank.get(j, 0.0) + wij * rui
    rank = dict(sorted(rank.items(), key=lambda x: x[1], reverse=True)[0:n])
    return rank


if __name__ == '__main__':
    dataset = DataSet.openfile('ratings.csv')
    train, test = DataSet.splitdata_d(dataset)
    w = itemsimliarity(train)
    re = {}
    for user in test.keys():
        re[user] = recommend(user, train, w, 10, 10)
    x = Evaluating.recall(test, re)
    y = Evaluating.precision(test, re)
    z = Evaluating.coverage(train, test, re)
    a = Evaluating.popularity(train, re)
    print(x)
    print(y)
    print(z)
    print(a)
# Autoencoder training driver: build the dataset, then define, compile,
# train, and evaluate the model end-to-end.
import pdb

from DataSet import *
from AeModel import *

# Prepare the data the autoencoder trains on.
data_set: DataSet = DataSet()
# Wire the model to the dataset, then run the full pipeline.
ae_model: AeModel = AeModel(data_set)
ae_model.define_my_model()
ae_model.compile_the_model()
ae_model.train_the_model()
ae_model.evaluate_the_model()
# --- Feature-extraction evaluation script (fragment) ---
# NOTE(review): fragment — the argparse parser and earlier arguments are
# defined above the visible source.
parser.add_argument('-test', type=int, default=1,
                    help='evaluation on test set or train set')
args = parser.parse_args()

# model = inception_v3(dropout=0.5)
# load a whole serialized model (not just a state_dict)
model = torch.load(args.r)
model = model.cuda()

# short run name derived from the checkpoint filename with its last 10
# characters stripped — presumably a '.pth'-style suffix; TODO confirm
temp = args.r.split('/')
name = temp[-1][:-10]

# evaluate on the test split (default) or the train split
if args.test == 1:
    data = DataSet.create(args.data, train=False)
    data_loader = torch.utils.data.DataLoader(data.test, batch_size=128,
                                              shuffle=False, drop_last=False)
else:
    data = DataSet.create(args.data, test=False)
    data_loader = torch.utils.data.DataLoader(data.train, batch_size=128,
                                              shuffle=False, drop_last=False)

features, labels = extract_features(model, data_loader, print_freq=1e5, metric=None)
# NOTE(review): fragment begins mid-method; the opening of classify() and its
# enclosing decision-tree class are outside the visible source.
            value = self.getBranch(value, branch)
            # recurse down the tree from the child node
            return self.classify(example, nodeAt)
        else:
            # leaf (or no matching branch): return the node's label if any
            if nodeAt is not None:
                return nodeAt.getLabel()
            return None


# Quick manual test harness (Python 2 print statements).
if __name__=="__main__":
    import DataSet
    f = "/Users/ducrix/Documents/Research/Python/data/ml/test_weather.gla"
    #f = "/Users/ducrix/Documents/Research/Python/data/ml/test_genders.gla"
    #f = "/Users/ducrix/Documents/Research/Python/data/ml/test_cars.gla"
    #f = "/Users/ducrix/Documents/Research/Python/data/ml/test_words.gla"
    ds = DataSet.DataSet(f)
    # hold out 10% of the examples for testing
    train, test = ds.getTrainTestSet(.1)
    dt = DT(train, ds.getAttributes(), 2)
    #print dt.data[:]
    #print dt.classify(test[0])
    t = dt.test(test)
    # per-example correctness flags, then the accuracy
    p = [tt[0]==tt[1] for tt in t]
    print p, t
    print (1.*p.count(True))/len(p)
    #print len(train), [t.getValues() for t in train]
    #print len(test), [t.getValues() for t in test]
# Two-hidden-layer fully-connected classifier in low-level TensorFlow (v1 API).
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import DataSet

data, label = DataSet.dataset()
num_classes = 8
# one-hot encode the integer class labels
label = np.eye(num_classes)[label.reshape(-1).astype(np.int8)]
# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(data, label,
                                                    random_state=0,
                                                    test_size=0.2)

# network dimensions and optimizer step size
num_features = 408
num_h1 = 100
num_h2 = 100
learning_rate = 0.001

X = tf.placeholder(tf.float32, shape=(None, num_features))
y = tf.placeholder(tf.float32, shape=(None, num_classes))

# NOTE(review): fragment is truncated mid-dictionary below; the output-layer
# weight entry continues past the visible source.
weights = {
    'weight_h1':
    tf.Variable(tf.truncated_normal((num_features, num_h1), stddev=0.1)),
    'weight_h2':
    tf.Variable(tf.truncated_normal((num_h1, num_h2), stddev=0.1)),
    'weight_ouput':
def main(argv):
    """Train a Convolutional Pose Machine (CPM) model.

    Creates timestamped save/log directories, builds train/validation data
    generators and the CPM graph, optionally restores pretrained weights,
    then runs the training loop with periodic validation, visualization
    dumps, and checkpointing.

    :param argv: unused CLI arguments (configuration comes from FLAGS).
    :return: None
    """
    #os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    """ Create dirs for saving models and logs """
    model_path_suffix = datetime.datetime.now().strftime('%Y_%m_%d_%H')
    model_save_dir = os.path.join('models', 'weights', model_path_suffix)
    train_log_save_dir = os.path.join('models', 'logs', model_path_suffix, 'train')
    test_log_save_dir = os.path.join('models', 'logs', model_path_suffix, 'test')
    os.system('mkdir -p {}'.format(model_save_dir))
    os.system('mkdir -p {}'.format(train_log_save_dir))
    os.system('mkdir -p {}'.format(test_log_save_dir))

    """ Create data generator """
    data_generator = DataSet(FLAGS.train_img_dir, FLAGS.batch_size,
                             FLAGS.input_size, FLAGS.heatmap_size,
                             FLAGS.normalize_img, FLAGS.category,
                             FLAGS.joint_gaussian_variance,
                             FLAGS.num_of_joints, FLAGS.center_radius,
                             sample_set='train').data_generator
    data_generator_eval = DataSet(FLAGS.val_img_dir, FLAGS.batch_size,
                                  FLAGS.input_size, FLAGS.heatmap_size,
                                  FLAGS.normalize_img, FLAGS.category,
                                  FLAGS.joint_gaussian_variance,
                                  FLAGS.num_of_joints, FLAGS.center_radius,
                                  sample_set='valid').data_generator

    """ Build network graph """
    model = cpm_model.CPM_Model(input_size=FLAGS.input_size,
                                heatmap_size=FLAGS.heatmap_size,
                                stages=FLAGS.cpm_stages,
                                joints=FLAGS.num_of_joints,
                                img_type=FLAGS.color_channel,
                                is_training=True)
    model.build_loss(FLAGS.init_lr, FLAGS.lr_decay_rate, FLAGS.lr_decay_step,
                     optimizer='Adam')
    print('=====Model Build=====\n')

    merged_summary = tf.summary.merge_all()

    """ Training """
    #device_count = {'GPU': 0} if FLAGS.use_gpu else {'GPU': 0}
    with tf.Session() as sess:
        # Create tensorboard
        train_writer = tf.summary.FileWriter(train_log_save_dir, sess.graph)
        test_writer = tf.summary.FileWriter(test_log_save_dir, sess.graph)

        # Create model saver
        saver = tf.train.Saver(max_to_keep=None)

        # Init all vars
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        #Restore pretrained weights
        if FLAGS.pretrained_model != '':
            if FLAGS.pretrained_model.endswith('.pkl'):
                # pickled weight file: load layer-by-layer, fine-tune mode
                model.load_weights_from_file(FLAGS.pretrained_model, sess, finetune=True)
                # Check weights
                for variable in tf.trainable_variables():
                    with tf.variable_scope('', reuse=True):
                        var = tf.get_variable(variable.name.split(':0')[0])
                        print(variable.name, np.mean(sess.run(var)))
            else:
                # TF checkpoint restore — NOTE(review): directory is hard-coded
                saver.restore(sess,
                              os.path.join('models/weights/2018_05_19_12',
                                           FLAGS.pretrained_model))
                # check weights
                for variable in tf.trainable_variables():
                    with tf.variable_scope('', reuse=True):
                        var = tf.get_variable(variable.name.split(':0')[0])
                        print(variable.name, np.mean(sess.run(var)))

        for training_itr in range(FLAGS.training_iters):
            t1 = time.time()

            # Read one batch data
            batch_x_np, batch_gt_heatmap_np, batch_cmap = next(data_generator)

            # Forward and update weights
            stage_losses_np, total_loss_np, _, summaries, current_lr, \
                stage_heatmap_np, global_step = sess.run([model.stage_loss,
                                                          model.total_loss,
                                                          model.train_op,
                                                          merged_summary,
                                                          model.cur_lr,
                                                          model.stage_heatmap,
                                                          model.global_step],
                                                         feed_dict={model.input_images: batch_x_np,
                                                                    model.gt_hmap_placeholder: batch_gt_heatmap_np,
                                                                    model.cmap_placeholder: batch_cmap})

            # Show training info
            print_current_training_stats(global_step, current_lr,
                                         stage_losses_np, total_loss_np,
                                         time.time() - t1)

            # Write logs
            train_writer.add_summary(summaries, global_step)

            # Draw intermediate results: prediction and ground-truth overlays
            # are written every iteration (undo the -0.5..0.5 normalisation).
            if not os.path.exists(FLAGS.result_dir):
                os.makedirs(FLAGS.result_dir)
            show_img = (batch_x_np[0] + 0.5) * 256
            img_save, joint_coord_set = visualize_result(show_img,
                                                         stage_heatmap_np,
                                                         FLAGS.num_of_joints,
                                                         FLAGS.heatmap_size,
                                                         FLAGS.joint_color_code)
            cv2.imwrite(FLAGS.result_dir + '/result' + str(training_itr) + '.jpg',
                        img_save)
            hm = np.expand_dims(batch_gt_heatmap_np, axis=0)
            img_save, joint_coord_set = visualize_result(show_img, hm,
                                                         FLAGS.num_of_joints,
                                                         FLAGS.heatmap_size,
                                                         FLAGS.joint_color_code)
            cv2.imwrite(FLAGS.result_dir + '/label' + str(training_itr) + '.jpg',
                        img_save)

            # (removed a large block of commented-out heatmap-visualisation
            # debug code that rendered per-stage heatmaps with cv2.imshow)

            # Periodic validation: average loss over 10 validation batches
            if (global_step + 1) % FLAGS.validation_iters == 0:
                mean_val_loss = 0
                cnt = 0
                while cnt < 10:
                    batch_x_np, batch_gt_heatmap_np, batch_cmap = next(
                        data_generator_eval)
                    total_loss_np, summaries = sess.run(
                        [model.total_loss, merged_summary],
                        feed_dict={
                            model.input_images: batch_x_np,
                            model.gt_hmap_placeholder: batch_gt_heatmap_np,
                            model.cmap_placeholder: batch_cmap
                        })
                    mean_val_loss += total_loss_np
                    cnt += 1
                print('\nValidation loss: {:>7.2f}\n'.format(mean_val_loss / cnt))
                test_writer.add_summary(summaries, global_step)

            # Save models
            if (global_step + 1) % FLAGS.model_save_iters == 0:
                saver.save(sess=sess,
                           save_path=model_save_dir + '/' +
                           FLAGS.network_def.split('.py')[0],
                           global_step=(global_step + 1))
                print('\nModel checkpoint saved...\n')

            # Finish training
            if global_step == FLAGS.training_iters:
                break
    print('Training done.')
def train(path_to_train, data_frame, pretrained_weights, save_dir, batch_size,
          shape, lr, val_ratio, epochs):
    """Fine-tune a discriminant Keras model on protein image data.

    Freezes all but the last two layers, splits `data_frame` into
    train/validation by `val_ratio`, trains with generators, then writes
    the metric history CSV and a loss/F1 plot.

    NOTE(review): `res_dir` is referenced below but never defined in this
    function — it must be a module-level global; confirm before reuse.

    :param path_to_train: root directory of the training images.
    :param data_frame: pandas DataFrame indexing the samples.
    :param pretrained_weights: weights passed to create_discriminant_model.
    :param save_dir: pathlib.Path where checkpoints/logs/plots are written.
    :param batch_size: generator batch size.
    :param shape: (height, width) of the single-channel input.
    :param lr: Adam learning rate.
    :param val_ratio: fraction of samples held out for validation.
    :param epochs: number of training epochs.
    :return: 1 on completion.
    """
    model = create_discriminant_model((shape[0], shape[1], 1), pretrained_weights)
    # freeze everything but the last two layers for fine-tuning
    for layer in model.layers[:-2]:
        layer.is_trainable = False
    model.compile(
        # loss=[weighted_binary_crossentropy],
        loss='binary_crossentropy',
        # optimizer=SGD(lr=1e-4,momentum=0.9),
        optimizer=Adam(lr=lr),
        metrics=['acc', f1])
    model.summary()

    # keep only the best-val-loss checkpoint per epoch
    checkpoint = ModelCheckpoint(str(
        save_dir.joinpath('next_base.model%f.epoch{epoch:02d}' % lr)),
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='min',
                                 period=1)
    #lr_sched = step_decay_schedule(initial_lr=1e-3, step_size=epochs, min_lr=1e-4)

    use_multiprocessing = False  # DO NOT COMBINE MULTIPROCESSING WITH CACHE!
    workers = 1  # DO NOT COMBINE MULTIPROCESSING WITH CACHE!

    # random validation split of the data frame
    val_sample_num = np.floor(len(data_frame) * val_ratio).astype(int)
    val_indices = np.random.choice(range(0, len(data_frame)), val_sample_num,
                                   replace=False)
    val_data = data_frame.iloc[val_indices, ]
    pathsVal, labelsVal = DataSet.getValidationDataset(path_to_train, val_data, False)
    # record which files went into validation for reproducibility
    pd.DataFrame([pathsVal]).to_csv(str(save_dir.joinpath("validation_paths.csv")),
                                    mode='w', header=False, index=False)

    train_indices = np.setdiff1d(range(0, len(data_frame)), val_indices)
    # train_indices is still in ascending (unshuffled) order at this point
    train_indices = np.random.permutation(train_indices)
    train_data = data_frame.iloc[train_indices, ]
    pathsTrain, labelsTrain = DataSet.getTrainDataset(path_to_train, train_data, False)
    pd.DataFrame([pathsTrain]).to_csv(str(save_dir.joinpath("train_paths.csv")),
                                      mode='w', header=False, index=False)
    print(pathsTrain.shape, labelsTrain.shape, pathsVal.shape, labelsVal.shape)

    # augmented, shuffled training generator; plain validation generator
    tg = ProteinDataGenerator(pathsTrain, labelsTrain, batch_size, shape,
                              is_mask=False, use_cache=True, augment=True,
                              shuffle=True)
    vg = ProteinDataGenerator(pathsVal, labelsVal, batch_size, shape,
                              is_mask=False, use_cache=True, shuffle=False)

    tb = TensorBoard(log_dir=str(save_dir.joinpath('tb_logs')),
                     histogram_freq=0, batch_size=batch_size)

    hist = model.fit_generator(tg,
                               steps_per_epoch=len(tg),
                               validation_data=vg,
                               validation_steps=8,
                               epochs=epochs,
                               use_multiprocessing=use_multiprocessing,
                               workers=workers,
                               verbose=1,
                               callbacks=[tb, checkpoint])

    loss_list = hist.history["loss"]
    val_loss_list = hist.history["val_loss"]
    f1_list = hist.history["f1"]
    val_f1_list = hist.history["val_f1"]
    #histories.append(hist)
    pd.DataFrame([loss_list, val_loss_list, f1_list, val_f1_list],
                 index=['loss', 'val_loss', 'f1', 'val_f1']).to_csv(
                     str(res_dir.joinpath("results.csv")),
                     mode='w', header=False, index=True)

    # side-by-side loss and F1 curves
    # NOTE(review): x-axis is hard-coded to 50 points — breaks if epochs != 50
    fig, ax = plt.subplots(1, 2, figsize=(15, 5))
    ax[0].set_title('loss')
    ax[0].plot(np.linspace(1, 50, 50), loss_list, label="Train loss")
    ax[0].plot(np.linspace(1, 50, 50), val_loss_list, label="Validation loss")
    ax[1].set_title('acc')
    ax[1].plot(np.linspace(1, 50, 50), f1_list, label="Train F1")
    ax[1].plot(np.linspace(1, 50, 50), val_f1_list, label="Validation F1")
    ax[0].legend()
    ax[1].legend()
    plt.savefig(str(save_dir.joinpath('fcnn_model_Adam%f.png' % lr)))
    return 1
def main(argv):
    """GA-based feature-weighting entry point (Python 2, pyevolve).

    Parses -i/--input and -t/--test file options, evolves a chromosome that
    sorts the training DataSet, then applies the best chromosome to the
    test DataSet and writes scores for both.
    """
    global TrainSet
    global TestSet
    letters = "i:t"
    keywords = ["input=", "test="]
    trainfile = ""
    testfile = ""
    # run the algorithm by: python MainGA --input=train.txt --test=test.txt
    try:
        opts, arg = getopt.getopt(sys.argv[1:], letters, keywords)
    except getopt.GetoptError:
        print "GetoptError: -i <trainfile>"
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-i", "--input"):
            trainfile = arg
        if opt in ("-t", "--test"):
            testfile = arg

    if trainfile:
        trainF = open(trainfile, "r")
        TrainSet = DataSet(False, trainF, iB, sB)
        trainF.close()
        TrainSet.set_dataset_filename(str(trainfile))
        TrainSet.set_generations(GENERATIONS)
        TrainSet.set_pop_size(POPULATION)
        TrainSet.set_mutation_rate(MUTATION_RATE)

        genome = G1DList.G1DList(SIZE_OF_CHROMOSOMES)
        # * 2 to ascending and descending sorts, otherwise you'll have just ascending order
        genome.setParams(rangemin=1, rangemax=(TrainSet.FeaturesNum) * 2 * sB)
        genome.evaluator.set(eval_func)  # change here if you want a different fitness function
        ga = GSimpleGA.GSimpleGA(genome)
        ga.setGenerations(GENERATIONS)  # changes the # of generations(default 100)
        ga.setPopulationSize(POPULATION)  # changes the # of individuals(default 80)
        # ga.setMutationRate(MUTATION_RATE) # --> use it when you want to change the Mutation Rate (default 0.02)
        # ga.setCrossoverRate(CROSS_OVER_RATE) # --> use it when you want to change the Crossover Rate (default 0.8)
        # ga.setMultiProcessing(True) # --> please read this: http://pyevolve.sourceforge.net/wordpress/?p=843
        ga.evolve(freq_stats=10)

        chromosome = ga.bestIndividual().getInternalList()  # the chromosome selected by GA
        # print chromosome ---> you can print the chromosome to see what was selected
        TrainSet.set_best_individual(chromosome)
        TrainSet.sort_dataset_by_chromosome(chromosome)
        TrainSet.write_scores(isTrain=True)
    else:
        sys.exit("GA_Algorithm: A train file is required.")

    if testfile:
        testF = open(testfile, "r")
        TestSet = DataSet(True, testF, iB, sB)
        testF.close()
        TestSet.set_dataset_filename(str(testfile))
        TestSet.set_generations(GENERATIONS)
        TestSet.set_pop_size(POPULATION)
        TestSet.set_mutation_rate(MUTATION_RATE)
        TestSet.isTest = True
        # order the testset with chromosome found by GA with trainset
        TestSet.sort_dataset_by_chromosome(TrainSet.get_best_individual())
        TestSet.write_scores()
# --- Checkpoint restore + training loop (fragment) ---
# NOTE(review): fragment — `session`, `saver`, `model`, `summary`, `writer`
# are defined above the visible source, and the final feed_dict is truncated.
checkpoint = tf.train.get_checkpoint_state(MODEL_DIR)
if checkpoint and checkpoint.model_checkpoint_path:
    saver.restore(session, checkpoint.model_checkpoint_path)
    print("Loaded checkpoint: {}".format(checkpoint.model_checkpoint_path))
else:
    print("Unable to load checkpoint")

counter = 0
print(len(DataSet.TRAIN_DATASET.images))
# save an initial snapshot before training starts
saver.save(session, os.path.join(MODEL_DIR, "network"), global_step=counter)
for epoch in range(3):
    print(epoch)
    for images, labels in DataSet.iter_batches(50):
        counter += 1
        # every 100 batches, log training accuracy (dropout disabled)
        if counter % 100 == 0:
            print(counter)
            acc, summ = session.run([model.accuracy, summary],
                                    feed_dict={
                                        model.input_var: images,
                                        model.corr_labels: labels,
                                        model.keep_prob: 1.0
                                    })
            writer.add_summary(summ, counter)
            print("iteration {}, training accuracy {}".format(counter, acc))
        session.run([model.train], feed_dict={
            model.input_var: images,