def save_to_www(results):
    """Best-effort publication of pipeline results to the www output dir.

    Tries three increasingly remote fallbacks, never raising to the caller:
      1. write str(results) to /data/www/pipeline_output.html locally,
      2. scp that file to the server at 13.69.27.202,
      3. if scp fails, open a paramiko sftp session and write the results
         directly on the remote host.
    Every failure is logged via sys.exc_info() and the next fallback is tried.

    :param results: arbitrary object; serialized with str() (json.dump was
        abandoned because the object is not json serializable).
    """
    # print('attempting to save results {}'.format(results))
    filename = 'pipeline_output.html'
    wwwname = None
    try:
        #save locally in case i get chance to setup local server
        wwwpath = '/home/docker-user/appengine_api/output'
        #this issnt shared in docker... prob / wont be shared either
        wwwpath = '/data/www'
        wwwname = os.path.join(wwwpath, os.path.basename(filename))
        print('WWW - saving json to ' + wwwname)
        Utils.ensure_file(wwwname)
        # json.dump(results,fp,indent=4) #object is not json serializable for whatever reason
        with open(wwwname, 'w') as fp:  # 'with' closes fp; explicit close removed
            fp.write(str(results))
        print('WWW - writing')
    except Exception:  # best-effort: log and fall through to remote copy
        print(sys.exc_info())
    destname = '/data/www/' + filename
    try:
        #save to server already running
        print('copying to 13.69.27.202:' + destname)
        scpcmd = 'scp ' + wwwname + ' [email protected]:' + destname
        subprocess.call(scpcmd, shell=True)
    except Exception:
        print(sys.exc_info())
        #attempt direct ftp since local save doesnt work and cant scp without local file
        try:
            import paramiko
            connection = paramiko.SSHClient()
            connection.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            # FIX: host must be a string - the unquoted IP was a syntax error
            connection.connect('13.69.27.202', username='******')
            ftp = connection.open_sftp()
            f = ftp.open(destname, 'w+')
            # FIX: serialize the same way as the local branch (sftp write needs str/bytes)
            f.write(str(results))
            f.close()
            ftp.close()
            connection.close()
        except Exception:
            print(sys.exc_info())
#generate report filename if type == 'pixlevel': outname = os.path.join( outdir, outdir[2:] + '_netoutput.txt' ) #TODO fix the shell script to not look for this, then it wont be needed if type == 'multilabel': outname = os.path.join(outdir, outdir[2:] + '_mlresults.html') if type == 'single_label': outdir = outdir + '_' + cat outname = os.path.join(outdir, outdir[2:] + '_' + cat + '_slresults.txt') loss_outputname = os.path.join(outdir, outdir[2:] + '_loss.txt') print('outname:{}\n lossname {}\n outdir {}\n'.format(outname, loss_outputname, outdir)) Utils.ensure_dir(outdir) time.sleep(0.1) Utils.ensure_file(loss_outputname) #copycmd = 'cp -r '+outdir + ' ' + host_dirname scpcmd = 'rsync -avz ' + outdir + ' [email protected]:/var/www/results/' + type + '/' i = 0 losses = [] iters = [] loss_avg = [0] * n_iter accuracy_avg = [0] * n_iter tot_iters = 0 #instead of taking steps its also possible to do #solver.solve() if type == 'multilabel':
def dosolve(weights,
            solverproto,
            testproto,
            type='single_label',
            steps_per_iter=1,
            n_iter=200,
            n_loops=200,
            n_tests=1000,
            cat=None,
            classlabels=None,
            baremetal_hostname='brainiK80a',
            solverstate=None):
    """Train a caffe net and periodically log loss/accuracy and reports.

    Runs `n_loops` reporting periods of `n_iter` solver steps
    (`steps_per_iter` caffe iterations each).  Per period it appends a
    tab-separated line to `<outdir>/<outdir>_loss.txt`, then runs a
    type-specific evaluation: html report for 'multilabel', (currently
    commented-out) seg test for 'pixlevel', single-label accuracy for
    'single_label'.  The output dir is rsynced to a results server.

    :param weights: .caffemodel to initialize from, or None
    :param solverproto: solver prototxt path
    :param testproto: test-net prototxt path (used for netname + report)
    :param type: one of 'single_label', 'multilabel', 'pixlevel'
    :param steps_per_iter: caffe iterations per solver.step() call
    :param n_iter: steps per reporting period
    :param n_loops: number of reporting periods
    :param n_tests: samples for the periodic test pass
    :param cat: category name for single_label (used in labels and paths);
        NOTE(review): 'not_' + cat raises TypeError if cat is None and
        classlabels is None - confirm callers always pass one of them
    :param classlabels: label names; defaults to ['not_<cat>', <cat>]
    :param baremetal_hostname: physical-host tag embedded in outdir name
    :param solverstate: optional .solverstate to resume training from
    """
    if classlabels is None:
        classlabels = ['not_' + cat, cat]
    # GPU id comes from the command line, not from a parameter
    caffe.set_device(int(sys.argv[1]))
    caffe.set_mode_gpu()
    solver = caffe.get_solver(solverproto)
    if weights is not None:
        solver.net.copy_from(weights)
    if solverstate is not None:
        solver.restore(
            solverstate)  #see https://github.com/BVLC/caffe/issues/3651
    #No need to use solver.net.copy_from(). .caffemodel contains the weights. .solverstate contains the momentum vector. Both are needed to restart training. If you restart training without momentum, the loss will spike up and it will take ~50k iterations to recover. At test time you only need .caffemodel.
    training_net = solver.net
    solver.test_nets[0].share_with(
        solver.net)  #share train weight updates with testnet
    test_net = solver.test_nets[0]  # more than one testnet is supported
    net_name = multilabel_accuracy.get_netname(testproto)
    docker_hostname = socket.gethostname()
    datestamp = datetime.datetime.strftime(datetime.datetime.now(),
                                           'time%H.%M_%d-%m-%Y')
    # unique run prefix: host + net + container + timestamp
    prefix = baremetal_hostname + '_' + net_name + '_' + docker_hostname + '_' + datestamp
    #get netname, copy train/test to outdir
    tt = caffe_utils.get_traintest_from_proto(solverproto)
    print('netname {} train/test {}'.format(net_name, tt))
    #detailed_jsonfile = detailed_outputname[:-4]+'.json'
    if weights:
        weights_base = os.path.basename(weights)
    else:
        weights_base = '_noweights_'
    threshold = 0.5
    if net_name:
        outdir = type + '_' + prefix + '_' + weights_base.replace(
            '.caffemodel', '')
    else:
        # no netname available - fall back to the testproto filename
        outdir = type + '_' + prefix + '_' + testproto + '_' + weights_base.replace(
            '.caffemodel', '')
    outdir = outdir.replace('"', '')  #remove quotes
    outdir = outdir.replace(' ', '')  #remove spaces
    outdir = outdir.replace('\n', '')  #remove newline
    outdir = outdir.replace('\r', '')  #remove return
    outdir = './' + outdir
    #generate report filename, outdir to save everything (loss, html etc)
    if type == 'pixlevel':
        outname = os.path.join(
            outdir, outdir[2:] + '_netoutput.txt'
        )  #TODO fix the shell script to not look for this, then it wont be needed
    if type == 'multilabel':
        outname = os.path.join(outdir, outdir[2:] + '_mlresults.html')
    if type == 'single_label':
        outdir = outdir + '_' + cat
        outname = os.path.join(outdir,
                               outdir[2:] + '_' + cat + '_slresults.txt')
    loss_outputname = os.path.join(outdir, outdir[2:] + '_loss.txt')
    print('outname:{}\n lossname {}\n outdir {}\n'.format(
        outname, loss_outputname, outdir))
    Utils.ensure_dir(outdir)
    time.sleep(0.1)  # presumably lets dir creation settle - TODO confirm needed
    Utils.ensure_file(loss_outputname)
    #copy training and test files to outdir
    if tt is not None:
        if len(tt) == 1:  #copy single traintest file to dir of info
            copycmd = 'cp ' + tt[0] + ' ' + outdir
            subprocess.call(copycmd, shell=True)
        else:  #copy separate train and test files to dir of info
            copycmd = 'cp ' + tt[0] + ' ' + outdir
            subprocess.call(copycmd, shell=True)
            copycmd = 'cp ' + tt[1] + ' ' + outdir
            subprocess.call(copycmd, shell=True)
    #cpoy solverproto to results dir
    if solverproto is not None:
        copycmd = 'cp ' + solverproto + ' ' + outdir
        subprocess.call(copycmd, shell=True)
    #copy test proto to results dir
    if testproto is not None:
        copycmd = 'cp ' + testproto + ' ' + outdir
        subprocess.call(copycmd, shell=True)
    #copy this file too
    copycmd = 'cp solve.py ' + outdir
    subprocess.call(copycmd, shell=True)
    #copycmd = 'cp -r '+outdir + ' ' + host_dirname
    scpcmd = 'rsync -avz ' + outdir + ' [email protected]:/var/www/results/' + type + '/'
    # per-period trackers
    i = 0
    losses = []
    iters = []
    loss_avg = np.zeros(n_iter)
    accuracy_avg = np.zeros(n_iter)
    tot_iters = 0
    #instead of taking steps its also possible to do
    #solver.solve()
    if type == 'multilabel':
        multilabel_accuracy.open_html(
            weights,
            dir=outdir,
            solverproto=solverproto,
            caffemodel=weights,
            classlabels=constants.web_tool_categories_v2,
            name=outname)
    for _ in range(n_loops):
        for i in range(n_iter):
            solver.step(steps_per_iter)
            # loss = solver.net.blobs['score'].data
            loss = solver.net.blobs['loss'].data
            loss_avg[i] = loss
            losses.append(loss)
            # NOTE(review): this adds steps_per_iter*n_iter on EVERY inner
            # step, so tot_iters overcounts by a factor of n_iter (the later
            # dosolve variant adds only steps_per_iter here) - verify intent
            tot_iters = tot_iters + steps_per_iter * n_iter
            if type == 'single_label':
                accuracy = solver.net.blobs['accuracy'].data
                accuracy_avg[i] = accuracy
                print('iter ' + str(i * steps_per_iter) + ' loss:' +
                      str(loss) + ' acc:' + str(accuracy))
            else:
                print('iter ' + str(i * steps_per_iter) + ' loss:' +
                      str(loss))
        averaged_loss = np.mean(loss_avg)
        if type == 'single_label':
            averaged_acc = np.mean(accuracy_avg)
            s = 'avg loss over last {} steps is {}, acc:{}'.format(
                n_iter * steps_per_iter, averaged_loss, averaged_acc)
            print(s)
            s2 = '{}\t{}\t{}\n'.format(tot_iters, averaged_loss,
                                       averaged_acc)
        else:
            s = 'avg loss over last {} steps is {}'.format(
                n_iter * steps_per_iter, averaged_loss)
            print(s)
            s2 = '{}\t{}\n'.format(tot_iters, averaged_loss)
        #for test net:
        # solver.test_nets[0].forward()  # test net (there can be more than one)
        with open(loss_outputname, 'a+') as f:
            f.write(str(int(time.time())) + '\t' + s2)
            f.close()  # redundant inside 'with' but harmless
        # progress_plot.lossplot(loss_outputname) this hits tkinter problem
        if type == 'multilabel':
            precision, recall, accuracy, tp, tn, fp, fn = multilabel_accuracy.check_acc(
                test_net,
                num_samples=n_tests,
                threshold=0.5,
                gt_layer='labels',
                estimate_layer='prob')
            print('solve.py: p {} r {} a {} tp {} tn {} fp {} fn {}'.format(
                precision, recall, accuracy, tp, tn, fp, fn))
            # per-class number of ground-truth positives = tp + fn
            n_occurences = [tp[i] + fn[i] for i in range(len(tp))]
            multilabel_accuracy.write_html(precision,
                                           recall,
                                           accuracy,
                                           n_occurences,
                                           threshold,
                                           weights,
                                           positives=True,
                                           dir=outdir,
                                           name=outname)
        elif type == 'pixlevel':
            # number of tests for pixlevel
            val = range(0, n_tests)
            # jrinfer.seg_tests(solver, val, output_layer='mypixlevel_output', gt_layer='label', outfilename=outname, save_dir=outdir)
        elif type == 'single_label':
            acc = single_label_accuracy.single_label_acc(
                weights,
                testproto,
                net=test_net,
                label_layer='label',
                estimate_layer='fc4_0',
                n_tests=n_tests,
                classlabels=classlabels,
                save_dir=outdir)
            # test_net = solver.test_nets[0] # more than one testnet is supported
            # testloss = test_net.blobs['loss'].data
            try:
                testloss = test_net.blobs['loss'].data
            except:
                print('n o testloss available')
                testloss = 0
            with open(loss_outputname, 'a+') as f:
                f.write('test\t' + str(int(time.time())) + '\t' +
                        str(tot_iters) + '\t' + str(testloss) + '\t' +
                        str(acc) + '\n')
                f.close()  # redundant inside 'with' but harmless
        #
        # subprocess.call(copycmd,shell=True)
        # push this period's results to the results server
        subprocess.call(scpcmd, shell=True)
# NOTE(review): truncated top-level script fragment - it references net_name
# and docker_hostname which are not defined at this scope, and it is cut off
# mid training loop (the inner `for` has no body). Appears to be an older
# inline version of the dosolve() training loop - confirm it is dead code.
host_dirname = '/home/jeremy/caffenets/production'
Utils.ensure_dir(host_dirname)
# physical-host name is injected via env var by the container launcher
baremetal_hostname = os.environ.get('HOST_HOSTNAME')
prefix = baremetal_hostname + '.' + net_name + docker_hostname
detailed_outputname = prefix + '.netoutput.txt'
detailed_jsonfile = detailed_outputname[:-4] + '.json'
loss_outputname = prefix + 'loss.txt'
# shell commands prepared up-front; presumably executed later in the loop
copy2cmd = 'cp ' + detailed_outputname + ' ' + host_dirname
copy3cmd = 'cp ' + loss_outputname + ' ' + host_dirname
copy4cmd = 'cp ' + detailed_jsonfile + ' ' + host_dirname
scp2cmd = 'scp ' + detailed_outputname + ' [email protected]:/var/www/results/progress_plots/'
scp3cmd = 'scp ' + loss_outputname + ' [email protected]:/var/www/results/progress_plots/'
#scp4cmd = 'scp '+detailed_jsonfile + ' [email protected]:/var/www/results/progress_plots/'
Utils.ensure_file(loss_outputname)
Utils.ensure_file(detailed_outputname)
# per-period trackers
i = 0
losses = []
iters = []
steps_per_iter = 1
n_iter = 20
loss_avg = [0] * n_iter
accuracy_avg = [0] * n_iter
tot_iters = 0
# write the loss-log header once at startup
with open(loss_outputname, 'a+') as f:
    f.write('time \t tot_iters \t averaged_loss \t accuracy\n')
    f.close()  # redundant inside 'with' but harmless
for _ in range(100000):
    for i in range(n_iter):
# NOTE(review): fragment is cut off here - the loop body is missing.
def dosolve(weights,
            solverproto,
            testproto,
            type='single_label',
            steps_per_iter=1,
            n_iter=200,
            n_loops=200,
            n_tests=1000,
            cat=None,
            classlabels=None,
            baremetal_hostname='brainiK80a',
            solverstate=None,
            label_layer='label',
            estimate_layer='my_fc2'):
    """Train a caffe net and periodically log loss/accuracy and reports.

    Runs `n_loops` reporting periods of `n_iter` solver steps
    (`steps_per_iter` caffe iterations each).  Per period it appends a
    tab-separated line to `<outdir>/<outdir>_loss.txt` and runs a
    type-specific evaluation: html report for 'multilabel', jrinfer seg
    test for 'pixlevel', single-label accuracy for 'single_label'.  Results
    are rsynced to a remote server and mirrored to /data/results/<type>/.

    :param weights: .caffemodel to initialize from, or None
    :param solverproto: solver prototxt path
    :param testproto: test-net prototxt path (used for netname + report)
    :param type: one of 'single_label', 'multilabel', 'pixlevel'
    :param steps_per_iter: caffe iterations per solver.step() call
    :param n_iter: steps per reporting period
    :param n_loops: number of reporting periods
    :param n_tests: samples for the periodic test pass
    :param cat: category name for single_label (labels and output paths);
        NOTE(review): 'not_' + cat raises TypeError if cat is None and
        classlabels is None - confirm callers always pass one of them
    :param classlabels: label names; defaults to ['not_<cat>', <cat>]
    :param baremetal_hostname: physical-host tag embedded in outdir name
    :param solverstate: optional .solverstate to resume training from
    :param label_layer: ground-truth layer name for accuracy checks
    :param estimate_layer: estimate/output layer name for accuracy checks
    """
    if classlabels is None:
        classlabels = ['not_' + cat, cat]
    # GPU id comes from the command line, not from a parameter
    caffe.set_device(int(sys.argv[1]))
    caffe.set_mode_gpu()
    solver = caffe.get_solver(solverproto)
    if weights is not None:
        solver.net.copy_from(weights)
    if solverstate is not None:
        #see https://github.com/BVLC/caffe/issues/3651
        #No need to use solver.net.copy_from(). .caffemodel contains the weights. .solverstate contains the momentum vector.
        #Both are needed to restart training. If you restart training without momentum, the loss will spike up and it will take
        #~50k iterations to recover. At test time you only need .caffemodel.
        solver.restore(solverstate)
    training_net = solver.net
    solver.test_nets[0].share_with(
        solver.net)  #share train weight updates with testnet
    test_net = solver.test_nets[0]  # more than one testnet is supported

    #get netname, train/test files
    net_name = caffe_utils.get_netname(testproto)
    tt = caffe_utils.get_traintest_from_proto(solverproto)
    print('netname {} train/test {}'.format(net_name, tt))
    docker_hostname = socket.gethostname()
    datestamp = datetime.datetime.strftime(datetime.datetime.now(),
                                           'time%H.%M_%d-%m-%Y')
    # unique run prefix: host + net + container + timestamp
    prefix = baremetal_hostname + '_' + net_name + '_' + docker_hostname + '_' + datestamp
    #detailed_jsonfile = detailed_outputname[:-4]+'.json'
    if weights:
        weights_base = os.path.basename(weights)
    else:
        weights_base = '_noweights_'
    threshold = 0.5
    if net_name:
        outdir = type + '_' + prefix + '_' + weights_base.replace(
            '.caffemodel', '')
    else:
        # no netname available - fall back to the testproto filename
        outdir = type + '_' + prefix + '_' + testproto + '_' + weights_base.replace(
            '.caffemodel', '')
    # sanitize the directory name (quotes, spaces, newlines, returns)
    outdir = outdir.replace('"', '')
    outdir = outdir.replace(' ', '')
    outdir = outdir.replace('\n', '')
    outdir = outdir.replace('\r', '')
    outdir = './' + outdir

    #generate report filename, outdir to save everything (loss, html etc)
    if type == 'pixlevel':
        outname = os.path.join(
            outdir, outdir[2:] + '_netoutput.txt'
        )  #TODO fix the shell script to not look for this, then it wont be needed
    if type == 'multilabel':
        outname = os.path.join(outdir, outdir[2:] + '_mlresults.html')
    if type == 'single_label':
        outdir = outdir + '_' + str(cat)
        outname = os.path.join(outdir,
                               outdir[2:] + '_' + cat + '_slresults.txt')
    loss_outputname = os.path.join(outdir, outdir[2:] + '_loss.txt')
    print('outname:{}\n lossname {}\n outdir {}\n'.format(
        outname, loss_outputname, outdir))
    Utils.ensure_dir(outdir)
    time.sleep(0.1)
    Utils.ensure_file(loss_outputname)

    #copy training and test files to outdir
    if tt is not None:
        if len(tt) == 1:  #copy single traintest file to dir of info
            copycmd = 'cp ' + tt[0] + ' ' + outdir
            subprocess.call(copycmd, shell=True)
        else:  #copy separate train and test files to dir of info
            copycmd = 'cp ' + tt[0] + ' ' + outdir
            subprocess.call(copycmd, shell=True)
            copycmd = 'cp ' + tt[1] + ' ' + outdir
            subprocess.call(copycmd, shell=True)
    #copy solverproto to results dir
    if solverproto is not None:
        copycmd = 'cp ' + solverproto + ' ' + outdir
        subprocess.call(copycmd, shell=True)
    #copy test proto to results dir
    if testproto is not None:
        copycmd = 'cp ' + testproto + ' ' + outdir
        subprocess.call(copycmd, shell=True)
    #copy this file too
    #if name of this file keeps changing we can use
    #os.path.realpath(__file__) which gives name of currently running file
    copycmd = 'cp solve_jr.py ' + outdir
    subprocess.call(copycmd, shell=True)

    #copycmd = 'cp -r '+outdir + ' ' + host_dirname
    #copy to server
    scpcmd = 'rsync -avz ' + outdir + ' [email protected]:/var/www/results/' + type + '/'
    #put in standard dir
    standard_dir = '/data/results/' + type + '/'
    Utils.ensure_dir(standard_dir)
    scpcmd2 = 'rsync -avz ' + outdir + ' /data/results/' + type + '/'

    # per-period trackers
    i = 0
    losses = []
    iters = []
    loss_avg = np.zeros(n_iter)       # per-step losses for current period
    accuracy_list = np.zeros(n_iter)  # per-step accuracies for current period
    tot_iters = 0
    iter_list = []
    # FIX: accuracy_list was rebound to [] here, which made the later
    # accuracy_list[i] = accuracy raise IndexError; the appended per-period
    # averages now go to a separate history list instead.
    accuracy_history = []
    #instead of taking steps its also possible to do
    #solver.solve()
    if type == 'multilabel':
        multilabel_accuracy.open_html(
            weights,
            dir=outdir,
            solverproto=solverproto,
            caffemodel=weights,
            classlabels=constants.web_tool_categories_v2,
            name=outname)
    for _ in range(n_loops):
        for i in range(n_iter):
            solver.step(steps_per_iter)
            # loss = solver.net.blobs['score'].data
            loss = solver.net.blobs['loss'].data
            loss_avg[i] = loss
            losses.append(loss)
            tot_iters = tot_iters + steps_per_iter
            # if type == 'single_label' or type == 'pixlevel':  #test, may not work for pixlevel? #indeed does not work for pix
            if type == 'single_label':
                accuracy = solver.net.blobs['accuracy'].data
                accuracy_list[i] = accuracy
                print('iter ' + str(i * steps_per_iter) + ' loss:' +
                      str(loss) + ' acc:' + str(accuracy))
            else:
                print('iter ' + str(i * steps_per_iter) + ' loss:' +
                      str(loss))
        iter_list.append(tot_iters)
        try:
            averaged_loss = np.average(loss_avg)
            s2 = '{}\t{}\n'.format(tot_iters, averaged_loss)
        except Exception:
            # fallback: average by hand if np.average chokes on the data
            print("something wierd with loss:" + str(loss_avg))
            s = 0
            for l in loss_avg:  # FIX: was Python-2-only 'print i' and shadowed loop index
                print(l)
                s = s + l
            averaged_loss = s / len(loss_avg)
            print('avg:' + str(s) + ' ' + str(averaged_loss))
            s2 = '{}\t{}\n'.format(tot_iters, averaged_loss)
        #for test net:
        # solver.test_nets[0].forward()  # test net (there can be more than one)
        # progress_plot.lossplot(loss_outputname) this hits tkinter problem
        if type == 'multilabel':
            #here accuracy is a list....jesus who wrote this
            precision, recall, accuracy, tp, tn, fp, fn = multilabel_accuracy.check_acc(
                test_net,
                num_samples=n_tests,
                threshold=0.5,
                gt_layer=label_layer,
                estimate_layer=estimate_layer)
            # per-class number of ground-truth positives = tp + fn
            n_occurences = [tp[i] + fn[i] for i in range(len(tp))]
            multilabel_accuracy.write_html(precision,
                                           recall,
                                           accuracy,
                                           n_occurences,
                                           threshold,
                                           weights,
                                           positives=True,
                                           dir=outdir,
                                           name=outname,
                                           classlabels=classlabels)
            avg_accuracy = np.mean(accuracy)
            print('solve.py: loss {} p {} r {} a {} tp {} tn {} fp {} fn {}'.
                  format(averaged_loss, precision, recall, accuracy, tp, tn,
                         fp, fn))
            s2 = '{}\t{}\t{}\n'.format(tot_iters, averaged_loss,
                                       avg_accuracy)
        elif type == 'pixlevel':
            # number of tests for pixlevel
            s = '#########\navg loss over last {} steps is {}'.format(
                n_iter * steps_per_iter, averaged_loss)
            print(s)
            # avg_accuracy = np.mean(accuracy)
            # print('accuracy mean {} std {}'.format(avg_accuracy,np.std(accuracy_list)))
            val = range(0, n_tests)
            results_dict = jrinfer.seg_tests(solver,
                                             val,
                                             output_layer=estimate_layer,
                                             gt_layer='label',
                                             outfilename=outname,
                                             save_dir=outdir,
                                             labels=classlabels)
            # results_dict = jrinfer.seg_tests(test_net, val, output_layer=estimate_layer,gt_layer='label',outfilename=outname,save_dir=outdir,labels=classlabels)
            #dont need to send test_net, the jrinfer already looks for test net part of solver
            overall_acc = results_dict['overall_acc']
            mean_acc = results_dict['mean_acc']
            mean_ion = results_dict['mean_iou']
            fwavacc = results_dict['fwavacc']
            # FIX: format string had only 3 placeholders for 6 values, so the
            # accuracy stats were silently dropped from the log line
            s2 = '{}\t{}\t{}\t{}\t{}\t{}\n'.format(tot_iters, averaged_loss,
                                                   overall_acc, mean_acc,
                                                   mean_ion, fwavacc)
        elif type == 'single_label':
            averaged_acc = np.average(accuracy_list)
            accuracy_history.append(averaged_acc)
            # FIX: format string was missing its closing brace ('std {'),
            # which dropped the std value and mis-rendered the message
            s = 'avg tr loss over last {} steps is {}, acc:{} std {}'.format(
                n_iter * steps_per_iter, averaged_loss, averaged_acc,
                np.std(accuracy_list))
            print(s)
            s2 = '{}\t{}\t{}\n'.format(tot_iters, averaged_loss,
                                       averaged_acc)
            acc = single_label_accuracy.single_label_acc(
                weights,
                testproto,
                net=test_net,
                label_layer='label',
                estimate_layer=estimate_layer,
                n_tests=n_tests,
                classlabels=classlabels,
                save_dir=outdir)
            # test_net = solver.test_nets[0]  # more than one testnet is supported
            try:
                testloss = test_net.blobs['loss'].data
            except Exception:
                print('no testloss available')
                testloss = 0
            with open(loss_outputname, 'a+') as f:
                f.write('test\t' + str(int(time.time())) + '\t' +
                        str(tot_iters) + '\t' + str(testloss) + '\t' +
                        str(acc) + '\n')
        # params,n_timeconstants = fit_points_exp(iter_list,accuracy_list)
        # print('fit: asymptote {} tau {} x0 {} t/tau {}'.format(params[0],params[1],params[2],n_timeconstants))
        ## if n_timeconstants > 10 and tot_iters>10000:  #on a long tail
        #     return params,n_timeconstants
        with open(loss_outputname, 'a+') as f:
            f.write(str(int(time.time())) + '\t' + s2)
        ##
        # subprocess.call(copycmd,shell=True)
        # push this period's results to the server and the local mirror
        subprocess.call(scpcmd, shell=True)
        subprocess.call(scpcmd2, shell=True)