def load_models(opt):
    models = []
    model_cnns = []
    for iteration in opt.iterations:
        print(iteration)
        # Load infos
        infos = load_infos(opt, iteration)
        ignore = [
            "id", "batch_size", "beam_size", "start_from_best", "input_json",
            "input_h5", "input_anno", "images_root", "aic_caption_path",
            "input_bu"
        ]
        for k in vars(infos['opt']).keys():
            if k not in ignore:
                vars(opt).update({k: vars(infos['opt'])[k]})
        # Setup cnn model
        model_cnn = setup_cnn(opt, iteration)
        # Setup model
        model = setup(opt, iteration)
        models.append(model)
        model_cnns.append(model_cnn)
    return models, model_cnns
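# A minimal usage sketch for load_models, assuming an argparse-style `opt`
# namespace and the load_infos/setup_cnn/setup helpers defined alongside it;
# parse_opt and the checkpoint iteration numbers below are hypothetical.
opt = parse_opt()
opt.iterations = [10000, 20000, 30000]  # hypothetical checkpoint iterations
models, model_cnns = load_models(opt)
for model, model_cnn in zip(models, model_cnns):
    model.eval()       # the loaded ensemble is typically used for inference
    model_cnn.eval()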
def parse_texts(self):
    url = "%s/text/" % self.base_url
    content = self.request_content(url)
    if content:
        pattern = re.compile(
            '<img.*?u-logo lazy".*?data-original="(.*?)".*?></a>.*?' +  # user icon url
            '<a.*?u-user-name.*?target="_blank">(.*?)</a>.*?' +         # user name
            '<span.*?f-ib f-fr">(.*?)</span>.*?' +                      # release time
            '<div.*?j-r-list-c-desc">(.*?)</div>', re.S)                # text
        items = re.findall(pattern, content)
        models = []
        for item in items:
            # (icon url, name)
            user = User(item[0], item[1])
            # (user, time, text, content_from)
            model = TextModel(user, item[2], item[3], self.name)
            models.append(model)
        return models
    else:
        print('request content error')
        return None
def parse_texts(self):
    url = "%s/" % self.base_url
    content = self.request_content(url)
    if content:
        pattern = re.compile(
            '<img.*?"user-img lazy-load left".*?data-src="(.*?)".*?onerror.*?>.*?' +           # user icon url
            '<div.*?class="name-time-wrapper left">.*?<span.*?class="name">(.*?)</span>.*?' +  # user name
            '<span.*?class="time timeago".*?>(.*?)</span>.*?' +                                # release time
            '<div.*?class="upload-txt no-mb">.*?<p>(.*?)</p>', re.S)                           # text
        items = re.findall(pattern, content)
        models = []
        for item in items:
            # (icon url, name)
            user = User(item[0], item[1])
            # (user, time, text, content_from)
            model = TextModel(user, item[2], item[3], self.name)
            models.append(model)
        return models
    else:
        print('request content error')
        return None
def __init__(self, posts):
    m = []
    n = []
    fmap = posts.familyhash.copy()
    # Collect three long lines (8-10 syllables) from families with >= 3 posts.
    for family in fmap.keys():
        flist = fmap[family]
        if len(flist) >= 3:
            while len(m) <= 2:
                try:
                    ele = flist.pop()
                except IndexError:
                    break
                if 7 < ele.syllables < 11:
                    m.append(ele.text)
    # Collect two short lines (5-7 syllables) from families with >= 2 posts.
    for family in fmap.keys():
        flist = fmap[family]
        if len(flist) >= 2:
            while len(n) <= 1:
                try:
                    ele = flist.pop()
                except IndexError:
                    break
                if 4 < ele.syllables < 8:
                    n.append(ele.text)
    if (len(m) + len(n)) < 5:
        return  # Not a poem!
    self.poems += [[m[0], m[1], n[0], n[1], m[1]]]
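# A minimal driver sketch for the poem builder above, assuming a posts object
# exposing `familyhash` (a dict mapping rhyme families to lists of posts with
# `.syllables` and `.text`); Posts, Poem and fetch_recent_posts are
# hypothetical names standing in for the surrounding classes.
posts = Posts(fetch_recent_posts())
poem = Poem(posts)  # __init__ appends to poem.poems only when a poem fits
if poem.poems:
    print('\n'.join(poem.poems[0]))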
def get_model_lists():
    from django.utils.text import capfirst
    from django.utils.html import escape
    from django.contrib import admin

    def no_auto_fields(field):
        from django.db import models
        return not isinstance(field[2], models.AutoField)

    excluded_models = [Role, SupplyPlace, LateHealthPost]
    models = []
    for model, m_admin in admin.site._registry.items():
        # fetch ALL fields (including those nested via
        # foreign keys) for this model
        fields = [{
            "caption": escape(capt),
            "name": name,
            "help_text": field.help_text
        } for name, capt, field in filter(no_auto_fields, nested_fields(model))]
        # pass model metadata and fields array to the template to be rendered;
        # select only models related to rutfet
        if model._meta.app_label == "rutfet" and model not in excluded_models:
            models.append({
                "caption": capfirst(model._meta.verbose_name_plural),
                "name": model.__name__.lower(),
                "app_label": model._meta.app_label,
                "fields": fields
            })
    return models
def get_defined_models():
    import inspect
    import models
    import sqlalchemy

    members = dict(inspect.getmembers(models))
    members.pop('Base')
    # Named model_classes so the `models` module is not shadowed.
    model_classes = list()
    for name, member in members.items():
        if isinstance(member, sqlalchemy.ext.declarative.api.DeclarativeMeta):
            model_classes.append(member)
    return model_classes
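# A quick usage sketch, assuming this lives next to a SQLAlchemy `models`
# module with a declarative Base and that each model declares __tablename__.
for model_class in get_defined_models():
    print(model_class.__name__, '->', model_class.__tablename__)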
def main(args):
    print("Loading config file: ", args.config)
    params = utils.load_config_file(args.config)
    params["dataset_paths"] = utils.format_dataset_path(params["dataset_paths"])
    if "nyu" not in params:
        params["nyu"] = False

    # Data loading code
    print("Creating data loaders...")
    if params["nyu"]:
        from dataloaders.nyu import NYUDataset
        val_dataset = NYUDataset(params["dataset_paths"], split='val')
    else:
        val_dataset = Datasets.FastDepthDataset(params["dataset_paths"],
                                                split='val',
                                                depth_min=params["depth_min"],
                                                depth_max=params["depth_max"],
                                                input_shape_model=(224, 224),
                                                random_crop=False)

    # set batch size to 1 for validation
    data_loader = torch.utils.data.DataLoader(val_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=params["num_workers"],
                                              pin_memory=True)

    # Set GPU
    params["device"] = torch.device(
        "cuda:{}".format(params["device"])
        if params["device"] >= 0 and torch.cuda.is_available() else "cpu")
    print("Using device", params["device"])

    print("Loading models...")
    models = []
    model_names = []
    for model_dict in params["models"]:
        model_names.append(Path(model_dict["model_path"]).stem)
        model, _ = utils.load_model(model_dict, model_dict["model_path"],
                                    params["device"])
        model.to(params["device"])
        models.append(model)

    # Create output directory
    output_directory = os.path.join(params["save_folder"], ".".join(model_names))
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    params["output_directory"] = output_directory
    print("Saving results to " + output_directory)

    compare_models(params, data_loader, models)
def compile_all_results(scenarios, dir='../data'):
    """
    Compiles the results across multiple scenarios produced by running
    run_on_cluster on each one into a single csv file. The specified
    directory must be where the results of running run_on_cluster for each
    scenario are stored (each is a sub-directory named v0, v1, etc.) and is
    also where the output from this function will be saved.
    """
    models = []
    causes = []
    time = []
    true_cf = []
    true_std = []
    std_bias = []
    mean_abs_err = []
    median_abs_err = []
    mean_rel_err = []
    median_rel_err = []
    mean_csmf_accuracy = []
    median_csmf_accuracy = []
    mean_coverage_bycause = []
    mean_coverage = []
    percent_total_coverage = []
    scenario = []

    for i in range(scenarios):
        for j in ['bad_model', 'latent_simplex']:
            read = csv.reader(open('%s/v%s/%s_summary.csv' % (dir, i, j)))
            next(read)  # skip the header row
            for row in read:
                models.append(row[0])
                causes.append(row[1])
                time.append(row[2])
                true_cf.append(row[3])
                true_std.append(row[4])
                std_bias.append(row[5])
                mean_abs_err.append(row[6])
                median_abs_err.append(row[7])
                mean_rel_err.append(row[8])
                median_rel_err.append(row[9])
                mean_csmf_accuracy.append(row[10])
                median_csmf_accuracy.append(row[11])
                mean_coverage_bycause.append(row[12])
                mean_coverage.append(row[13])
                percent_total_coverage.append(row[14])
                scenario.append(i)

    all = pl.np.core.records.fromarrays(
        [scenario, models, time, true_cf, true_std, causes,
         mean_abs_err, median_abs_err, mean_rel_err, median_rel_err,
         mean_csmf_accuracy, median_csmf_accuracy, mean_coverage_bycause,
         mean_coverage, percent_total_coverage],
        names=['scenario', 'model', 'time', 'true_cf', 'true_std', 'cause',
               'mean_abs_err', 'median_abs_err', 'mean_rel_err',
               'median_rel_err', 'mean_csmf_accuracy', 'median_csmf_accuracy',
               'mean_coverage_bycause', 'mean_coverage',
               'percent_total_coverage'])
    pl.rec2csv(all, fname='%s/all_summary_metrics.csv' % (dir))
def fit(csv_file, classifiers=None, features=None, svm_settings=None,
        knn_settings=None, lda_settings=None):
    """
    Fits one model per requested classifier on the chosen features.
    When classifiers or features are omitted, the user is prompted for them.

    Arguments:
        csv_file: path to the csv file containing the data; it is formatted
            and split into training and control data
        classifiers: array of classifier names ('svm', 'knn', 'lda', ...)
        features: the features to perform the fitting on

    Returns:
        models: array of (name, model) tuples of trained classifiers
        training_data, target_data: the data the models were fitted on
        control_data: data you can use to check the fit per classifier
    """
    if classifiers is None:
        classifiers = select_classifiers()
    if features is None:
        features = select_features(csv_file)
    training_data, control_data = data.get_dataframes(csv_file, features)
    target_data = training_data['diabetes']
    training_data = training_data.drop(columns=['diabetes'])
    models = []  # the trained models
    for c in classifiers:
        if c == 'svm':
            model = get_model(c, training_data, target_data, svm_settings)
        elif c == 'knn':
            model = get_model(c, training_data, target_data, knn_settings)
        elif c == 'lda':
            model = get_model(c, training_data, target_data, lda_settings)
        else:
            model = get_model(c, training_data, target_data)
        if model is not None:
            models.append((c, model))
    return models, training_data, target_data, control_data
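# A minimal usage sketch for fit(), assuming get_model returns sklearn-style
# estimators; the csv path and feature names below are hypothetical.
models, training_data, target_data, control_data = fit(
    'data/diabetes.csv',
    classifiers=['svm', 'knn'],
    features=['glucose', 'bmi', 'age'])
for name, model in models:
    print(name, model.score(training_data, target_data))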
def parse_images(self):
    url = '%s/pic/' % self.base_url
    content = self.request_content(url)
    if content:
        pattern = re.compile(
            '<img.*?"user-img lazy-load left".*?data-src="(.*?)".*?onerror.*?>.*?' +           # user icon url
            '<div.*?class="name-time-wrapper left">.*?<span.*?class="name">(.*?)</span>.*?' +  # user name
            '<span.*?class="time timeago".*?>(.*?)</span>.*?' +                                # release time
            '<div.*?class="upload-txt.*?<p>(.*?)</p>.*?' +                                     # text
            '<img.*?class="upload-img lazy".*?data-src="(.*?)".*?>', re.S)                     # image url
        items = re.findall(pattern, content)
        models = []
        for item in items:
            user = User(item[0], item[1])
            # (self, user, time, image_url, text, content_from):
            model = ImageModel(user, item[2], item[3], item[4], self.name)
            models.append(model)
        return models
def parse_images(self):
    url = '%s/pic/' % self.base_url
    content = self.request_content(url)
    if content:
        pattern = re.compile(
            '<img.*?u-logo lazy".*?data-original="(.*?)".*?></a>.*?' +  # user icon url
            '<a.*?u-user-name.*?target="_blank">(.*?)</a>.*?' +         # user name
            '<span.*?f-ib f-fr">(.*?)</span>.*?' +                      # release time
            '<div.*?j-r-list-c-desc">(.*?)</div>.*?' +                  # text
            '<img.*?class="lazy".*?data-original="(.*?)".*?>', re.S)    # image url
        items = re.findall(pattern, content)
        models = []
        for item in items:
            user = User(item[0], item[1])
            # (self, user, time, image_url, text, content_from):
            model = ImageModel(user, item[2], item[3], item[4], self.name)
            models.append(model)
        return models
def getRelatedModels(entry):
    entries = {}
    models = []
    try:
        for query, fk in reversed(list(entry.dependencies())):
            for x in fk.model_class.select().where(query):
                modelname = fk.model_class.__name__
                try:
                    entries[modelname].append(x)
                except KeyError:
                    # first row for this model: record its name
                    models.append(modelname)
                    entries[modelname] = [x]
    except Exception:
        pass
    return (models, entries)
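# A minimal usage sketch, assuming a peewee-style model instance whose
# dependencies() yields (query, foreign_key) pairs as getRelatedModels
# expects; `some_entry` is a hypothetical record.
model_names, related = getRelatedModels(some_entry)
for name in model_names:
    print(name, len(related[name]), 'related rows')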
def init():
    if os.path.exists('./saliency_data/{}'.format(args.name)):
        # Reload previously trained models and their data folds from disk.
        models = []
        folds = []
        for i in range(len(os.listdir('./saliency_data/{}/models'.format(args.name)))):
            models.append(keras.models.load_model(
                './saliency_data/{}/models/model{}.hdf5'.format(args.name, i)))
            folds.append((
                pd.read_pickle('./saliency_data/{}/data/train{}.p'.format(args.name, i)),
                pd.read_pickle('./saliency_data/{}/data/test{}.p'.format(args.name, i)),
                pd.read_pickle('./saliency_data/{}/data/val{}.p'.format(args.name, i))))
    else:
        # Train one model per fold and cache models and folds to disk.
        models = []
        folds = deepconv_kfold.get_data('min', 'max', cols, args.preprocessing,
                                        args.simple, args.test_split, args.runs)
        if args.fold is not None:
            folds = [folds[args.fold]]
        for i, fold in enumerate(folds):
            train, test, val = fold
            model = deepconv_kfold.train_model_fold(train, val, args.model,
                                                    args.epochs, i, use_keras=True)
            models.append(model)
            os.makedirs('./saliency_data/{}/models/'.format(args.name), exist_ok=True)
            os.makedirs('./saliency_data/{}/data/'.format(args.name), exist_ok=True)
            model.save('./saliency_data/{}/models/model{}.hdf5'.format(args.name, i))
            test.to_pickle('./saliency_data/{}/data/test{}.p'.format(args.name, i))
            train.to_pickle('./saliency_data/{}/data/train{}.p'.format(args.name, i))
            val.to_pickle('./saliency_data/{}/data/val{}.p'.format(args.name, i))
    return models, folds
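# A minimal usage sketch for init(), assuming the module-level `args`
# namespace (name, preprocessing, fold, etc.) has already been populated.
models, folds = init()
for i, (model, (train, test, val)) in enumerate(zip(models, folds)):
    print('fold', i, '- train/test/val sizes:', len(train), len(test), len(val))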
def input_layer(sh_list, sh_test, sh_c_list, shm_list, train_loader,
                test_loader, model, rank, split, batch_size, batch_num,
                test_batch_num, epoch_num, lamda, lr, cv):
    update = 0
    feed_q1 = sh_list[rank]
    grad_q1 = sh_list[rank + split - 1]
    send_output = sh_c_list[rank]
    feed_test = sh_test[rank]
    send_target = shm_list[0]

    models = []
    outputs = []
    inputs = []
    optim = []
    n = -1 * (rank - (split - 1))
    num_of_models = split
    delay = n

    # One replica (and optimizer) per pipeline slot.
    for i in range(num_of_models):
        models.append(copy.deepcopy(model))
        outputs.append(0)
        inputs.append(0)
        optim.append(torch.optim.SGD(models[i].parameters(), lr=lr,
                                     momentum=0.9, weight_decay=0.0005,
                                     nesterov=True))
    for i in models:
        i.cuda(rank)
    model.cuda(rank)

    time_tot = 0
    steps = int(batch_num / lamda)
    if batch_num % lamda != 0:
        steps += 1
    lamda_back = lamda

    for epoch in range(epoch_num):
        s_t_u = resource_usage(RUSAGE_SELF)
        s_t = timestamp()
        model.train()
        for i in models:
            i.train()
        train_data = train_loader.__iter__()
        t = 0
        t1 = t2 = t3 = t4 = t5 = t6 = t7 = 0
        td1 = td2 = td3 = td4 = td5 = 0
        for step in range(1, steps + 1):
            lamda = lamda_back
            if step == steps:
                lamda = batch_num - (step - 1) * lamda
            for time in range(1, lamda + delay + 1):
                if time <= lamda:
                    t1 = timestamp()
                    data, target = next(train_data)
                    send_target.send(target)
                    data = data.cuda(rank, non_blocking=True)
                    t2 = timestamp()
                    model_idx = (time % num_of_models) - 1
                    output = models[model_idx].forward(data)
                    outputs[model_idx] = output
                    t3 = timestamp()
                    feed_q1.send_wait()
                    send_output.copy_(output.data)
                    feed_q1.async_send_signal()
                    t += 1
                    t4 = timestamp()
                if time > delay:  # t - (2K - k - 1)
                    t5 = timestamp()
                    pg = grad_q1.recv()
                    pg = pg.cuda(rank)
                    t6 = timestamp()
                    output_idx = ((time - delay) % num_of_models) - 1
                    optimizer = optim[output_idx]
                    optimizer.zero_grad()
                    output = outputs[output_idx]
                    output.backward(pg)
                    optimizer.step()
                    t7 = timestamp()
                td1 += t2 - t1
                td2 += t3 - t2
                td3 += t4 - t3
                td4 += t6 - t5
                td5 += t7 - t6
        # Average the replica weights back into the master model, then copy
        # the averaged weights back out to every replica.
        model.init_zero()
        with torch.cuda.device(rank):
            for i in range(num_of_models):
                j = models[i].parameters()
                for k in model.parameters():
                    l = j.__next__()
                    k.requires_grad_(False)
                    k.copy_(k.data + l.data / num_of_models)
            for i in range(num_of_models):
                j = model.parameters()
                for k in models[i].parameters():
                    l = j.__next__()
                    k.requires_grad_(False)
                    k.copy_(l.data)
                    k.requires_grad_(True)
        e_t_u = resource_usage(RUSAGE_SELF)
        e_t = timestamp()
        u_t = e_t_u.ru_stime - s_t_u.ru_stime
        t = e_t - s_t
        time_tot = time_tot + t
        print('rank =', rank, 'recv output =', td1)
        print('rank =', rank, 'forward =', td2)
        print('rank =', rank, 'send output', td3)
        print('rank =', rank, 'recv grad =', td4)
        print('rank =', rank, 'backward =', td5)

        model.eval()
        for i in models:
            i.eval()
        for data, target in test_loader:
            x = Variable(data).cuda(rank)
            output = model.forward(x)
            feed_test.send(output.data.to('cpu'))
        if epoch == 150 or epoch == 225:
            lr = lr * 0.1
            for i in optim:
                for j in i.param_groups:
                    j['lr'] = lr
def predict_fast(args):
    transforms = Compose([])
    dataset = OpenEDSDatasetTest(data_path=args.data_path,
                                 labels_file=args.label_file,
                                 save_path=args.save_dir,
                                 transforms=transforms,
                                 normalize={"mean": [0.485, 0.456, 0.406],
                                            "std": [0.229, 0.224, 0.225]},
                                 cumulative=True)
    data_loader = DataLoader(dataset, batch_size=1, num_workers=8,
                             shuffle=False, pin_memory=False)
    # Named loaded_models so the `models` module used below is not shadowed.
    loaded_models = []
    for model_config in configs:
        conf = load_config(model_config.config_path)
        models_zoo = conf.get('models_zoo', 'selim')
        if models_zoo == 'qubvel':
            import segmentation_models_pytorch as smp
            model = smp.Unet(encoder_name=conf['encoder'],
                             classes=conf['num_classes'])
        else:
            model = models.__dict__[conf['network']](
                seg_classes=4, backbone_arch=conf['encoder'])
        model = torch.nn.DataParallel(model).cuda()
        checkpoint_path = model_config.weights_path
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()
        loaded_models.append([model, model_config])
    with torch.no_grad():
        for sample in tqdm(data_loader):
            imgs = sample["image"].cuda().float()
            preds_dict = {}
            for model, model_config in loaded_models:
                # Average predictions with a horizontally flipped pass (TTA).
                output = model(imgs)
                output_flip = torch.flip(model(torch.flip(imgs, dims=(3,))),
                                         dims=(3,))
                output = (output + output_flip) / 2
                output = output.cpu()
                for i in range(output.shape[0]):
                    img_name = sample["img_name"][i]
                    if img_name not in preds_dict:
                        preds_dict[img_name] = {
                            'output': output[i] * model_config.weight,
                            'total_weight': model_config.weight
                        }
                    else:
                        preds_dict[img_name]['output'] += output[i] * model_config.weight
                        preds_dict[img_name]['total_weight'] += model_config.weight
            preds_total = normalize_preds(preds_dict)
            save_preds(args, preds_total)
    # ... inside a loop over training sizes i, within the preceding
    # args.mode branch (loop header not shown in this fragment) ...
    for j in range(int((n_frames - n_trains[i]) / 10)):
        (x, y) = next(test_gen)
        test_preds[j*batch_size:(j+1)*batch_size, :] = net.forward(x).cpu().detach().numpy()
        test_targets[j*batch_size:(j+1)*batch_size, :] = y.cpu().detach().numpy()
    temp = pearsonr(train_preds.squeeze(), train_targets.squeeze())
    train_pcc[i] = temp[0]
    temp = pearsonr(test_preds.squeeze(), test_targets.squeeze())
    test_pcc[i] = temp[0]
    temp = spearmanr(train_preds.squeeze(), train_targets.squeeze())
    train_srocc[i] = temp[0]
    temp = spearmanr(test_preds.squeeze(), test_targets.squeeze())
    test_srocc[i] = temp[0]
    models.append(net)

    savemat('results/nn_' + str(args.n_feats) + '_train_size_analysis.mat',
            {'nn_' + str(args.n_feats) + '_train_pcc': train_pcc,
             'nn_' + str(args.n_feats) + '_test_pcc': test_pcc,
             'nn_' + str(args.n_feats) + '_train_srocc': train_srocc,
             'nn_' + str(args.n_feats) + '_test_srocc': test_srocc})
elif args.mode == 'analyze_scale_qp':
    n_train = args.train_size
    Net = models.model_class[args.model]
    data_gen_function = models.data_generator[args.model]
    net = Net(n_feats, h_size).cuda()
    train_gen = data_gen_function(n_train, n_feats, batch_size, args.data_path, 'train')
    net.train(train_gen, int(n_train / 10), int(args.epochs))
    test_gen = data_gen_function(n_train, n_feats, batch_size, args.data_path, 'test')
if __name__ == '__main__':
    args = argparser()
    # model_types = sorted(Prediction_Gammas.keys())
    # model_types = ['dphprg', 'dphprgln', 'dphpg', 'dppn']
    # model_types = ['dppprg', 'dppprgln', 'dpppg', 'dpppgln', 'dppn']
    model_types = [
        'sdppprg', 'sdpppg', 'sdpppgln', 'sdppprgln', 'mdppprg', 'mdpppg'
    ]
    models = []
    for model_type in model_types:
        mm = glob.glob(os.path.join(args.path, model_type, 'results*.db'))
        for m in mm:
            models.append((model_type, m))
    pplrs = []
    for model in models:
        print('Processing model {} '.format(model[0]), end=' ')
        try:
            pplrs.append(ppl_generation(model))
            print('Passed')
        except pd.io.sql.DatabaseError:
            print('Failed')
            pass
    df = pd.DataFrame(
        pplrs,
        # columns = ('type','name','PPL_L1','PPL_L2','PPL_Linf','ES_Linf'),
        columns=('type', 'name', 'PPL_Linf', 'ES_Linf', 'PPL_Linf_F',
    return kldr

if __name__ == '__main__':
    args = argparser()
    paths = glob.glob(os.path.join(args.path, 'sim_*'))
    # model_types = ['dphpg', 'dphprg', 'dphprgln', 'dppn', 'vhpg']
    model_types = ['dpppg', 'dppprg', 'dppprgln', 'dppn', 'vppg']
    models = []
    gens = []
    for path in paths:
        for model_type in model_types:
            mm = glob.glob(os.path.join(path, model_type, 'results*.db'))
            for m in mm:
                models.append(m)
        gg = glob.glob(os.path.join(path, 'data.db'))
        for g in gg:
            gens.append(g)
    pool = Pool(processes=cpu_count(), initializer=limit_cpu)
    kldrs = list(pool.map(kl_wrapper, models + gens))
    pool.close()
    df = pd.DataFrame(
        [(x[0], x[1], *x[2]) for x in kldrs],
        columns=['type', 'scenario'] +
                ['k_{}'.format(i) for i in range(len(kldrs[0][2]))],
    )
    df.to_csv(os.path.join(args.path, 'kl_divergence.csv'), index=False)
def make_predictions(file, modelPath):
    trainingDataFiles = [file]  # glob.glob("/scratch/staff/ak1774/shared_folder/data/train/*.h5")
    data = data_loader.load_data_from_file(trainingDataFiles[0])
    models = []
    for i in range(1, 2):
        print(i)
        models.append(load_pytorch_model(
            'ModelData/' + str(i) + '/' + 'model.model',
            get_config('/' + str(i) + '/config.json'),
            data))

    xLims = data['time'].values

    #######################
    # get original health
    #######################
    norm_stats = None
    with open("norm_stats.pickle", 'rb') as f:
        norm_stats = pickle.load(f)
    for label, min_value, max_value in norm_stats:
        if "_m_iHealth" in label:
            health_min = min_value
            health_max = max_value
        if "m_iMaxHealth" in label:
            maxhealth_min = min_value
            maxhealth_max = max_value
    healthes = []
    max_healthes = []
    relative_healthes = []
    for i in range(0, 10):
        health_vals = data['player_' + str(i) + '_m_iHealth'].values
        maxhealth_vals = data['player_' + str(i) + '_m_iMaxHealth'].values
        health_vals = health_vals * (health_max - health_min) + health_min
        maxhealth_vals = maxhealth_vals * (maxhealth_max - maxhealth_min) + maxhealth_min
        relative_health_vals = health_vals / maxhealth_vals  # hopefully maxhealth is never 0
        healthes.append(health_vals)
        max_healthes.append(maxhealth_vals)
        relative_healthes.append(relative_health_vals)

    #######################
    # get death times
    #######################
    labels = [(i, label) for i, label in enumerate(list(data))]
    death_time_indicies = preprocess.labels_to_indicies(
        preprocess.select_features_by_name("time_until_next_death", labels))
    death_times = data.values[:, death_time_indicies].astype(np.float32)

    # Full-game pass per model (results unused below).
    for m in models:
        X = [torch.from_numpy(hero_X) for hero_X in m.fullGameData]
        pred = m.model(X)
        pred = torch.sigmoid(pred)
        pred = pred.cpu().detach().numpy()
        y = m.fullGameLabels

    heroStuff = []   # assumed locals; the flattened source relied on globals
    labelStuff = []
    currentMeanTrueAccuracy = 0
    currentMeanFalseAccuracy = 0
    numTruePos = 0
    numFalsePos = 0
    numTrueNeg = 0
    numFalseNeg = 0
    for i in range(0, data.shape[0]):
        predX = 0
        for m in models:
            y = m.fullGameLabels[i]
            y = np.array(y)
            y = np.expand_dims(y, 0)
            X = [torch.from_numpy(hero_X[i:(i + 1), :]) for hero_X in m.fullGameData]
            predX = modelPred(m.model, X) + predX
        predX = predX / len(models)
        true_pos = ((predX > 0.5) == (y > 0.5)).reshape(-1).astype(np.float32)
        true_neg = ((predX < 0.5) == (y < 0.5)).reshape(-1).astype(np.float32)
        false_pos = ((predX > 0.5) == (y < 0.5)).reshape(-1).astype(np.float32)
        false_neg = ((predX < 0.5) == (y > 0.5)).reshape(-1).astype(np.float32)
        numTruePos += int(np.sum(true_pos))
        numTrueNeg += int(np.sum(true_neg))
        numFalsePos += int(np.sum(false_pos))
        numFalseNeg += int(np.sum(false_neg))
        prediction = predX
        currentMeanTrueAccuracy += np.mean(true_pos)
        currentMeanFalseAccuracy += np.mean(false_pos)
        prediction = np.squeeze(prediction, 0)
        if i % 3000 == 0:
            print('Current true pos ' + str(currentMeanTrueAccuracy / (i + 1)))
            print('Current false pos ' + str(currentMeanFalseAccuracy / (i + 1)))
        heroStuff.append(prediction)
        labelStuff.append(np.squeeze(y, 0))

    print()
    print(numTruePos)
    print(numTrueNeg)
    print()
    print(numFalsePos)
    print(numFalseNeg)
    print()
    print('True Pos = ' + str(currentMeanTrueAccuracy / 19326))   # hard-coded frame count
    print('False pos = ' + str(currentMeanFalseAccuracy / 19326))

    heroStuff1 = np.swapaxes(heroStuff, 0, 1)
    labelStuff1 = np.swapaxes(labelStuff, 0, 1)
    xLims = xLims - xLims[0] - 90
    np.save('hero.npy', np.array(heroStuff1))
    np.save('label.npy', np.array(labelStuff1))
    np.save('xLims.npy', np.array(xLims))
    np.save('health.npy', np.array(healthes))
def hidden_layer(sh_list, sh_test, sh_c_list, model, rank, split, batch_num,
                 test_batch_num, epoch_num, lamda, lr, cv):
    feed_q1 = sh_list[rank - 1]
    grad_q1 = sh_list[rank + split - 2]
    send_output = sh_c_list[2 * rank]
    send_grad = sh_c_list[2 * rank - 1]
    feed_test = sh_test[rank - 1]
    if split > 2:
        feed_q2 = sh_list[rank]
        grad_q2 = sh_list[rank + split - 1]
        feed_test2 = sh_test[rank]

    models = []
    outputs = []
    inputs = []
    optim = []
    n = -1 * (rank - (split - 1))
    num_of_models = split
    delay = n

    for i in range(2 * split - 1):
        models.append(copy.deepcopy(model))
        outputs.append(0)
        inputs.append(0)
        optim.append(torch.optim.SGD(models[i].parameters(), lr=lr,
                                     momentum=0.9, weight_decay=0.0005,
                                     nesterov=True))
    for i in models:
        i.cuda(rank)
    model.cuda(rank)

    time_tot = 0
    steps = int(batch_num / lamda)
    if batch_num % lamda != 0:
        steps += 1
    lamda_back = lamda
    t = 0
    for epoch in range(epoch_num):
        s_t_u = resource_usage(RUSAGE_SELF)
        s_t = timestamp()
        model.train()
        for i in models:
            i.train()
        t = 0
        t1 = t2 = t3 = t4 = t5 = t6 = t7 = t8 = 0
        td1 = td2 = td3 = td4 = td5 = td6 = 0
        for step in range(1, steps + 1):
            lamda = lamda_back
            if step == steps:
                lamda = batch_num - (step - 1) * lamda
            for time in range(1, lamda + delay + 1):
                if time <= lamda:  # k = 2; t >= k
                    t1 = timestamp()
                    x = feed_q1.recv()
                    x = x.cuda(rank, non_blocking=True)
                    t2 = timestamp()
                    input_feat = Variable(x, requires_grad=True)
                    model_idx = (time % num_of_models) - 1
                    output = models[model_idx].forward(input_feat)
                    inputs[model_idx] = input_feat
                    outputs[model_idx] = output
                    t3 = timestamp()
                    feed_q2.send_wait()
                    send_output.copy_(output.data)
                    feed_q2.async_send_signal()
                    t += 1
                    t4 = timestamp()
                if time > delay:  # t - (2K - k - 1)
                    t5 = timestamp()
                    pg = grad_q2.recv()
                    pg = pg.cuda(rank)
                    t6 = timestamp()
                    output_idx = ((time - delay) % num_of_models) - 1
                    optimizer = optim[output_idx]
                    optimizer.zero_grad()
                    output = outputs[output_idx]
                    output.backward(pg)
                    optimizer.step()
                    t7 = timestamp()
                    # Pass the input gradient back to the previous stage.
                    grad_q1.send_wait()
                    send_grad.copy_(inputs[output_idx].grad.data)
                    grad_q1.async_send_signal()
                    t8 = timestamp()
                td1 += t2 - t1
                td2 += t3 - t2
                td3 += t4 - t3
                td4 += t6 - t5
                td5 += t7 - t6
                td6 += t8 - t7
        # Average the replica weights back into the master model, then copy
        # the averaged weights back out to every replica.
        model.init_zero()
        with torch.cuda.device(rank):
            for i in range(num_of_models):
                j = models[i].parameters()
                for k in model.parameters():
                    l = j.__next__()
                    k.requires_grad_(False)
                    k.copy_(k.data + l.data / num_of_models)
            for i in range(num_of_models):
                j = model.parameters()
                for k in models[i].parameters():
                    l = j.__next__()
                    k.requires_grad_(False)
                    k.copy_(l.data)
                    k.requires_grad_(True)
        e_t_u = resource_usage(RUSAGE_SELF)
        e_t = timestamp()
        u_t = e_t_u.ru_stime - s_t_u.ru_stime
        t = e_t - s_t
        time_tot = time_tot + t
        print('rank =', rank, 'recv output =', td1)
        print('rank =', rank, 'forward =', td2)
        print('rank =', rank, 'send output', td3)
        print('rank =', rank, 'recv grad =', td4)
        print('rank =', rank, 'backward =', td5)
        print('rank =', rank, 'send grad =', td6)

        model.eval()
        for i in models:
            i.eval()
        for i in range(test_batch_num):
            x = feed_test.recv()
            x = x.cuda(rank)
            output = model.forward(x)
            feed_test2.send(output.data.to('cpu'))
        if epoch == 150 or epoch == 225:
            lr = lr * 0.1
            for i in optim:
                for j in i.param_groups:
                    j['lr'] = lr
def generate_models(self, queue=None, output_fits=True):
    """
    gblend.generate_models( queue=None, output_fits=True )

    Generate all the high resolution models used in the fitting process.
    The high resolution catalog must be set before models can be generated.
    If output_fits = True then a fits file will be generated with the
    initial models. This will be created in the model_dir.
    queue is used to return models when run in parallel mode.
    """
    if not self.hres_loaded:
        raise ValueError('You must set the high resolution catalog with gblend.set_hres_catalog() before generating models!')

    imgs = []
    # loop through high resolution models
    for (i, model_obj) in enumerate(self):
        # catch any and all errors
        try:
            # generate models
            model_obj.generate_model(self.pad, (self.ny, self.nx),
                                     self.psf.copy(),
                                     use_integration=self.config['use_integration'])
            # the rest is for outputting fits files of input models -
            # skip if output_fits == False
            if not output_fits:
                continue
            # generate extension for outputting to fits
            imgs.append(pyfits.ImageHDU(model_obj.model_img.copy()))
            # add to complete model
            if i == 0:
                full_model = model_obj.model_img.copy()
            else:
                full_model += model_obj.model_img.copy()
        except KeyboardInterrupt:
            raise
        except:
            # on exception return the traceback string; also return it
            # through the queue if one was passed
            trace = traceback.format_exc()
            if queue is not None:
                queue.put((self.number, trace))
            return trace

    # output a fits file for the full model and individual models
    if output_fits:
        # prepare hdulist
        hdus = [pyfits.PrimaryHDU(full_model)]
        hdus.extend(imgs)
        hdulist = pyfits.HDUList(hdus)
        # write out file - delete first
        file = '%s%d_input.fits' % (self.config['model_dir'], self.number)
        if os.path.isfile(file):
            os.remove(file)
        hdulist.writeto(file)

    # if queue is not None, then it is an output queue used to return the
    # models to pygfit when running in parallel mode. That means we have to
    # fetch the model from every model object and return them through the
    # queue. Whatever is put in the queue will be passed back through
    # gblend.set_models()
    if queue is not None:
        models = []
        for model_obj in self:
            models.append(model_obj.model_img)
        # and send it off!
        queue.put((self.number, models))

    # now we are ready for fitting!
    self.ready = True
    return True
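# A minimal sketch of the parallel mode this supports, assuming a gblend
# instance as above; the worker/queue wiring here is illustrative, not
# pygfit's actual driver, and `gblend_obj` is hypothetical.
import multiprocessing

def _worker(gblend, queue):
    gblend.generate_models(queue=queue, output_fits=False)

queue = multiprocessing.Queue()
p = multiprocessing.Process(target=_worker, args=(gblend_obj, queue))
p.start()
number, result = queue.get()  # a list of model images, or a traceback string on failure
p.join()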
        if model_type == 'rf':
            m = RandomForestRegressor(n_estimators=100, random_state=1)
        elif model_type == 'dt':
            m = DecisionTreeRegressor()
        elif model_type == 'linear':
            m = LinearRegression()
        elif model_type == 'ridge':
            m = RidgeCV()
        elif model_type == 'svm':
            m = SVR(gamma='scale')
        elif model_type == 'gb':
            m = GradientBoostingRegressor(random_state=1)
        for feat_set in ['basic', 'dasc']:
            models.append(f'{model_type}_{feat_set}')
            if feat_set == 'basic':
                feat_set = feat_names[1:]
            elif feat_set == 'dasc':
                feat_set = ['X_d1', 'X_d2', 'X_d3']
            m.fit(df_full[feat_set], df_full['Y_sig_mean_normalized'].values)
            for i, (k, v) in enumerate(ds.keys()):
                if v == 'test':
                    df = ds[(k, v)]
                    # if k == 'clath_aux+gak_a7d2_new':
                    #     df = df.dropna()
                    X = df[feat_set]
                    X = X.fillna(X.mean())
                    # y = df['Y_sig_mean_normalized']
def output_layer(sh_list, sh_test, sh_c_list, shm_list, train_loader,
                 test_loader, model, loss_function, rank, split, batch_size,
                 batch_num, test_batch_num, test_num, epoch_num, lamda, lr, cv):
    feed_q2 = sh_list[rank - 1]
    grad_q2 = sh_list[rank + split - 2]
    send_grad = sh_c_list[rank + split - 2]
    feed_test = sh_test[rank - 1]
    send_target = shm_list[0]
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    models = []
    outputs = []
    inputs = []
    optim = []
    loss_tot = 0
    time_tot = 0
    cuda_time = 0
    n = -1 * (rank - (split - 1))
    num_of_models = split
    delay = n

    for i in range(num_of_models):
        models.append(copy.deepcopy(model))
        outputs.append(0)
        inputs.append(0)
        optim.append(torch.optim.SGD(models[i].parameters(), lr=lr,
                                     momentum=0.9, weight_decay=0.0005,
                                     nesterov=True))
    for i in models:
        i.cuda(rank)
    model.cuda(rank)

    steps = int(batch_num / lamda)
    if batch_num % lamda != 0:
        steps += 1
    lamda_back = lamda
    t = 0
    for epoch in range(epoch_num):
        s_t_u = resource_usage(RUSAGE_SELF)
        s_t = timestamp()
        start.record()
        loss_sum = 0
        model.train()
        t = 0
        t1 = t2 = t3 = t4 = t5 = 0
        td1 = td2 = td3 = td4 = 0
        train_data = train_loader.__iter__()
        for step in range(1, steps + 1):
            lamda = lamda_back
            if step == steps:
                lamda = batch_num - (step - 1) * lamda
            for time in range(1, lamda + 1):
                t1 = timestamp()
                # recv output from the previous stage plus the batch targets
                offset = t * batch_size
                x = feed_q2.recv()
                x = x.cuda(rank, non_blocking=True)
                target = send_target.recv()
                target = target.cuda(rank, non_blocking=True).long()
                t2 = timestamp()
                model_idx = (time % num_of_models) - 1
                input_feat = Variable(x, requires_grad=True)
                output = models[model_idx].forward(input_feat)
                loss = loss_function(output, target)
                t3 = timestamp()
                optimizer = optim[model_idx]
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                t4 = timestamp()
                grad_q2.send_wait()
                send_grad.copy_(input_feat.grad.data)
                grad_q2.async_send_signal()
                t5 = timestamp()
                loss_sum = loss_sum + loss.data
                t += 1
                td1 += t2 - t1
                td2 += t3 - t2
                td3 += t4 - t3
                td4 += t5 - t4
        # Average the replica weights back into the master model, then copy
        # the averaged weights back out to every replica.
        model.init_zero()
        with torch.cuda.device(rank):
            for i in range(num_of_models):
                j = models[i].parameters()
                for k in model.parameters():
                    l = j.__next__()
                    k.requires_grad_(False)
                    k.copy_(k.data + l.data / num_of_models)
            for i in range(num_of_models):
                j = model.parameters()
                for k in models[i].parameters():
                    l = j.__next__()
                    k.requires_grad_(False)
                    k.copy_(l.data)
                    k.requires_grad_(True)
        loss_tot = loss_sum / batch_num
        e_t_u = resource_usage(RUSAGE_SELF)
        e_t = timestamp()
        u_t = e_t_u.ru_stime - s_t_u.ru_stime
        t = e_t - s_t
        end.record()
        torch.cuda.synchronize()
        cuda_time = cuda_time + start.elapsed_time(end)
        print('node3 user time = %f time = %f cuda time = %f cuda tot time = %f loss_tot = %f'
              % (u_t, t, start.elapsed_time(end), cuda_time, loss_tot))
        time_tot = time_tot + t
        print('rank =', rank, 'recv output =', td1)
        print('rank =', rank, 'forward =', td2)
        print('rank =', rank, 'backward =', td3)
        print('rank =', rank, 'send grad =', td4)

        model.eval()
        total = 0
        correct = 0
        dev_loss_tot = 0
        for data, target in test_loader:
            x = feed_test.recv()
            x = x.cuda(rank)
            target = target.cuda(rank)
            output = model.forward(x)
            _, pred = torch.max(output.data, 1)
            dev_loss = loss_function(output, target)
            dev_loss_tot += dev_loss.item()
            correct += (pred == target).sum()
        print('epoch=', epoch, 'tot_time =', time_tot,
              'accuracy =', (100 * correct / test_num),
              'test_loss', dev_loss_tot / test_batch_num)
        if epoch == 150 or epoch == 225:
            lr = lr * 0.1
            for i in optim:
                for j in i.param_groups:
                    j['lr'] = lr
def find_best_segmentation(model_funct, folder, validation_frac,
                           metric="choose", raw_vol=raw, gt_vol=gt,
                           aff_vol=aff, top_n_valid=3, gpus=1):
    l = custom_loss.loss()
    weights = np.zeros((3, 2))
    weights[0] = [2.6960856, 0.61383891]
    weights[1] = [4.05724285, 0.57027915]
    weights[2] = [4.09752934, 0.56949214]

    models = []
    model_keys = []
    print("\nGrabbing models...\n")
    for file in os.listdir(folder):
        model = model_funct(verbose=0)
        try:
            model.load_weights(folder + "/" + file)
            models.append(model)
            model_keys.append(file)
        except OSError:
            print("\n%s is invalid\n" % file)
            continue
    print("\nLoaded %i models.\n" % len(models))
    for key in model_keys:
        print("\t" + key)

    proc = process.process(l.weighted_cross, raw_vol, gt_vol, aff_vol,
                           model=models[0], validation_frac=validation_frac,
                           gpus=gpus)
    valid_loss = []
    print("\nGetting validation loss for all models...\n")
    for model in models:
        proc.model = model
        valid_loss.append(proc.calc_validation_loss())

    top_models = []
    top_model_keys = []
    top_indexs = np.asarray(valid_loss).argsort()[-top_n_valid:][::-1]
    for index in top_indexs:
        top_models.append(models[index])
        top_model_keys.append(model_keys[index])
    print("\nFound top %i models.\n" % top_n_valid)
    for key in top_model_keys:
        print("\t%s" % key)

    print("\nWatershed sweep...\n")
    segs, metrics = predict_and_watershed_on_list_of_models(top_models, metric=metric)
    if metric != "choose":
        index = np.where(metrics == np.max(metrics))
        index = index[0]
        print("BEST PERFORMANCE WAS =" + str(np.max(metrics)))
        seg = segs[int(index)]
    else:
        n = 0
        for metric in metrics:
            print("\tSegmentation %i: " % n)
            print("\t\tSplit: " + str(metric[0]))
            print("\t\tMerge: " + str(metric[1]))
            n += 1
        index = input("\n\nWhich model?")
        seg = segs[int(index)]
        model_key = top_model_keys[math.floor((int(index) / len(segs)) * len(top_models))]
        print("Model %s is the choice" % model_key)
    save_segmentation_tifs(gt_vol, seg)
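# A minimal usage sketch, assuming a Keras model factory of the kind the
# sweep expects; `unet_model` and the checkpoint folder are hypothetical.
find_best_segmentation(model_funct=unet_model,
                       folder="checkpoints",
                       validation_frac=0.2,
                       metric="choose",
                       top_n_valid=3,
                       gpus=1)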
def parse_args_and_settings():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-p", "--phase", default=None,
        help="Can be train, deploy (by default includes evaluate) or "
             "evaluate. Automatically inferred if not specified.")
    # Only for training:
    # TODO @Future: make the parser warn when a wrong combination of
    # arguments is given (standard feature of argparse perhaps?)
    parser.add_argument(
        "-c", "--conf_file", default=None, metavar="FILE",
        help="Path to config file including .ini; can be left None only in "
             "deploy/evaluation, in which case it is derived from the "
             "model/answers path.")
    parser.add_argument(
        "-r", "--random", action='store_true',
        # Note: also kinda used, but overwritten, during deploy/evaluate
        help="Sample values randomly from intervals in the config file.")
    parser.add_argument(
        "-d", "--subdir", default=None,  # Note: also used, but overwritten, during deploy
        help="(training phase) use /models/<subdir> for output; by default "
             "set to year_month.")
    parser.add_argument(
        "-n", "--run_name", default=None,  # Note: also used, but overwritten, during deploy/evaluate
        help="(training phase) by default set to time stamp; always "
             "automatically appended by randomly sampled params.")
    # For deployment/evaluation:
    parser.add_argument(
        "-t", "--deploy_data", default=None,
        help="Default is the data on which the model was trained; can be "
             "'train', 'test' or 'trial', abbreviations as defined in "
             "config_utils. During evaluation this can be omitted: it will "
             "be read from the predictions .csv file.")
    parser.add_argument(
        "-l", "--deploy_level", default=None,
        help="Scene or episode; default is the level on which the model was "
             "trained. During evaluation this can be omitted: it will be "
             "read from the predictions .csv file.")
    # Only for deployment:
    parser.add_argument(
        "-m", "--model", default=None,
        help="(deployment phase) path to model, with base name (though "
             "without .pt suffix); if a fold number is included, only that "
             "fold is considered.")
    parser.add_argument(
        "--answers_per_fold", action='store_true',
        help="Write an answers.txt file for each fold separately (in "
             "addition to their merger).")
    parser.add_argument(
        "--no_cv", action='store_true',
        help="Prevent respecting cross-validation (which is done by default "
             "when deployed on training data).")
    parser.add_argument(
        "-s", "--no_eval", action='store_true',
        help="If the data includes keys, the evaluation phase is run "
             "automatically after deployment; this option prevents that.")
    # Only for evaluation:
    parser.add_argument(
        "-a", "--answer_file", default=None,
        help="Evaluates an answer file, outputting interesting statistics.")
    parser.add_argument(
        "--no_semeval", action='store_true',
        help="Turn off the SemEval filter for evaluation (the filter which "
             "groups infrequent (< 3) entities together as 'other').")
    # Meta:
    parser.add_argument(
        "--no_cuda", action='store_true',
        help="Forces not using cuda; otherwise cuda is used whenever available.")
    parser.add_argument(
        "-v", "--verbosity", type=int, default=3,
        help="Sets verbosity regarding console output (default 3; lower to print less).")
    parser.add_argument(
        "-f", "--no_files", action='store_true',
        help="Prevents generation of folders and files (log, model, answer).")
    args = parser.parse_args()

    # If phase is not specified, it can usually be inferred from other arguments:
    if args.phase is None:
        if args.model is not None:
            args.phase = 'deploy'
        elif args.answer_file is not None:
            args.phase = 'evaluate'
        else:
            args.phase = 'train'

    # Use CUDA only if available:
    if not args.no_cuda and not torch.cuda.is_available():
        print('WARNING: CUDA requested but unavailable; running on cpu instead.')
        args.no_cuda = True

    # Deploy either a single model or a set of models (of the same type).
    # Also, from the model file args.model, extract model_dir and run_name:
    if args.phase == 'deploy':
        if '.pt' in args.model:
            # A single model file .pt was provided, so deploy only on that:
            runs_path, args.run_name = os.path.split(args.model)
            args.model_dir, args.subdir = os.path.split(runs_path)
            args.run_name = args.run_name[:-3]  # removes the .pt
            if '--fold' in args.run_name:
                args.run_name = args.run_name.split('--fold')[0]
            args.model = [args.model]
        else:
            # model name doesn't contain .pt (i.e., either a directory, or
            # directory + run_name):
            if os.path.isdir(args.model):
                # model is a directory
                runs_path = args.model
                args.run_name = None  # To be extracted below
            else:
                # model is not a directory, nor .pt; hence only the run_name
                # of the model is given:
                runs_path, args.run_name = os.path.split(args.model)
            args.model_dir, args.subdir = os.path.split(runs_path)
            # Get all model paths from the directory (with run_name):
            models = []
            for file in os.listdir(runs_path):
                if file.endswith(".pt"):
                    if args.run_name is None:
                        args.run_name = file[:-3]  # removes the .pt
                        if '--fold' in args.run_name:
                            args.run_name = args.run_name.split('--fold')[0]
                    if file.startswith(args.run_name):
                        models.append(os.path.join(runs_path, file))
                    elif os.path.isdir(args.model):
                        print("ERROR: run_name could not be inferred; directory contains multiple runs.\n"
                              "   Rerun with more specific --model (i.e., including model file name, minus .pt and minus --fold#).")
                        quit()
            args.model = sorted(models)

    # When evaluating, obtain the run name etcetera from the provided answers .csv file:
    if args.phase == 'evaluate':
        args.run_name = os.path.basename(args.answer_file)[:-4]
        if args.run_name.endswith('--ensemble'):
            args.run_name = args.run_name[:-10]  # removes the --ensemble suffix
        if '--fold' in args.run_name:
            args.run_name = args.run_name.split('--fold')[0]
        if '--cv' in args.run_name:
            args.run_name = args.run_name.split('--cv')[0]
        args.model_dir = None  # This is kinda ugly.

    # For the train phase a config file is mandatory; otherwise it can be
    # obtained automatically:
    if args.conf_file is None:
        if args.phase == 'train':
            print('ERROR: training requires a config file (try including -c config.ini)')
            quit()
        elif args.phase == 'deploy':
            args.conf_file = os.path.join(runs_path, args.run_name + '.ini')
        elif args.phase == 'evaluate':
            args.conf_file = os.path.join(os.path.dirname(args.answer_file),
                                          args.run_name + '.ini')

    # Read the config file (either given as argument, or obtained from the
    # pre-trained model or its predictions file):
    if args.phase == 'deploy' or args.phase == 'evaluate':
        # Of course don't randomly sample when deploying or evaluating.
        args.random = False
    settings, fixed_params, sampled_params = config_utils.settings_from_config(
        args.conf_file, args.random)
    # NOTE: which params were fixed or sampled determines the subdir and
    # run_name in case of training.

    # If no level and data for deployment are given, they are taken from the
    # training data/level in the config file:
    if args.phase == 'deploy':
        args.deploy_level = args.deploy_level or settings.data.level
        args.deploy_data = args.deploy_data or settings.data.dataset
        if args.deploy_data in config_utils.data_paths:
            args.deploy_data = config_utils.data_paths[args.deploy_data][args.deploy_level]

    # For evaluate, if deploy_data is not provided, attempt to read it from
    # the answer_file. (The alternative, reading it from the directory
    # structure, seems too unsafe.)
    if args.phase == 'evaluate' and args.deploy_data is None:
        with open(args.answer_file) as file:
            firstline = file.readline()
            if firstline.startswith('#'):
                args.deploy_data = firstline.strip('# \n')

    # When deploying on a new dataset (not training data), cross-validation
    # doesn't apply:
    if args.deploy_data != settings.data.dataset:
        args.no_cv = True

    # When training, create runs dir, id and run name if none were given
    # (mostly time stamps).
    if args.phase == 'train':
        args.model_dir = 'models'  # Default for training output.
        args.subdir = args.subdir or time.strftime("%Y_%m")
        args.run_name = args.run_name or time.strftime("%Y_%m_%d_%H_%M_%S")
        if not sampled_params:
            args.run_name = 'fixed--' + args.run_name
        else:
            # TODO @Future: automatic run naming can be considerably improved
            # wrt. readability.
            sampled_params_strings = sorted([
                k[0:3] + "--" + str(sampled_params[k])[0:5].replace(",", "-")
                for k in sampled_params
            ])
            args.run_name = '{0}--{1}'.format(args.run_name,
                                              "--".join(sampled_params_strings))

    # Within the settings Namespace, which is subject to overwriting, keep a
    # backup, so that the original settings can at any time be saved to a
    # new config file.
    settings.orig = copy.deepcopy(settings)

    print('--------------Settings----------------------')
    print('[phase]       ', args.phase)
    print('[deploy_data] ', args.deploy_data)
    print('[settings.data.dataset] ', settings.data.dataset)
    print('--------------------------------------------')
    return args, settings