def oof(self, params, best_rounds, sub, do_logit=True):
    stacker_train = np.zeros((self.X.shape[0], 1))
    dtest = xgb.DMatrix(data=self.test.values)
    idx = []
    for index, (trn_idx, val_idx) in enumerate(self.skf.split(self.X, self.y)):
        idx.append(trn_idx)
        '''
        trn_x, val_x = self.X[trn_idx], self.X[val_idx]
        trn_y, val_y = self.y[trn_idx], self.y[val_idx]
        dtrn = xgb.DMatrix(data=trn_x, label=trn_y)
        dval = xgb.DMatrix(data=val_x, label=val_y)

        # train model
        logging.info('Train model in fold {0}'.format(index))
        cv_model = xgb.train(
            params=params,
            dtrain=dtrn,
            num_boost_round=best_rounds,
            verbose_eval=10,
        )

        logging.info('Predict in fold {0}'.format(index))
        prob = cv_model.predict(dtest, ntree_limit=best_rounds)
        stacker_train[val_idx, 0] = cv_model.predict(dval, ntree_limit=best_rounds)
        sub['target'] += prob / self.N
        '''
    if do_logit:
        sub['target'] = 1 / (1 + np.exp(-sub['target']))
        stacker_train = 1 / (1 + np.exp(-stacker_train))
    logging.info('{0} of folds'.format(self.N))
    logging.info('Oof by single xgboost model Done')
    pickle.dump(idx, open('xgb.pkl', 'wb'))  # pickle needs a binary file handle
    st(context=21)
    return sub, stacker_train
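# A minimal sketch (not the author's class) of the out-of-fold procedure that
# the commented-out block above performs when enabled: train on K-1 folds,
# fill the held-out slice of the stacking feature, and average the test
# predictions across folds. All names here are placeholders.
import numpy as np
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold

def oof_sketch(X, y, test, params, best_rounds, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
    oof_preds = np.zeros(X.shape[0])
    test_preds = np.zeros(test.shape[0])
    dtest = xgb.DMatrix(test)
    for trn_idx, val_idx in skf.split(X, y):
        dtrn = xgb.DMatrix(X[trn_idx], label=y[trn_idx])
        dval = xgb.DMatrix(X[val_idx])
        model = xgb.train(params, dtrn, num_boost_round=best_rounds)
        oof_preds[val_idx] = model.predict(dval)       # out-of-fold slice
        test_preds += model.predict(dtest) / n_splits  # fold-averaged test preds
    return oof_preds, test_preds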
def df_filter_row_by_id():
    """
    Purpose: filter the IDs in table B by the IDs provided in table A.
    """
    st(context=21)
    ret = {}
    try:
        # fetch the relevant data
        store = pd.HDFStore(HDF5_path)
        df_name = request.args.get('origin_samples', None)
        assert HDF5_PREF + df_name + DATA_SUFF in store.keys(), \
            "dataframe %s not in store %s" % (HDF5_PREF + df_name + DATA_SUFF, store.filename)
        df_id_name = request.args.get('id_candicate', None)
        assert HDF5_PREF + df_id_name + DATA_SUFF in store.keys(), \
            "dataframe %s not in store %s" % (HDF5_PREF + df_id_name + DATA_SUFF, store.filename)
        new_dataframe_name = request.args.get('new_dataframe_name', None)

        df = store[df_name + DATA_SUFF]
        ix.tag_meta_auto(df)
        tmp1 = set(store[df_id_name + DATA_SUFF]["DESYNPUF_ID"])

        def f1(*args):
            # keep a row only if its ID appears in table A
            return args[0] in tmp1

        cnt_rows = ix.select_rows_by_function(df, f1)
        df = ix.filter_rows(df, cnt_rows)
        update_df_in_HDFStore_by_name(store, new_dataframe_name, df)
        # assert df_name != new_dataframe_name, "df_name and new_dataframe_name cannot be the same"
        # store.put(new_dataframe_name + DATA_SUFF, df)
        ret['info'] = 'affects %s number of rows' % (str(sum(cnt_rows)))
        store.close()
    except Exception as e:
        store.close()
        return render_template('dc_error.html', e_message=e)
def create_distribution_fig(df, df_name, col_name, dtype):
    """
    Description:
        generate distribution figures for one column of df according to its data type
    Args:
        df       : the dataframe
        df_name  : string name of the dataframe
        col_name : name of the column in df
        dtype    : data type of that column
    Returns:
        the figure names for the column joined into a string 'figname1#figname2...',
        or empty if there is nothing to plot
    """
    ret = []
    for figtype in map_dtype_figtype.get(dtype):
        try:
            ax = df[col_name].plot(kind=figtype) if figtype not in need_value_count_figtype \
                else df[col_name].value_counts().plot(kind=figtype)
            fig = ax.get_figure()
            # the name is used as a URL, so special characters in col_name
            # (such as '.') have to be replaced
            # st(context=21)
            fig_name = str(df_name) + '_' + str(col_name.replace('.', '_')) + '_' + str(figtype) + '.png'
            fig.savefig(fig_dir + fig_name)
            print fig_name
            ret.append(fig_name)
            fig.clf()
        except Exception as e:
            st(context=21)
def display_images(self, visuals, epoch, table=True, phase='train'):
    idx = self._get_display_id(phase)
    if self.display_id > 0:
        if table:
            for i, (label, image_numpy) in enumerate(visuals.items()):
                if i == 0:
                    image_conc = self.tensor2im(image_numpy)
                    # input_shape = image_conc.shape
                    # image_conc = image_conc.resize(self.outputSize)
                    label_conc = label
                else:
                    if 'sem' in label:
                        from .util import labels_to_colors
                        image = labels_to_colors(image_numpy, self.opt.color_palette).astype(np.uint8).transpose([2, 0, 1])
                        image_conc = np.concatenate((image_conc, image), axis=1)
                        label_conc += ('\t' + label)
                    else:
                        image = self.tensor2im(image_numpy)  # , imtype=np.uint16, convert_value=(pow(2, 16) - 1))
                        image_conc = np.concatenate((image_conc, image), axis=1)
                        # if input_shape != image_conc.shape:
                        #     image_conc = imresize(image_conc, input_shape[0], interp='bilinear')
                        label_conc += ('\t' + label)
            self.vis.image(image_conc,
                           opts=dict(title='{} Epoch[{}] '.format(self.name, epoch) + label_conc),
                           win=self.display_id + idx)
        else:
            st()
            for label, image_numpy in visuals.items():
                self.vis.image(self.tensor2im(image_numpy),
                               opts=dict(title='{} Epoch[{}] '.format(self.name, epoch) + label),
                               win=self.display_id + idx)
                idx += 1
def test_clip_augment(path):
    base_name = os.path.basename(path)
    usr_id = re.sub(r'_nohash_.*$', '', base_name)
    data = read_raw_wav(path)

    shift_size_ms = 100
    data_shifted = Augmentataion.shifts_in_time(data, shift_size_ms)
    pickle.dump(data_shifted, open('../data/input/tmp/' + usr_id + '_shifted.pkl', 'wb'),
                pickle.HIGHEST_PROTOCOL)

    stretch_rate = 0.8
    data_stretch = Augmentataion.stretch(data, stretch_rate)
    pickle.dump(data_stretch, open('../data/input/tmp/' + usr_id + '_stretch.pkl', 'wb'),
                pickle.HIGHEST_PROTOCOL)

    noise_weight = 0.05  # 0.01 - 0.05
    noise_type = RUNNING_TAP
    data_noising = Augmentataion.adds_background_noise(data, noise_type, noise_weight)
    pickle.dump(data_noising, open('../data/input/tmp/' + usr_id + '_noising.pkl', 'wb'),
                pickle.HIGHEST_PROTOCOL)

    n_steps = 6
    data_pitched = Augmentataion.shifts_in_pitch(data, n_steps)
    pickle.dump(data_pitched, open('../data/input/tmp/' + usr_id + '_pitched.pkl', 'wb'),
                pickle.HIGHEST_PROTOCOL)

    _, _, spec = Augmentataion.calculates_spectrogram(data)
    st(context=21)
def get_null_list_for_idx(self, idx):
    a_list = []
    for i_null in self.null_N_set:
        tmp_a = []
        if i_null == 1:
            tmp = [bX == idx for bX in range(self.N)]
            tmp_a.append(tmp)
        elif i_null == 2:
            for i_in in range(self.N):
                if not i_in == idx:
                    tmp = [bX in [i_in, idx] for bX in range(self.N)]
                    tmp_a.append(tmp)
        elif i_null == 3:
            for i_in in range(self.N):
                for i2_in in range(self.N):
                    if not (i_in == i2_in or (i_in == idx or i2_in == idx)):
                        tmp = [(bX in [i_in, i2_in, idx]) for bX in range(self.N)]
                        tmp_a.append(tmp)
        elif i_null == 4:
            for i4_in in range(self.N):
                if not (i4_in == idx):
                    tmp = [(bX == idx or (not bX == i4_in)) for bX in range(self.N)]
                    tmp_a.append(tmp)
        else:
            st()
        a_list.append(tmp_a)
    return a_list
def print_weights(network):
    first = True
    for m in network.modules():
        if isinstance(m, nn.Conv2d):
            if first:
                first = False
                print('weight: {}'.format(m.weight.data))
                st()
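# A quick usage sketch for print_weights; the model below is hypothetical
# (any torch.nn module containing a Conv2d works). It prints the weights of
# the first Conv2d encountered and then drops into the debugger via st().
import torch.nn as nn

model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3),
    nn.ReLU(),
    nn.Conv2d(16, 32, kernel_size=3),
)
print_weights(model)  # only the first conv layer's weights are printed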
def ri2ssos(inp):
    st()
    sz = inp.shape
    nCh = int(int(sz[3]) / 2)
    if nCh == 1:
        # square root of the sum of squares over the real/imaginary halves
        out = tf.sqrt(tf.square(inp[:, :, :, 0:nCh]) + tf.square(inp[:, :, :, nCh:]))
        return out
    else:
        st()
def dataset_std(root, data_split, tasks):
    input_list = sorted(glob.glob(join(root, 'rgb', data_split, '*.jpg')))
    st()
    targets_list = []
    for task in tasks:
        targets_list.append(
            sorted(glob.glob(join(root, task, data_split, '*.png'))))
    # return list(zip(input_list, targets_list))
    return input_list, targets_list
def get_grad_flow(named_parameters):
    ave_grads = []
    layers = []
    for n, p in named_parameters:
        st()
        if (p.requires_grad) and ("bias" not in n):
            layers.append(n)
            ave_grads.append(p.grad.abs().mean())
    return ave_grads
def generate_pyc(name):
    st(context=21)
    fp, pathname, description = imp.find_module(name)
    print fp
    print pathname
    print description
    try:
        imp.load_module(name, fp, pathname, description)
    finally:
        if fp:
            fp.close()
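# The imp module above is the Python 2 era API and is deprecated in Python 3;
# a rough modern equivalent (a sketch, not the author's code) uses importlib
# to locate the module and py_compile to produce the .pyc:
import importlib.util
import py_compile

def generate_pyc_py3(name):
    spec = importlib.util.find_spec(name)
    print(spec.origin)
    py_compile.compile(spec.origin)  # writes the .pyc under __pycache__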
def __init__(self, name, G, nCh_out, nCh=16, use_1x1Conv=False, w_decay=0):
    if G == 'UnetINDiv4_CCAM':
        self.net = UnetINDiv4_CCAM
    else:
        st()
    self.name = name
    self.nCh = nCh
    self.nCh_out = nCh_out
    self.reuse = False
    self.use_1x1Conv = use_1x1Conv
    self.w_decay = w_decay
def get_path(self, start, end):
    # self.get_current_planning_graph()
    try:
        traj, weight = path_planner.get_mpc_path(start, end, self.planning_graph)
    except:
        st()
    if traj and weight < 600:
        return traj, weight
    else:
        traj = False
        weight = None
        return traj, weight
def dc_dataset_register():
    st(context=27)
    paras = {}
    csv_form = ReadCSVForm()
    df_form = ReadDFForm()
    paras['csv_form'] = csv_form
    paras['df_form'] = df_form
    if request.method == 'POST':
        if request.form['submit'] == 'csv':
            paras['file_list'] = filter(None, request.form['file_list'].strip().split(';'))
            paras['nrows_list'] = filter(None, request.form['nrows_list'].strip().split(';'))
            try:
                # make sure the two lists have equal length
                assert len(paras['file_list']) == len(paras['nrows_list']), \
                    u'file_list and nrows_list differ in length'
                # df_l: [(df name 1, df1), (df name 2, df2), ...]
                df_l = [('df_' + str(f.split('.')[0]),
                         ix.read_csv_file_to_df(P(f), sep=',', nrows=int(n))) if f else None
                        for (f, n) in zip(paras['file_list'], paras['nrows_list'])]
                # store locally in HDF5 format (for now, suffixes distinguish data from meta);
                # compute each dataframe's md5 and record it in the table
                with pd.HDFStore(HDF5_path) as store:
                    df_md5_tmp = {}
                    for df in df_l:
                        md5 = calculate_dataframe_md5(df[1])
                        df_name = df[0] + DATA_SUFF
                        df_md5_tmp[df_name] = md5
                        store.put(df_name, df[1])
                    # merge with the existing (df_name, md5) records
                    # merge_dataframe_md5(df_md5_tmp)
                paras['df_l_from_csv'] = df_l
                paras['df_nrow'] = 20
                return render_template('dc_dataset_register.html', **paras)
            except Exception as e:
                return render_template('dc_error.html', e_message=e)
        elif request.form['submit'] == 'df':
            # df_l: [(df name 1, df1), (df name 2, df2), ...]
            df_l = []
            with pd.HDFStore(request.form['store_path'].strip()) as store:
                for s_k in store.keys():
                    if isinstance(store.get(s_k), pd.DataFrame):
                        if s_k.endswith(DATA_SUFF):
                            df_l.append((extract_dataframe_name(s_k, HDF5_PREF, ''), store.get(s_k)))
                        elif s_k.endswith(META_SUFF):
                            df_l.append((extract_dataframe_name(s_k, HDF5_PREF, ''), store.get(s_k)))
                        else:
                            pass
            paras['df_l_from_hdf5'] = df_l
            paras['df_nrow'] = 100
            return render_template('dc_dataset_register.html', **paras)
    else:
        return render_template('dc_dataset_register.html', **paras)
def load_opts(opt, exp_name):
    # optLists = ['model','dataroot','savepath','nEpoch','lr','disp_div_N','batchSize','input_nc','gpu_ids','name','use_residual','no_flip','lambda_cost','weight_decay','use_dropout','optimizer','ri','normalize']
    exp_dir = os.path.join(opt.savepath, exp_name)
    with open(os.path.join(exp_dir, 'opt.txt'), 'r') as opt_file:
        for aLine in opt_file.readlines():
            idx = aLine.find(':')
            if idx == -1:
                continue
            else:
                cur_opt = aLine[:idx]
                cur_val = aLine[idx + 2:-1]
                if cur_opt == 'model':
                    opt.model = cur_val
                elif cur_opt == 'dataroot':
                    opt.dataroot = cur_val
                elif cur_opt == 'savepath':
                    opt.savepath = cur_val
                elif cur_opt == 'nEpoch':
                    opt.nEpoch = int(cur_val)
                elif cur_opt == 'lr':
                    opt.lr = float(cur_val)
                elif cur_opt == 'disp_div_N':
                    opt.disp_div_N = int(cur_val)
                elif cur_opt == 'batchSize':
                    opt.batchSize = int(cur_val)
                elif cur_opt == 'input_nc':
                    opt.input_nc = int(cur_val)
                elif cur_opt == 'gpu_ids':
                    cur_val = cur_val[1:-1]
                    opt.gpu_ids = [int(cur_val)]
                    print('Use GPU id......')
                elif cur_opt == 'name':
                    opt.name = cur_val
                elif cur_opt == 'use_residual':
                    opt.use_residual = (cur_val == 'True')
                elif cur_opt == 'no_flip':
                    opt.no_flip = (cur_val == 'True')
                elif cur_opt == 'lambda_cost':
                    opt.lambda_cost = float(cur_val)
                elif cur_opt == 'weight_decay':
                    opt.weight_decay = float(cur_val)
                elif cur_opt == 'use_dropout':
                    opt.use_dropout = (cur_val == 'True')
                elif cur_opt == 'optimizer':
                    opt.optimizer = cur_val
                elif cur_opt == 'ri':
                    opt.ri = (cur_val == 'True')
                elif cur_opt == 'normalize':
                    opt.normalize = (cur_val == 'True')
                else:
                    st()
    return opt
def __init__(self, name, G, nCh_out, nCh_seg, nCh=16, w_decay=0):
    if G == 'NVDLMED':
        self.net = NVDLMED
    else:
        st()
    self.name = name
    self.nCh = nCh
    self.nCh_out = nCh_out
    self.nCh_seg = nCh_seg
    self.reuse = False
    self.w_decay = w_decay
    self.reg_ = tf.contrib.layers.l2_regularizer(scale=self.w_decay) if self.w_decay > 0 else None
def value_and_gradient(self, X):
    id1 = (X @ self.u1[:-1]) + self.u1[-1]
    id2 = (X @ self.u2[:-1]) + self.u2[-1]
    if self.abs_act:
        id1 = torch.sign(id1)
        id2 = torch.sign(id2)
    else:
        id1 = id1.gt(0) * 1.
        id2 = id2.gt(0) * 1.
    # dy = (X @ self.u0[:-1]) + self.u0[-1] + (id1 * sc1).sum(1, keepdims=True) - (id2 * sc2).sum(1, keepdims=True)
    st()
    dy = self.u0.T + id1 @ self.u1.T - id2 @ self.u2.T
    y = (X * dy[:, :-1]).sum(1) + dy[:, -1]
    return y, dy[:, :-1]
def onclick(event):
    global ix, iy, clicks, coords, ps, clickok
    if clickok:
        clickok = False
        ix, iy = event.xdata, event.ydata
        clicks += 1
        coords.append((ix, iy))
        if clicks % 2:  # if odd
            print('x = %d, y = %d' % (ix, iy))
            print('click on another point to set desired heading')
            clickok = True
        else:
            try:
                dys = coords[1][1] - coords[0][1]
                dxs = coords[1][0] - coords[0][0]
                theta = np.arctan2(-dys, dxs) / np.pi * 180
                print('theta = %d' % (theta))
                ps.append((coords[0][0], coords[0][1], theta, 0))
                coords = []
                start = ps[-2]
                end = ps[-1]
                traj, weight = astar_trajectory(simple_graph, start, end)
                # print(traj)
                print(weight)
                st()
                # while not complete_path_is_safe(traj):
                #     safe_subpath, safe_start = longest_safe_subpath(traj)
                #     # TODO: not sure how to generate the path
                #     new_subpath = astar_trajectory(simple_graph, safe_start, end)
                #     traj = safe_subpath + new_subpath
                for start, end in zip(traj, traj[1:]):
                    # print('Start' + str(start))
                    # print(end)
                    segment = segment_to_mpc_inputs(start, end, edge_info)
                    print(segment)
                    plt.plot(segment[0, 0], segment[0, 1], 'b.')
                    plt.plot(segment[-1, 0], segment[-1, 1], 'rx')
                    plt.plot(segment[:, 0], segment[:, 1], 'k--')
                    plt.pause(0.1)
                print('trajectory plotted!')
                print('click to set desired xy')
                clickok = True
                plt.show()
            except:
                clickok = True
                print('CANNOT FIND TRAJECTORY: click again to set xy!')
                if len(ps) > 1:
                    ps = ps[:-1]
def oof(self, model, params, sub):
    stacker_train = np.zeros((self.X.shape[0], 1))
    for index, (trn_idx, val_idx) in enumerate(self.skf.split(self.X, self.y)):
        trn_x, val_x = self.X[trn_idx], self.X[val_idx]
        trn_y, val_y = self.y[trn_idx], self.y[val_idx]
        # train model
        logging.info('Train model in fold {0}'.format(index))
        history = model.fit(x=trn_x, y=trn_y, validation_data=(val_x, val_y), shuffle=True)
        st(context=21)
        logging.info('Predict in fold {0}'.format(index))
        prob = model.predict(x=self.test)
        stacker_train[val_idx, 0] = model.predict(val_x)
        sub['target'] += prob / self.N
    logging.info('{0} of folds'.format(self.N))
    logging.info('Oof by nn model Done')
    return sub, stacker_train
def __iter__(self):
    for df in pd.read_csv(self.input_file_path, chunksize=30000, sep=self.sep):
        # keep only the target departments (keshi)
        if self.target_keshi:
            df = df[df['keshi'].isin(self.target_keshi)]
            df = df.reset_index(drop=True)
        # segment the text fields
        for col in self.cut_columns:
            df[col + '_cut'] = df[col].astype('object').apply(self.__conduct_jieba_cut)
        # yield sentences
        try:
            for i in df.index:
                for col in self.wanted_columns:
                    yield df.iloc[i][col + '_cut']
        except Exception as e:
            st(context=21)
            print 'ee'
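# A hypothetical usage sketch: an iterable like the one above, yielding one
# tokenized sentence at a time, can be passed straight to gensim's Word2Vec,
# which re-iterates it once per training epoch. The class name, constructor
# arguments, and the assumption that __conduct_jieba_cut returns a token list
# are all guesses, not taken from the original module.
from gensim.models import Word2Vec

sentences = SentenceIterator(input_file_path='corpus.csv', sep=',')
model = Word2Vec(sentences, vector_size=100, window=5, min_count=5)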
def addNoiseFromPath(path):
    rate, sample = wavfile.read(path)
    if type(sample[0]) == np.ndarray:
        sample = sample[:, 0]
    levelOfNoise = 0.1 * np.average(abs(sample))
    noise = np.random.normal(0, levelOfNoise, len(sample))
    sample = sample + noise  # casts to float; in-place += fails on the int samples wavfile returns
    Fe = 44100
    f, t, Sxx = signal.spectrogram(sample, Fe, nfft=511, nperseg=len(sample) // 225)
    st()
    Sxx = np.resize(Sxx, (256, 256))
    f = np.resize(f, 256)
    t = np.resize(t, 256)
    print(Sxx.shape)
    norm = cls.Normalize(vmin=-1., vmax=1.)
    norm = cls.LogNorm(vmin=Sxx.min(), vmax=Sxx.max())  # overrides the linear norm above
    img = plt.pcolormesh(t, f, Sxx, norm=norm, cmap='jet')
    return img
def __init__(self, name, G, nCh_out, nCh=16, use_1x1Conv=False, w_decay=0, resid=False):
    if G == 'UnetINMultiDiv8':
        self.net = UnetINMultiDiv8
    else:
        st()
    self.name = name
    self.nCh = nCh
    self.nCh_out = nCh_out
    self.reuse = False
    self.use_1x1Conv = use_1x1Conv
    self.w_decay = w_decay
    self.resid = resid
def dc_feature_engineering_datetime():
    """
    Purpose:
        data that reaches this point is guaranteed to be datetime typed already;
        convert the datetime columns to the given format and turn them into numeric values
    """
    st(context=21)
    ret = {}
    try:
        store = pd.HDFStore(HDF5_path)
        df_name = request.args.get('df_name', None)
        new_dataframe_name = request.args.get('new_dataframe_name', None)
        value_as_base = pd.to_datetime(request.args.get('value_as_base'))
        derive_prefix = request.args.get('derive_prefix', None)
        assert HDF5_PREF + df_name + DATA_SUFF in store.keys(), \
            "dataframe %s not in store %s" % (HDF5_PREF + df_name + DATA_SUFF, store.filename)
        df = store[df_name + DATA_SUFF]
        col_name_list = []
        for col_name in request.args.getlist('col_names', None):
            col_name_list.append(col_name.split('.')[1])
            print col_name
        # convert the selected datetime columns to the target format
        ix.tag_meta_auto(df)
        ix.update_meta(df, col_name_list, "col_datatype", "datetime")
        ix.type_casting(df, col_name_list, dt_format="%Y%m%d")
        store[df_name + DATA_SUFF] = df
        # write the converted data into a new dataframe
        t_df = ix.derive_columns_from_datetime(
            df, col_name_list,
            value_as_base=value_as_base,
            inverse=True,
            derive_prefix=derive_prefix)
        if new_dataframe_name != '':
            update_df_in_HDFStore_by_name(store, new_dataframe_name, t_df)
            # no md5 update is needed here, since the format has changed
        ret['impact_columns'] = str(len(col_name_list))
        store.close()
        return json.dumps(ret)
    except Exception as e:
        return render_template('dc_error.html', e_message=e)
def display_images(self, visuals, epoch, table=True, phase='train'):
    idx = self._get_display_id(phase)
    if self.display_id > 0:
        if table:
            for i, (label, image_numpy) in enumerate(visuals.items()):
                if i == 0:
                    image_conc = self.tensor2im(image_numpy)
                    label_conc = label
                else:
                    if 'sem' in label:
                        from .util import labels_to_colors
                        image = labels_to_colors(
                            image_numpy, self.opt.color_palette).astype(
                                np.uint8).transpose([2, 0, 1])
                        image_conc = np.concatenate((image_conc, image), axis=1)
                        label_conc += ('\t' + label)
                    else:
                        image = self.tensor2im(image_numpy)
                        image_conc = np.concatenate((image_conc, image), axis=1)
                        label_conc += ('\t' + label)
            self.vis.image(
                image_conc,
                opts=dict(title='{} Epoch[{}] '.format(self.name, epoch) + label_conc),
                win=self.display_id + idx)
        else:
            st()
            for label, image_numpy in visuals.items():
                self.vis.image(
                    (self.tensor2im(image_numpy)),
                    opts=dict(
                        title='{} Epoch[{}] '.format(self.name, epoch) + label),
                    win=self.display_id + idx)
                idx += 1
def train_cvx(filename, input_variables, M=20000, abs_act=False, tol=1e-8, beta=1e-4, nesting=0):
    # Load data
    (fs, ifs, ifs_star, out, _) = process_data(filename, output_regex='^sqJ$',
                                               input_columns=[v for v in input_variables])
    if ifs.shape[0] == 0:
        u1 = torch.zeros((3, 1))
        u2 = torch.zeros((3, 1))
        u2[-1] = 1.  # output will be -1 for every x -> always feasible
        return CvxModel(u1, u2, abs_act=False)

    # Scale
    n = (fs.shape[0] + 2 * ifs.shape[0])
    mean = (ifs.sum(0, keepdims=True) + ifs_star.sum(0, keepdims=True)
            + fs.sum(0, keepdims=True)) / n
    std = (((ifs - mean).square().sum(0, keepdims=True)
            + (ifs_star - mean).square().sum(0, keepdims=True)
            + (fs - mean).square().sum(0, keepdims=True)) / (n - 1)).sqrt()
    # to maintain 1-Lipschitzness, all dimensions must be scaled by the same number
    std = std.mean()

    u1, u2 = train_network((fs - mean) / std, (ifs - mean) / std, (ifs_star - mean) / std,
                           out / std, abs_act=abs_act, beta=beta)

    # Sparsify
    # u1 = u1[:, u1.norm(dim=0) > tol]
    # u2 = u2[:, u2.norm(dim=0) > tol]

    # Create model
    model = CvxModel(u1, u2, abs_act=abs_act, mean=mean, std=std)
    loss = ((model(ifs).squeeze() - out.squeeze()).abs().sum()
            + model(ifs_star).abs().sum()
            + model(fs).relu().sum()) / n
    if loss >= 1e-6:
        if nesting <= 5:
            st()
            print(f"Loss is too high when training on {filename}. Increasing M, lowering beta, and retrying.")
            return train_cvx(filename, input_variables, M=2 * M, beta=beta / 2,
                             abs_act=abs_act, tol=tol, nesting=nesting + 1)
        else:
            raise RecursionError("Maximum recursion depth for failed training reached.")
    return model
def func(a, b=[]):
    print(f"{a=}")
    try:
        frame = inspect.currentframe()
        code = frame.f_code
        avs = inspect.getargvalues(frame)
        return avs
        # everything below is unreachable exploration code, kept for reference
        argcount = code.co_argcount    # 2
        freevars = code.co_freevars    # ()
        cellvars = code.co_cellvars    # ()
        nlocals = code.co_nlocals      # 6
        stacksize = code.co_stacksize  # 6
        consts = code.co_consts        # (None, 'a=', 2, 0, ('set_trace',))
        flags = code.co_flags          # 67
        lnotab = code.co_lnotab        # b'\x00\x01\x0e\x01\x02\x01\x08\x01\n\x01\x04\x01\x0c\x00\x06\x01\x06\x01\x06\x01'
        names = code.co_names          # ('print', 'inspect', 'currentframe', 'getargvalues', 'ipdb', 'set_trace')
        gls, lcs = frame.f_globals, frame.f_locals
        funcname = frame.f_code.co_name  # func
        funcscope = frame.f_back
        func = funcscope.f_globals[funcname]  # frames are not subscriptable; look the name up in f_globals
        filename = code.co_filename    # '/Users/alberthan/VSCodeProjects/vytd/src/youtube-dl/DELETE.py'
        funclineno = code.co_firstlineno  # 4
        line = linecache.getline(filename, funclineno)
        empty_tuple_or_zero = [
            code.co_freevars,
            code.co_posonlyargcount,
            code.co_kwonlyargcount,
        ]
        x = 1 + 1
        from ipdb import set_trace as st
        st()
        return x
    except:
        return b
def grid_search_tuning(self, cat_param, cat_param_grid, f_score, n_jobs):
    cat_estimator = cat.CatBoostClassifier(**cat_param)
    cat_gs = GridSearchCV(
        estimator=cat_estimator,
        param_grid=cat_param_grid,
        cv=self.skf,
        scoring=make_scorer(f_score, greater_is_better=True, needs_proba=True),
        verbose=2,
        n_jobs=n_jobs,
        refit=False
    )
    time_begin = time.time()
    cat_gs.fit(self.X, self.y)
    st(context=21)
    time_end = time.time()
    logging.info('Grid search eat time {0} : params {1}'.format(time_end - time_begin, cat_param_grid))
    logging.info('best_score_ : {0}'.format(cat_gs.best_score_))
    logging.info('best_params_ : {0}'.format(cat_gs.best_params_))
    for score in cat_gs.grid_scores_:
        logging.info('grid_scores_ : {0}'.format(score))
    gc.collect()
    return cat_gs.best_score_, cat_gs.best_params_, cat_gs.grid_scores_
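# Note: grid_scores_ only exists on older scikit-learn; it was removed in
# 0.20 in favor of cv_results_. A sketch of the equivalent logging on a
# current scikit-learn, under that assumption:
for mean, params in zip(cat_gs.cv_results_['mean_test_score'],
                        cat_gs.cv_results_['params']):
    logging.info('cv_results_ : {0} with {1}'.format(mean, params))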
def dc_select_row_by_expr():
    """
    Purpose: filter rows by an expression.
    """
    st(context=21)
    ret = {}
    try:
        store = pd.HDFStore(HDF5_path)
        # read the parameters passed back via ajax
        df_name = request.args.get('df_name', None)
        assert HDF5_PREF + df_name + DATA_SUFF in store.keys(), \
            "dataframe %s not in store %s" % (HDF5_PREF + df_name + DATA_SUFF, store.filename)
        new_dataframe_name = request.args.get('new_dataframe_name', None)
        rval_expr = request.args.get('rval_expr', None)
        # non_NA_percent = request.args.get('non_NA_percent', None)
        non_NA_percent = 0
        # expr_symbol = urllib2.unquote(request.args.get('expr_symbol', None))
        expr_symbol = '\$'
        # filter the data by the given condition
        df = store[df_name + DATA_SUFF]
        ix.tag_meta_auto(df)
        cnt_rows = ix.select_rows_by_expr(
            df,
            expr_symbol=expr_symbol,
            non_NA_percent=non_NA_percent,
            rval_expr=rval_expr
        )
        df = ix.filter_rows(df, cnt_rows)
        # create the new dataframe
        update_df_in_HDFStore_by_name(store, new_dataframe_name, df)
        ret['info'] = 'affects %s number of rows' % (str(sum(cnt_rows)))
        store.close()
    except Exception as e:
        store.close()
        return render_template('dc_error.html', e_message=e)
def get_null_list_for_idx(self, idx):
    a_list = []
    for i_null in self.null_N_set:
        tmp_a = []
        if i_null == 1:
            tmp = [bX == idx for bX in range(self.N)]
            tmp_a.append(tmp)
        elif i_null == 2:
            for i_in in range(self.N):
                if not i_in == idx:
                    tmp = [bX in [i_in, idx] for bX in range(self.N)]
                    tmp_a.append(tmp)
        elif i_null == 3:
            for i_in in range(self.N):
                for ii_in in range(self.N):
                    if not (i_in == ii_in or (i_in == idx or ii_in == idx)):
                        tmp = [(bX in [i_in, ii_in, idx]) for bX in range(self.N)]
                        tmp_a.append(tmp)
        elif i_null == 4:
            for i_in in range(self.N):
                for ii_in in range(self.N):
                    for iii_in in range(self.N):
                        if not ((i_in == ii_in or i_in == iii_in or ii_in == iii_in)
                                or (i_in == idx or ii_in == idx or iii_in == idx)):
                            tmp = [(bX in [i_in, ii_in, iii_in, idx]) for bX in range(self.N)]
                            tmp_a.append(tmp)
        elif i_null == 5:
            for i4_in in range(self.N):
                for i5_in in range(self.N):
                    for i6_in in range(self.N):
                        if not ((idx in [i4_in, i5_in, i6_in])
                                or (i4_in == i5_in or i4_in == i6_in or i5_in == i6_in)):
                            tmp = [(bX == idx) or not (bX in [i4_in, i5_in, i6_in]) for bX in range(self.N)]
                            tmp_a.append(tmp)
        elif i_null == 6:
            for i5_in in range(self.N):
                for i6_in in range(self.N):
                    if not (idx == i5_in or idx == i6_in or i5_in == i6_in):
                        tmp = [(bX == idx or not (bX in [i5_in, i6_in])) for bX in range(self.N)]
                        tmp_a.append(tmp)
        elif i_null == 7:
            for i6_in in range(self.N):
                if not (i6_in == idx):
                    tmp = [(bX == idx or (not bX == i6_in)) for bX in range(self.N)]
                    tmp_a.append(tmp)
        else:
            st()
        a_list.append(tmp_a)
    return a_list
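# An illustration of the masks get_null_list_for_idx produces, assuming
# N = 4 and idx = 0 (hypothetical values, just to make the lists concrete):
#   i_null == 1 -> [[True, False, False, False]]        keep only the target input
#   i_null == 2 -> [[True, True, False, False], ...]    the target plus one other input
#   i_null == 7 -> [[True, False, True, True], ...]     everything except one other input
# Each inner boolean list marks which of the N inputs is kept (True) versus nulled.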
def getBatch_RGB_varInp_tarid_missid(self, start, end, tar_id, miss_id):
    nB = end - start
    end = min([end, self.len])
    start = end - nB
    batch = self.flist[start:end]
    # channel first:
    sz_a = [nB, self.nCh_out, self.nY, self.nX]
    sz_M = [nB, 1, self.nY, self.nX]
    target_class_idx = np.empty([nB, 1], dtype=np.uint8)
    a_img = np.empty(sz_a, dtype=np.float32)
    b_img = np.empty(sz_a, dtype=np.float32)
    c_img = np.empty(sz_a, dtype=np.float32)
    d_img = np.empty(sz_a, dtype=np.float32)
    e_img = np.empty(sz_a, dtype=np.float32)
    f_img = np.empty(sz_a, dtype=np.float32)
    g_img = np.empty(sz_a, dtype=np.float32)
    n_img = np.empty(sz_a, dtype=np.float32)
    target_img = np.empty(sz_a, dtype=np.float32)
    a_mask = np.zeros(sz_M, dtype=np.float32)
    b_mask = np.zeros(sz_M, dtype=np.float32)
    c_mask = np.zeros(sz_M, dtype=np.float32)
    d_mask = np.zeros(sz_M, dtype=np.float32)
    e_mask = np.zeros(sz_M, dtype=np.float32)
    f_mask = np.zeros(sz_M, dtype=np.float32)
    g_mask = np.zeros(sz_M, dtype=np.float32)
    n_mask = np.zeros(sz_M, dtype=np.float32)

    targ_idx = tar_id  # random.randint(0, self.N - 1)
    # Here, choose the random null idx in the set, and which is not in the target
    if tar_id == miss_id:
        N_for_null = 0
    else:
        N_for_null = 1  # random.randint(0, len(self.null_N_set) - 1)
    # 0: 7-->1 map
    # 6: 1-->1 map
    cur_list = self.list_for_null[targ_idx][N_for_null]
    if len(cur_list) == 1:
        tar_class_bools = cur_list[0]
    else:
        if miss_id > tar_id:
            s = -1
        else:
            s = 0
        tar_class_bools = cur_list[miss_id + s]  # random.randint(0, len(cur_list) - 1)

    for iB, aFname in enumerate(batch):
        aug_idx = random.randint(0, 1)
        a_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[0] + '.png')), dtype=np.float32)
        b_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[1] + '.png')), dtype=np.float32)
        c_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[2] + '.png')), dtype=np.float32)
        d_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[3] + '.png')), dtype=np.float32)
        e_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[4] + '.png')), dtype=np.float32)
        f_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[5] + '.png')), dtype=np.float32)
        g_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[6] + '.png')), dtype=np.float32)
        n_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[7] + '.png')), dtype=np.float32)
        if self.use_aug:
            if aug_idx == 1:
                a_tmp = np.flip(a_tmp, axis=3)
                b_tmp = np.flip(b_tmp, axis=3)
                c_tmp = np.flip(c_tmp, axis=3)
                d_tmp = np.flip(d_tmp, axis=3)
                e_tmp = np.flip(e_tmp, axis=3)
                f_tmp = np.flip(f_tmp, axis=3)
                g_tmp = np.flip(g_tmp, axis=3)
                n_tmp = np.flip(n_tmp, axis=3)
        if self.use_norm_std:
            a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / np.std(a_tmp)
            b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / np.std(b_tmp)
            c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / np.std(c_tmp)
            d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / np.std(d_tmp)
            e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / np.std(e_tmp)
            f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / np.std(f_tmp)
            g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / np.std(g_tmp)
            n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / np.std(n_tmp)
        else:
            scale = 255.0
            a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / scale
            b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / scale
            c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / scale
            d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / scale
            e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / scale
            f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / scale
            g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / scale
            n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / scale
        if targ_idx == 0:
            target_img[iB, :, :, :] = a_img[iB, :, :, :]
            a_mask[iB, 0, :, :] = 1.
        elif targ_idx == 1:
            target_img[iB, :, :, :] = b_img[iB, :, :, :]
            b_mask[iB, 0, :, :] = 1.
        elif targ_idx == 2:
            target_img[iB, :, :, :] = c_img[iB, :, :, :]
            c_mask[iB, 0, :, :] = 1.
        elif targ_idx == 3:
            target_img[iB, :, :, :] = d_img[iB, :, :, :]
            d_mask[iB, 0, :, :] = 1.
        elif targ_idx == 4:
            target_img[iB, :, :, :] = e_img[iB, :, :, :]
            e_mask[iB, 0, :, :] = 1.
        elif targ_idx == 5:
            target_img[iB, :, :, :] = f_img[iB, :, :, :]
            f_mask[iB, 0, :, :] = 1.
        elif targ_idx == 6:
            target_img[iB, :, :, :] = g_img[iB, :, :, :]
            g_mask[iB, 0, :, :] = 1.
        elif targ_idx == 7:
            target_img[iB, :, :, :] = n_img[iB, :, :, :]
            n_mask[iB, 0, :, :] = 1.
        else:
            st()
        target_class_idx[iB] = targ_idx
    return target_class_idx, a_img, b_img, c_img, d_img, e_img, f_img, g_img, n_img, \
        a_mask, b_mask, c_mask, d_mask, e_mask, f_mask, g_mask, n_mask, tar_class_bools, target_img
def getBatch_RGB(self, start, end):
    end = min([end, self.len])
    batch = self.flist[start:end]
    # channel first:
    sz_a = [end - start, self.nCh_out, self.nY, self.nX]
    sz_M = [end - start, 1, self.nY, self.nX]
    target_class_idx = np.empty([end - start, 1], dtype=np.uint8)
    a_img = np.empty(sz_a, dtype=np.float32)
    b_img = np.empty(sz_a, dtype=np.float32)
    c_img = np.empty(sz_a, dtype=np.float32)
    d_img = np.empty(sz_a, dtype=np.float32)
    e_img = np.empty(sz_a, dtype=np.float32)
    f_img = np.empty(sz_a, dtype=np.float32)
    g_img = np.empty(sz_a, dtype=np.float32)
    n_img = np.empty(sz_a, dtype=np.float32)
    target_img = np.empty(sz_a, dtype=np.float32)
    a_mask = np.zeros(sz_M, dtype=np.float32)
    b_mask = np.zeros(sz_M, dtype=np.float32)
    c_mask = np.zeros(sz_M, dtype=np.float32)
    d_mask = np.zeros(sz_M, dtype=np.float32)
    e_mask = np.zeros(sz_M, dtype=np.float32)
    f_mask = np.zeros(sz_M, dtype=np.float32)
    g_mask = np.zeros(sz_M, dtype=np.float32)
    n_mask = np.zeros(sz_M, dtype=np.float32)

    targ_idx = random.randint(0, self.N - 1)
    tar_class_bools = [x == targ_idx for x in range(self.N)]

    for iB, aFname in enumerate(batch):
        aug_idx = random.randint(0, 1)
        a_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[0] + '.png')), dtype=np.float32)
        b_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[1] + '.png')), dtype=np.float32)
        c_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[2] + '.png')), dtype=np.float32)
        d_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[3] + '.png')), dtype=np.float32)
        e_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[4] + '.png')), dtype=np.float32)
        f_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[5] + '.png')), dtype=np.float32)
        g_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[6] + '.png')), dtype=np.float32)
        n_tmp = np.ndarray.astype(self.read_png(join(self.root, aFname + self.fExp[7] + '.png')), dtype=np.float32)
        if self.use_aug:
            if aug_idx == 1:
                a_tmp = np.flip(a_tmp, axis=3)
                b_tmp = np.flip(b_tmp, axis=3)
                c_tmp = np.flip(c_tmp, axis=3)
                d_tmp = np.flip(d_tmp, axis=3)
                e_tmp = np.flip(e_tmp, axis=3)
                f_tmp = np.flip(f_tmp, axis=3)
                g_tmp = np.flip(g_tmp, axis=3)
                n_tmp = np.flip(n_tmp, axis=3)
        if self.use_norm_std:
            a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / np.std(a_tmp)
            b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / np.std(b_tmp)
            c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / np.std(c_tmp)
            d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / np.std(d_tmp)
            e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / np.std(e_tmp)
            f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / np.std(f_tmp)
            g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / np.std(g_tmp)
            n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / np.std(n_tmp)
        else:
            scale = 255.0
            a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / scale
            b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / scale
            c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / scale
            d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / scale
            e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / scale
            f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / scale
            g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / scale
            n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / scale
        if targ_idx == 0:
            target_img[iB, :, :, :] = a_img[iB, :, :, :]
            a_mask[iB, 0, :, :] = 1.
        elif targ_idx == 1:
            target_img[iB, :, :, :] = b_img[iB, :, :, :]
            b_mask[iB, 0, :, :] = 1.
        elif targ_idx == 2:
            target_img[iB, :, :, :] = c_img[iB, :, :, :]
            c_mask[iB, 0, :, :] = 1.
        elif targ_idx == 3:
            target_img[iB, :, :, :] = d_img[iB, :, :, :]
            d_mask[iB, 0, :, :] = 1.
        elif targ_idx == 4:
            target_img[iB, :, :, :] = e_img[iB, :, :, :]
            e_mask[iB, 0, :, :] = 1.
        elif targ_idx == 5:
            target_img[iB, :, :, :] = f_img[iB, :, :, :]
            f_mask[iB, 0, :, :] = 1.
        elif targ_idx == 6:
            target_img[iB, :, :, :] = g_img[iB, :, :, :]
            g_mask[iB, 0, :, :] = 1.
        elif targ_idx == 7:
            target_img[iB, :, :, :] = n_img[iB, :, :, :]
            n_mask[iB, 0, :, :] = 1.
        else:
            st()
        target_class_idx[iB] = targ_idx
    return target_class_idx, a_img, b_img, c_img, d_img, e_img, f_img, g_img, n_img, \
        a_mask, b_mask, c_mask, d_mask, e_mask, f_mask, g_mask, n_mask, tar_class_bools, target_img
def run_one_fold(fold):
    # read whole train / test data for tokenizer
    df_train = read_train_data()
    df_test = read_test_data()

    # fit tokenizer
    tokenizer = get_fitted_tokenizer(df_train, df_test)
    word_index = tokenizer.word_index
    transformers_count = 0
    all_words = set(word_index.keys())
    for toxic, transformers in toxicIndicator_transformers.items():
        for transformer in transformers:
            if transformer == toxic:
                continue
            if transformer in all_words:
                transformers_count += tokenizer.word_counts[transformer]
    print('toxic transformer count : {0}'.format(transformers_count))
    print('unique token : {0}'.format(len(word_index)))

    # get embedding lookup table
    embedding_dim = 300
    embedding_path = '../data/input/glove_dir/glove.840B.300d.txt'
    # embedding_path = '../data/input/fasttext_dir/fasttext.300d.txt'
    embedding_lookup_table = get_embedding_lookup_table(word_index, embedding_path, embedding_dim)

    # read in fold data
    df_trn, df_val = read_data_in_fold(fold)

    # prepare data : pre truncating and post truncating
    X_test_pre = get_padded_pre_sequence(tokenizer, df_test[COMMENT_COL].astype('str').values.tolist())
    X_test_post = get_padded_post_sequence(tokenizer, df_test[COMMENT_COL].astype('str').values.tolist())
    id_test = df_test[ID_COL].values.tolist()
    print('Test data pre shape {0}'.format(X_test_pre.shape))
    print('Test data post shape {0}'.format(X_test_post.shape))
    if PRE_OR_POST == 'pre':
        X_trn = get_padded_pre_sequence(tokenizer, df_trn[COMMENT_COL].astype('str').values.tolist())
        y_trn = df_trn[label_candidates].values
        print('Fold {0} train data pre shape {1} '.format(fold, X_trn.shape))
        X_val = get_padded_pre_sequence(tokenizer, df_val[COMMENT_COL].astype('str').values.tolist())
        y_val = df_val[label_candidates].values
        id_val = df_val[ID_COL].values.tolist()
        print('Fold {0} valid data pre shape {1} '.format(fold, X_val.shape))
    if PRE_OR_POST == 'post':
        X_trn = get_padded_post_sequence(tokenizer, df_trn[COMMENT_COL].astype('str').values.tolist())
        y_trn = df_trn[label_candidates].values
        print('Fold {0} train data post shape {1} '.format(fold, X_trn.shape))
        X_val = get_padded_post_sequence(tokenizer, df_val[COMMENT_COL].astype('str').values.tolist())
        y_val = df_val[label_candidates].values
        id_val = df_val[ID_COL].values.tolist()
        print('Fold {0} valid data post shape {1} '.format(fold, X_val.shape))

    # preds result array
    preds_test_pre = np.zeros((X_test_pre.shape[0], NUM_OF_LABEL))
    preds_test_post = np.zeros((X_test_post.shape[0], NUM_OF_LABEL))
    assert preds_test_pre.shape == preds_test_post.shape, 'test data pre and post shape not match'
    preds_valid = np.zeros((X_val.shape[0], NUM_OF_LABEL))

    # train model
    for run in range(RUNS_IN_FOLD):
        print('\nFold {0} run {1} begin'.format(fold, run))
        # model
        model = get_model(embedding_lookup_table, float(FLAGS.dp), float(FLAGS.sdp))
        # print(model.summary())
        if mode == 'try':
            st(context=3)
        # callbacks
        val_auc = RocAucMetricCallback()
        es = EarlyStopping(monitor=VAL_AUC, mode='max', patience=5)
        bst_model_path = \
            '../data/output/model/{0}fold_{1}run_{2}dp_{3}sdp_pool_cnn.h5'.format(
                fold, run, FLAGS.dp, FLAGS.sdp)
        mc = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)
        rp = ReduceLROnPlateau(
            monitor=VAL_AUC, mode='max', patience=2, cooldown=1,
            factor=np.sqrt(0.1), min_lr=0.0006, verbose=1
        )
        # train
        hist = model.fit(
            x=X_trn, y=y_trn,
            validation_data=(X_val, y_val),
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            shuffle=True,
            callbacks=[val_auc, es, mc, rp]
        )
        model.load_weights(bst_model_path)
        bst_val_score = max(hist.history[VAL_AUC])
        print('\nFold {0} run {1} best val score : {2}'.format(fold, run, bst_val_score))
        # predict
        print('\nFold {0} run {1} predict on test pre truncating'.format(fold, run))
        preds_test_pre += model.predict(X_test_pre, batch_size=256, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} predict on test post truncating'.format(fold, run))
        preds_test_post += model.predict(X_test_post, batch_size=256, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} predict on valid'.format(fold, run))
        preds_valid += model.predict(X_val, batch_size=256, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} done'.format(fold, run))
        del model
        gc.collect()

    # record preds result
    preds_test_avg = (preds_test_pre + preds_test_post) / 2.0
    preds_test = preds_test_avg.T
    df_preds_test = pd.DataFrame()
    df_preds_test[ID_COL] = id_test
    for idx, label in enumerate(label_candidates):
        df_preds_test[label] = preds_test[idx]
    df_preds_test.to_csv(
        '../data/output/preds/pool_cnn/{0}/{1}/{2}fold_test.csv'.format(FLAGS.dp, FLAGS.sdp, fold),
        index=False)

    preds_valid = preds_valid.T
    df_preds_val = pd.DataFrame()
    df_preds_val[ID_COL] = id_val
    for idx, label in enumerate(label_candidates):
        df_preds_val[label] = preds_valid[idx]
    df_preds_val.to_csv(
        '../data/output/preds/pool_cnn/{0}/{1}/{2}fold_valid.csv'.format(FLAGS.dp, FLAGS.sdp, fold),
        index=False)
        print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'batch_valid ' + str(i)

    # update better performance model
    global best_score
    if top1.avg > best_score:
        torch.save(model, args.save)
        print 'save model'
        best_score = top1.avg
    print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'epoch_valid ' + str(epoch)

# Loop over epochs.
lr = args.lr
best_val_loss = None

# At any point you can hit Ctrl + C to break out of training early.
st(context=27)
best_score = 0
try:
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    for epoch in range(1, args.epochs + 1):
        train(epoch, optimizer, questrainfealistShu, labeltrainlistShu, lengthtrainlistShu)
        valid(epoch, questrainfealistShu_valid, labeltrainlistShu_valid, lengthtrainlistShu_valid)
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

def test(model, quesfeaShu, labelShu, lengthShu):
    model.eval()
    # tail of broadcast_all(data, client, server): relay to everyone else
    for c in connected_clients:
        if c not in [client, server]:
            c.sendall(data)

if __name__ == '__main__':
    # create the server's listening socket
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # enable port reuse before bind
    # server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server_socket.bind((HOST, PORT))
    server_socket.listen(LISTEN)
    print 'Server listen in port %s ' % (PORT)
    connected_clients.append(server_socket)
    while 1:
        r, w, e = select.select(connected_clients, [], [], 20)
        st(context=17)
        for s in r:
            if s is server_socket:
                conn, addr = server_socket.accept()
                connected_clients.append(conn)
            else:
                data = s.recv(4096)
                if not data:
                    connected_clients.remove(s)
                else:
                    print '<(%s, %s)> : ' % addr, data
                    broadcast_all(data, s, server_socket)
        for s in w:
            pass
        for s in e:
            pass
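# A minimal matching client (a sketch; HOST and PORT are assumed to be the
# same constants the server binds to). It sends one message and prints the
# broadcasts it receives from the other connected clients.
import socket

client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client.connect((HOST, PORT))
client.sendall(b'hello')
print(client.recv(4096))
client.close()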
def run_one_fold(fold):
    # read whole train / test data for tokenizer
    df_train = read_train_data()
    df_test = read_test_data()

    # fit tokenizer
    tokenizer = get_fitted_tokenizer(df_train, df_test)
    word_index = tokenizer.word_index
    transformers_count = 0
    all_words = set(word_index.keys())
    for toxic, transformers in toxicIndicator_transformers.items():
        for transformer in transformers:
            if transformer == toxic:
                continue
            if transformer in all_words:
                transformers_count += tokenizer.word_counts[transformer]
                # print(transformer)
    print('toxic transformer count : {0}'.format(transformers_count))
    print('unique token : {0}'.format(len(word_index)))

    # get embedding lookup table
    embedding_dim = 300
    # fasttext_path = '../data/input/fasttext_dir/fasttext.300d.txt'
    # embedding_lookup_table = get_embedding_lookup_table(word_index, fasttext_path, embedding_dim)
    glove_path = '../data/input/glove_dir/glove.840B.300d.txt'
    # glove_path = '../data/input/glove_dir/glove.6B.{0}d.txt'.format(embedding_dim)
    embedding_lookup_table = get_embedding_lookup_table(word_index, glove_path, embedding_dim)

    # read in fold data
    df_trn, df_val = read_data_in_fold(fold)

    # prepare data
    X_test = get_padded_sequence(tokenizer, df_test[COMMENT_COL].astype('str').values.tolist())
    id_test = df_test[ID_COL].values.tolist()
    print('Test data shape {0}'.format(X_test.shape))
    X_trn = get_padded_sequence(tokenizer, df_trn[COMMENT_COL].astype('str').values.tolist())
    y_trn = df_trn[label_candidates].values
    print('Fold {0} train data shape {1} '.format(fold, X_trn.shape))
    X_val = get_padded_sequence(tokenizer, df_val[COMMENT_COL].astype('str').values.tolist())
    y_val = df_val[label_candidates].values
    id_val = df_val[ID_COL].values.tolist()
    print('Fold {0} valid data shape {1} '.format(fold, X_val.shape))

    # preds result array
    preds_test = np.zeros((X_test.shape[0], NUM_OF_LABEL))
    preds_valid = np.zeros((X_val.shape[0], NUM_OF_LABEL))

    # train model
    for run in range(RUNS_IN_FOLD):
        print('\nFold {0} run {1} begin'.format(fold, run))
        # model
        model = get_model(embedding_lookup_table, float(FLAGS.dp), float(FLAGS.sdp))
        print(model.summary())
        st()
        # callbacks
        # es = EarlyStopping(monitor='val_acc', mode='max', patience=3)
        val_auc = RocAucMetricCallback()
        es = EarlyStopping(monitor=VAL_AUC, mode='max', patience=3)
        bst_model_path = '../data/output/model/{0}fold_{1}run_{2}dp_{3}sdp_glove_gru.h5'.format(fold, run, FLAGS.dp, FLAGS.sdp)
        mc = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)
        # train
        hist = model.fit(
            x=X_trn, y=y_trn,
            validation_data=(X_val, y_val),
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            shuffle=True,
            callbacks=[val_auc, es, mc]
        )
        model.load_weights(bst_model_path)
        # bst_val_score = max(hist.history['val_acc'])
        bst_val_score = max(hist.history[VAL_AUC])
        print('\nFold {0} run {1} best val score : {2}'.format(fold, run, bst_val_score))
        # predict
        preds_test += model.predict(X_test, batch_size=1024, verbose=1) / RUNS_IN_FOLD
        preds_valid += model.predict(X_val, batch_size=1024, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} done'.format(fold, run))
        del model
        gc.collect()

    # record preds result
    preds_test = preds_test.T
    df_preds_test = pd.DataFrame()
    df_preds_test[ID_COL] = id_test
    for idx, label in enumerate(label_candidates):
        df_preds_test[label] = preds_test[idx]
    df_preds_test.to_csv('../data/output/preds/glove_gru/{0}/{1}/{2}fold_test.csv'.format(FLAGS.dp, FLAGS.sdp, fold), index=False)

    preds_valid = preds_valid.T
    df_preds_val = pd.DataFrame()
    df_preds_val[ID_COL] = id_val
    for idx, label in enumerate(label_candidates):
        df_preds_val[label] = preds_valid[idx]
    # df_preds_val.to_csv('../data/output/preds/fasttext_gru/{0}fold_valid.csv'.format(fold), index=False)
    df_preds_val.to_csv('../data/output/preds/glove_gru/{0}/{1}/{2}fold_valid.csv'.format(FLAGS.dp, FLAGS.sdp, fold), index=False)
def _build_model(self):
    with tf.device(self.GPU):
        if self.generator == 'unet':
            self.generatorA2B = UnetGenerator(self.opt, name='generatorA2B')
            self.generatorB2A = UnetGenerator(self.opt, name='generatorB2A')
        elif self.generator == 'unet_residual':
            self.generatorA2B = UnetGenerator(self.opt, res=True, name='generatorA2B')
            self.generatorB2A = UnetGenerator(self.opt, res=True, name='generatorB2A')
        else:
            st()
        if self.discriminator == 'basic':
            self.discriminatorA = Discriminator(self.opt, name='discriminatorA', use_sigmoid=self.use_sigmoid)
            self.discriminatorB = Discriminator(self.opt, name='discriminatorB', use_sigmoid=self.use_sigmoid)
        else:
            st()

        self.real_A = tf.placeholder(dtype, [1, None, self.nX, self.nC * 2], name='real_A')
        self.real_B = tf.placeholder(dtype, [1, None, self.nX, self.nC * 2], name='real_B')

        self.fake_B = self.generatorA2B(self.real_A)
        self.fake_A = self.generatorB2A(self.real_B)
        self.recon_B = self.generatorA2B(self.fake_A)
        self.recon_A = self.generatorB2A(self.fake_B)
        self.iden_B = self.generatorA2B(self.real_B)
        self.iden_A = self.generatorB2A(self.real_A)

        self.DA_fake = self.discriminatorA(self.fake_A)
        self.DB_fake = self.discriminatorB(self.fake_B)

        if self.use_wgan:
            self.gan_loss = -tf.reduce_mean(self.DB_fake) - tf.reduce_mean(self.DA_fake)
            self.cycle_loss = mse_criterion(self.real_A, self.recon_A) + mse_criterion(self.real_B, self.recon_B)
            self.g_loss_a2b = -tf.reduce_mean(self.DB_fake) + self.cyc_lambda * self.cycle_loss
            self.g_loss_b2a = -tf.reduce_mean(self.DA_fake) + self.cyc_lambda * self.cycle_loss
            self.g_loss = self.gan_loss + self.cyc_lambda * self.cycle_loss
        else:
            self.gan_loss = self.criterionGAN(self.DA_fake, tf.ones_like(self.DA_fake)) \
                + self.criterionGAN(self.DB_fake, tf.ones_like(self.DB_fake))
            self.cycle_loss = mae_criterion(self.real_A, self.recon_A) + mae_criterion(self.real_B, self.recon_B)
            self.g_loss_a2b = self.criterionGAN(self.DB_fake, tf.ones_like(self.DB_fake)) \
                + self.cyc_lambda * self.cycle_loss
            self.g_loss_b2a = self.criterionGAN(self.DA_fake, tf.ones_like(self.DA_fake)) \
                + self.cyc_lambda * self.cycle_loss
            self.g_loss = self.gan_loss + self.cyc_lambda * self.cycle_loss

        if self.use_identity:
            self.iden_loss = mae_criterion(self.real_A, self.iden_A) + mae_criterion(self.real_B, self.iden_B)
            self.g_loss_a2b = self.g_loss_a2b + self.iden_lambda * self.iden_loss
            self.g_loss_b2a = self.g_loss_b2a + self.iden_lambda * self.iden_loss
            self.g_loss = self.g_loss + self.iden_lambda * self.iden_loss

        self.fake_A_sample = tf.placeholder(dtype, [1, None, self.nX, self.nC * 2], name='fake_A_sample')
        self.fake_B_sample = tf.placeholder(dtype, [1, None, self.nX, self.nC * 2], name='fake_B_sample')
        self.DA_real = self.discriminatorA(self.real_A)
        self.DB_real = self.discriminatorB(self.real_B)
        self.DA_fake_sample = self.discriminatorA(self.fake_A_sample)
        self.DB_fake_sample = self.discriminatorB(self.fake_B_sample)

        if self.use_wgan:
            self.da_loss_real = -tf.reduce_mean(self.DA_real)
            self.da_loss_fake = tf.reduce_mean(self.DA_fake_sample)
            self.da_loss_GP = gradient_penalty(self.fake_A_sample, self.real_A, 1, self.discriminatorA)
            self.da_loss = (self.da_loss_real + self.da_loss_fake + self.GP_lambda * self.da_loss_GP) / 2
            self.db_loss_real = -tf.reduce_mean(self.DB_real)
            self.db_loss_fake = tf.reduce_mean(self.DB_fake_sample)
            self.db_loss_GP = gradient_penalty(self.fake_B_sample, self.real_B, 1, self.discriminatorB)
            self.db_loss = (self.db_loss_real + self.db_loss_fake + self.GP_lambda * self.db_loss_GP) / 2
            self.d_loss = self.da_loss + self.db_loss
        else:
            self.da_loss_real = self.criterionGAN(self.DA_real, tf.ones_like(self.DA_real))
            self.da_loss_fake = self.criterionGAN(self.DA_fake_sample, tf.zeros_like(self.DA_fake_sample))
            self.da_loss = (self.da_loss_real + self.da_loss_fake) / 2
            self.db_loss_real = self.criterionGAN(self.DB_real, tf.ones_like(self.DB_real))
            self.db_loss_fake = self.criterionGAN(self.DB_fake_sample, tf.zeros_like(self.DB_fake_sample))
            self.db_loss = (self.db_loss_real + self.db_loss_fake) / 2
            self.d_loss = self.da_loss + self.db_loss

        self.gan_loss_sum = tf.summary.scalar('generator/gan_loss', self.gan_loss)
        self.cycle_loss_sum = tf.summary.scalar('generator/cycle_loss', self.cycle_loss)
        self.g_loss_a2b_sum = tf.summary.scalar('generator/g_loss_a2b', self.g_loss_a2b)
        self.g_loss_b2a_sum = tf.summary.scalar('generator/g_loss_b2a', self.g_loss_b2a)
        self.g_loss_sum = tf.summary.scalar('generator/g_loss', self.g_loss)
        if self.use_identity:
            self.iden_loss_sum = tf.summary.scalar('generator/identity_loss', self.iden_loss)
            self.g_sum = tf.summary.merge([
                self.gan_loss_sum, self.cycle_loss_sum, self.iden_loss_sum,
                self.g_loss_a2b_sum, self.g_loss_b2a_sum, self.g_loss_sum
            ])
        else:
            self.g_sum = tf.summary.merge([
                self.gan_loss_sum, self.cycle_loss_sum,
                self.g_loss_a2b_sum, self.g_loss_b2a_sum, self.g_loss_sum
            ])

        self.da_loss_real_sum = tf.summary.scalar('discriminator/da_loss_real', self.da_loss_real)
        self.da_loss_fake_sum = tf.summary.scalar('discriminator/da_loss_fake', self.da_loss_fake)
        self.da_loss_sum = tf.summary.scalar('discriminator/da_loss', self.da_loss)
        self.db_loss_real_sum = tf.summary.scalar('discriminator/db_loss_real', self.db_loss_real)
        self.db_loss_fake_sum = tf.summary.scalar('discriminator/db_loss_fake', self.db_loss_fake)
        self.db_loss_sum = tf.summary.scalar('discriminator/db_loss', self.db_loss)
        self.d_loss_sum = tf.summary.scalar('discriminator/d_loss', self.d_loss)
        if self.use_wgan:
            self.da_loss_GP_sum = tf.summary.scalar('discriminator/da_loss_GP', self.da_loss_GP)
            self.db_loss_GP_sum = tf.summary.scalar('discriminator/db_loss_GP', self.db_loss_GP)
            self.d_sum = tf.summary.merge([
                self.da_loss_real_sum, self.da_loss_fake_sum, self.da_loss_GP_sum, self.da_loss_sum,
                self.db_loss_real_sum, self.db_loss_fake_sum, self.db_loss_GP_sum, self.db_loss_sum,
                self.d_loss_sum
            ])
        else:
            self.d_sum = tf.summary.merge([
                self.da_loss_real_sum, self.da_loss_fake_sum, self.da_loss_sum,
                self.db_loss_real_sum, self.db_loss_fake_sum, self.db_loss_sum,
                self.d_loss_sum
            ])

        self.scale_A = tf.placeholder(dtype, [1, None, self.nX, self.nC * 2], name='scale_tensor_A')
        self.scale_B = tf.placeholder(dtype, [1, None, self.nX, self.nC * 2], name='scale_tensor_B')
        self.real_A_ssos_sum = tf.summary.image('ssos/real_full', tf_imgri2ssos(self.real_A * self.scale_A), max_outputs=1)
        self.real_B_ssos_sum = tf.summary.image('ssos/real_down', tf_imgri2ssos(self.real_B * self.scale_B), max_outputs=1)
        self.fake_A_ssos_sum = tf.summary.image('ssos/fake_full', tf_imgri2ssos(self.fake_A * self.scale_B), max_outputs=1)
        self.fake_B_ssos_sum = tf.summary.image('ssos/fake_down', tf_imgri2ssos(self.fake_B * self.scale_A), max_outputs=1)
        self.recon_A_ssos_sum = tf.summary.image('ssos/recon_full', tf_imgri2ssos(self.recon_A * self.scale_A), max_outputs=1)
        self.recon_B_ssos_sum = tf.summary.image('ssos/recon_down', tf_imgri2ssos(self.recon_B * self.scale_B), max_outputs=1)
        self.ssos_sum = tf.summary.merge([
            self.real_A_ssos_sum, self.real_B_ssos_sum,
            self.fake_A_ssos_sum, self.fake_B_ssos_sum,
            self.recon_A_ssos_sum, self.recon_B_ssos_sum
        ])

        self.test_real_A = tf.placeholder(dtype, [1, None, self.nX, self.nC * 2], name='test_real_A')
        self.test_real_B = tf.placeholder(dtype, [1, None, self.nX, self.nC * 2], name='test_real_B')
        self.test_fake_B = self.generatorA2B(self.test_real_A)
        self.test_fake_A = self.generatorB2A(self.test_real_B)
        self.test_fake_img_B = tf.squeeze(tf_ri2comp(self.test_fake_B * self.scale_A))
        self.test_fake_img_A = tf.squeeze(tf_ri2comp(self.test_fake_A * self.scale_B))

        self.lr = tf.placeholder(dtype, None, name='learning_rate')
        self.lr_sum = tf.summary.scalar('learning_rate', self.lr)
        self.ga2b_optim = tf.train.AdamOptimizer(self.lr, beta1=self.beta1, beta2=self.beta2). \
            minimize(self.g_loss_a2b, var_list=self.generatorA2B.variables)
        self.gb2a_optim = tf.train.AdamOptimizer(self.lr, beta1=self.beta1, beta2=self.beta2). \
            minimize(self.g_loss_b2a, var_list=self.generatorB2A.variables)
        self.da_optim = tf.train.AdamOptimizer(self.lr, beta1=self.beta1, beta2=self.beta2). \
            minimize(self.da_loss, var_list=self.discriminatorA.variables)
        self.db_optim = tf.train.AdamOptimizer(self.lr, beta1=self.beta1, beta2=self.beta2). \
            minimize(self.db_loss, var_list=self.discriminatorB.variables)
def split_data_by_Kfold(K, silence_percentage, unknown_percentage):
    random.seed(RANDOM_SEED)
    ret = []
    uid_list = []
    for k in range(K):
        print('{0} fold'.format(k))
        wanted_data = {TRAIN: [], VALID: []}

        ## Step1
        ## ----Guarantee the same 'unknown words' don't exist in both train and valid set
        random.shuffle(unknown_words)
        _unknown_words_train = unknown_words[:10]
        _unknown_words_valid = unknown_words[10:]
        unknown_data_train = []
        unknown_data_valid = []
        uid = {TRAIN: [], VALID: []}
        missing_unknown_counts = 0
        train_unknown_counts = 0
        valid_unknown_counts = 0

        ## Step2
        ## ----Split 10 known words and other unknown words
        for wav_path in gfile.Glob(search_path):
            _, word = os.path.split(os.path.dirname(wav_path))
            word = word.lower()
            if word == BACKGROUND_NOISE_DIR_NAME:
                continue
            set_index, usr_id = distribute_fold(wav_path=wav_path, fold=k, K=K)
            uid[set_index].append(usr_id)
            if word in wanted_words:
                wanted_data[set_index].append({'label': word, 'file': wav_path})
            else:
                if set_index == TRAIN and word in _unknown_words_train:
                    train_unknown_counts += 1
                    unknown_data_train.append({'label': UNKNOWN_WORD_LABEL, 'file': wav_path})
                elif set_index == VALID and word in _unknown_words_valid:
                    valid_unknown_counts += 1
                    unknown_data_valid.append({'label': UNKNOWN_WORD_LABEL, 'file': wav_path})
                else:
                    missing_unknown_counts += 1
                    pass
        print('valid unknown counts : {0}'.format(valid_unknown_counts))
        print('train unknown counts : {0}'.format(train_unknown_counts))

        ## Step3
        ## ----Add 'silence' and 'unknown' according to preset 'silence_percentage' and 'unknown_percentage'
        addition_silence_unknown_count = 0
        for set_index in [VALID, TRAIN]:
            set_size = len(wanted_data[set_index])
            # add silence data
            silence_size = int(math.ceil(set_size * silence_percentage / 100))
            print('silence size : {0}'.format(silence_size))
            addition_silence_unknown_count += silence_size
            for _ in range(silence_size):
                wanted_data[set_index].append({'label': SILENCE_LABEL, 'file': SILENCE_FILE})
            # add unknown data
            unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
            print('unknown size : {0}'.format(unknown_size))
            addition_silence_unknown_count += unknown_size
            if set_index == TRAIN:
                random.shuffle(unknown_data_train)
                st(context=21)
                wanted_data[set_index].extend(unknown_data_train[:unknown_size])
            else:
                random.shuffle(unknown_data_valid)
                wanted_data[set_index].extend(unknown_data_valid[:unknown_size])
        print('addition silence unknown count : {0}'.format(addition_silence_unknown_count))

        ## Step4
        ## ----Shuffle ordering
        for set_index in [VALID, TRAIN]:
            random.shuffle(wanted_data[set_index])
            uid[set_index] = list(set(uid[set_index]))
        ret.append(wanted_data)
        uid_list.append(uid)
        print('')
    return ret, uid_list
def check_car_path(self, car):
    conflict_cars = []
    failed = False
    blocked = False
    clear = True
    conflict = False
    mycone = self.get_vision_cone(car)
    self.car_boxes.clear()
    for cars in self.cars:
        box = Polygon([(cars.x - 0.5, cars.y + 1), (cars.x - 0.5, cars.y - 1),
                       (cars.x + 3, cars.y + 1), (cars.x + 3, cars.y - 1)])
        rot_box = affinity.rotate(box, np.rad2deg(cars.yaw), origin=(cars.x, cars.y))
        self.car_boxes.update({cars.name: rot_box})
    # print(self.car_boxes)
    # Now check if car box and vision cone intersect
    if not car.last_segment and not car.close:
        myblocked_cone = self.get_vision_cone_blocked(car)
        for key, val in self.car_boxes.items():
            if key != car.name:
                if myblocked_cone.intersects(val):
                    for cars in self.cars:
                        if cars.name == key:
                            if cars.status == 'Failure' or cars.status == 'Blocked':
                                blocked = True
                                print('Failure or blocked car ahead')
    for key, val in self.car_boxes.items():
        if key != car.name:
            if mycone.intersects(val):
                clear = False
                # print('{0} stops because other car is in the path'.format(car.name))
                for cars in self.cars:
                    if cars.name == key:
                        if cars.status == 'Failure':
                            # failed_cars.append(cars)
                            failed = True
                            print('{0} blocked by a failed car'.format(car.name))
                        elif cars.parked:
                            print('Blocked by a parked car - Go on')
                        else:
                            conflict_cars.append(cars)
    # check if a pedestrian is in the cone
    mypedcone = self.get_vision_cone_pedestrian(car)
    for ped in self.peds:
        x_m = ped.state[0] / SCALE_FACTOR_SIM
        y_m = ped.state[1] / SCALE_FACTOR_SIM
        ped_point = Polygon([(x_m + 2, y_m + 1), (x_m + 2, y_m - 1), (x_m - 1, y_m - 1),
                             (x_m - 1, y_m + 1), (x_m + 2, y_m + 1)])
        if ped_point.intersects(mypedcone):
            clear = False
            print('{0} stops because a pedestrian is in the path'.format(car.name))
    # check if they have a conflict with me
    mybox = self.car_boxes.get(car.name, 0)
    for cars in list(conflict_cars):  # iterate over a copy so removal is safe
        cone = self.get_vision_cone(cars)
        # print(cone)
        try:
            cone.intersects(mybox)
        except:
            st()
        if not cone.intersects(mybox):
            conflict_cars.remove(cars)
        else:
            conflict = True
            # print('There is a conflict')
    return clear, conflict_cars, failed, conflict, blocked
def run_one_fold(fold):
    # read whole train / test data for tokenizer
    df_train = read_train_data()
    df_test = read_test_data()

    # fit tokenizer : word level
    tokenizer = get_fitted_tokenizer(df_train, df_test)
    word_index = tokenizer.word_index
    transformers_count = 0
    all_words = set(word_index.keys())
    for toxic, transformers in toxicIndicator_transformers.items():
        for transformer in transformers:
            if transformer == toxic:
                continue
            if transformer in all_words:
                transformers_count += tokenizer.word_counts[transformer]
                # print(transformer)
    print('toxic transformer count : {0}'.format(transformers_count))
    print('unique token : {0}'.format(len(word_index)))

    # fit tokenizer : char level
    tokenizer_char = get_fitted_tokenizer_charLevel(df_train, df_test)
    word_index_char = tokenizer_char.word_index
    print('unique token char : {0}'.format(len(word_index_char)))

    # get embedding lookup table word level / char level
    embedding_dim = 300
    glove_path = '../data/input/glove_dir/glove.840B.300d.txt'
    glove_embedding_lookup_table, glove_embedding_lookup_table_char = \
        get_embedding_lookup_table(word_index, word_index_char, glove_path, embedding_dim)

    # read in fold data
    df_trn, df_val = read_data_in_fold(fold)

    # prepare data : word level
    X_test_word = get_padded_sequence(tokenizer, df_test[COMMENT_COL].astype('str').values.tolist())
    id_test = df_test[ID_COL].values.tolist()
    print('Test data shape {0}'.format(X_test_word.shape))
    X_trn_word = get_padded_sequence(tokenizer, df_trn[COMMENT_COL].astype('str').values.tolist())
    y_trn = df_trn[label_candidates].values
    print('Fold {0} train data shape {1}'.format(fold, X_trn_word.shape))
    X_val_word = get_padded_sequence(tokenizer, df_val[COMMENT_COL].astype('str').values.tolist())
    y_val = df_val[label_candidates].values
    id_val = df_val[ID_COL].values.tolist()
    print('Fold {0} valid data shape {1}'.format(fold, X_val_word.shape))

    # prepare data : char level
    X_test_char = get_padded_sequence_charLevel(tokenizer_char, df_test[COMMENT_COL].astype('str').values.tolist())
    print('Test data shape {0}'.format(X_test_char.shape))
    X_trn_char = get_padded_sequence_charLevel(tokenizer_char, df_trn[COMMENT_COL].astype('str').values.tolist())
    print('Fold {0} train data shape {1}'.format(fold, X_trn_char.shape))
    X_val_char = get_padded_sequence_charLevel(tokenizer_char, df_val[COMMENT_COL].astype('str').values.tolist())
    print('Fold {0} valid data shape {1}'.format(fold, X_val_char.shape))

    # prepare word / char level data
    X_test = [X_test_word, X_test_char]
    X_trn = [X_trn_word, X_trn_char]
    X_val = [X_val_word, X_val_char]

    # preds result array
    preds_test = np.zeros((X_test_word.shape[0], NUM_OF_LABEL))
    preds_valid = np.zeros((X_val_word.shape[0], NUM_OF_LABEL))

    # train model
    for run in range(RUNS_IN_FOLD):
        print('\nFold {0} run {1} begin'.format(fold, run))
        # model
        model = get_model(glove_embedding_lookup_table, glove_embedding_lookup_table_char, float(FLAGS.dp))
        # print(model.summary())
        if mode == 'try':
            st(context=3)
        # callbacks
        es = EarlyStopping(monitor='val_acc', mode='max', patience=5)
        bst_model_path = '../data/output/model/{0}fold_{1}run_glove_cnn.h5'.format(fold, run)
        mc = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)
        rp = ReduceLROnPlateau(monitor='val_acc', mode='max', patience=3,
                               factor=np.sqrt(0.1), verbose=1)
        # train
        hist = model.fit(
            x=X_trn, y=y_trn,
            validation_data=(X_val, y_val),
            epochs=EPOCHS, batch_size=BATCH_SIZE,
            shuffle=True,
            callbacks=[es, mc, rp]
        )
        model.load_weights(bst_model_path)
        bst_val_score = max(hist.history['val_acc'])
        print('\nFold {0} run {1} best val score : {2}'.format(fold, run, bst_val_score))
        # predict
        preds_test += model.predict(X_test, batch_size=512, verbose=1) / RUNS_IN_FOLD
        preds_valid += model.predict(X_val, batch_size=512, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} done'.format(fold, run))
        del model
        gc.collect()

    # record preds result
    preds_test = preds_test.T
    df_preds_test = pd.DataFrame()
    df_preds_test[ID_COL] = id_test
    for idx, label in enumerate(label_candidates):
        df_preds_test[label] = preds_test[idx]
    df_preds_test.to_csv('../data/output/preds/glove_cnn/{0}/{1}fold_test.csv'.format(FLAGS.dp, fold), index=False)

    preds_valid = preds_valid.T
    df_preds_val = pd.DataFrame()
    df_preds_val[ID_COL] = id_val
    for idx, label in enumerate(label_candidates):
        df_preds_val[label] = preds_valid[idx]
    df_preds_val.to_csv('../data/output/preds/glove_cnn/{0}/{1}fold_valid.csv'.format(FLAGS.dp, fold), index=False)
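# Not in the original: the incremental division in the training loop above is
# plain run averaging (seed bagging). With RUNS_IN_FOLD = 3, for example,
#   preds_test = p_run0/3 + p_run1/3 + p_run2/3 = (p_run0 + p_run1 + p_run2) / 3
# so each restart of the model contributes an equal share to the fold's test
# and validation predictions.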
# encoding=utf8
"""
Decorating a function that takes arguments.
"""
from ipdb import set_trace as st


def print_debug(func):
    def __decorator(user):
        print('enter the login')
        func(user)
        print('exit the login')
    return __decorator


@print_debug
def login(user):
    # the body of this snippet was garbled in the source ('******');
    # reconstructed from the surrounding prints and the 'jatsz' argument
    print('in login:' + user)


login('jatsz')
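# Expected output of the call above:
#   enter the login
#   in login:jatsz
#   exit the login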
def getBatch_RGB_varInpID(self, start, end):
    end = min([end, self.len])
    batch = self.flist[start:end]

    # channel first: [batch, channels, Y, X]
    sz_a = [end - start, self.nCh_out, self.nY, self.nX]
    sz_M = [end - start, 1, self.nY, self.nX]
    target_class_idx = np.empty([end - start, 1], dtype=np.uint8)

    a_img = np.empty(sz_a, dtype=np.float32)
    b_img = np.empty(sz_a, dtype=np.float32)
    c_img = np.empty(sz_a, dtype=np.float32)
    d_img = np.empty(sz_a, dtype=np.float32)
    e_img = np.empty(sz_a, dtype=np.float32)
    f_img = np.empty(sz_a, dtype=np.float32)
    g_img = np.empty(sz_a, dtype=np.float32)
    n_img = np.empty(sz_a, dtype=np.float32)
    target_img = np.empty(sz_a, dtype=np.float32)

    a_mask = np.zeros(sz_M, dtype=np.float32)
    b_mask = np.zeros(sz_M, dtype=np.float32)
    c_mask = np.zeros(sz_M, dtype=np.float32)
    d_mask = np.zeros(sz_M, dtype=np.float32)
    e_mask = np.zeros(sz_M, dtype=np.float32)
    f_mask = np.zeros(sz_M, dtype=np.float32)
    g_mask = np.zeros(sz_M, dtype=np.float32)
    n_mask = np.zeros(sz_M, dtype=np.float32)

    # lists of references to the named arrays, so the per-channel work below
    # can loop instead of repeating the same block eight times
    imgs = [a_img, b_img, c_img, d_img, e_img, f_img, g_img, n_img]
    masks = [a_mask, b_mask, c_mask, d_mask, e_mask, f_mask, g_mask, n_mask]

    targ_idx = random.randint(0, self.N - 1)
    tar_class_bools = [x == targ_idx for x in range(self.N)]

    # choose a random file in the set which is not the target
    bFname = self.flist[random.randint(0, self.len - 1)]
    random_for_batch2 = []
    random.seed(2)
    a_rand = random.randint(0, self.N - 1)
    for i in range(1):  # change here to edit change ID N
        while (a_rand in random_for_batch2) or (a_rand == targ_idx):
            a_rand = random.randint(0, self.N - 1)
        random_for_batch2.append(a_rand)
    random_for_batch2.sort()
    random_bools = [x in random_for_batch2 for x in range(self.N)]

    for iB, aFname in enumerate(batch):
        aug_idx = random.randint(0, 1)

        # load the eight exposure channels, swapping in the random file
        # for the channels flagged in random_bools
        tmps = []
        for k in range(8):
            lFname = bFname if random_bools[k] else aFname
            tmps.append(np.ndarray.astype(
                self.read_png(join(self.root, lFname + self.fExp[k] + '.png')),
                dtype=np.float32))

        if self.use_aug and aug_idx == 1:
            # note: on (C, Y, X, 1) arrays axis=3 is the singleton axis, so
            # this flip is a no-op; kept as in the original
            tmps = [np.flip(t, axis=3) for t in tmps]

        for img, tmp in zip(imgs, tmps):
            if self.use_norm_std:
                img[iB, :, :, :] = tmp[:, :, :, 0] / np.std(tmp)
            else:
                img[iB, :, :, :] = tmp[:, :, :, 0] / 255.0

        # one-hot selection of the target channel and its mask
        if 0 <= targ_idx < 8:
            target_img[iB, :, :, :] = imgs[targ_idx][iB, :, :, :]
            masks[targ_idx][iB, 0, :, :] = 1.
        else:
            st()
        target_class_idx[iB] = targ_idx

    return (target_class_idx, a_img, b_img, c_img, d_img, e_img, f_img, g_img,
            n_img, a_mask, b_mask, c_mask, d_mask, e_mask, f_mask, g_mask,
            n_mask, tar_class_bools, target_img)
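# A quick check (not in the original) of the no-op flagged above: flipping a
# length-1 axis returns the array unchanged, so the augmentation branch never
# alters the loaded (C, Y, X, 1) images.
import numpy as np

x = np.arange(12, dtype=np.float32).reshape(1, 2, 6, 1)
assert np.array_equal(np.flip(x, axis=3), x)       # singleton axis: no-op
assert not np.array_equal(np.flip(x, axis=2), x)   # flipping X actually mirrors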
# the original snippet begins mid-call; the enclosing function is reconstructed
# from the save_voxel(val, name, THRESHOLD=i) calls below, assuming the
# binvox-rw-py Voxels class, with S1/S2/S3 (the grid dims) defined elsewhere
def save_voxel(voxel_, filename, THRESHOLD=0.5):
    binvox_obj = binvox_rw.Voxels(
        np.transpose(voxel_, [2, 1, 0]) >= THRESHOLD,
        dims=[S1, S2, S3],
        translate=[0.0, 0.0, 0.0],
        scale=1.0,
        axis_order='xyz'
    )
    with open(filename, "wb") as f:
        binvox_obj.write(f)


val = pickle.load(open("vis_embeddings/gqn3d_cond_tree_vae_concat_end_inf_segmentation_test/test/sphere gray large metal/000003_segmentation_masks.png", "rb"))
# cube = pickle.load(open("voxel_shapes/real_shapes/cube.p", "rb"))
# cylinder = pickle.load(open("voxel_shapes/real_shapes/cylinder.p", "rb"))
val = pickle.load(open("voxel_shapes/real_shapes/sphere.p", "rb"))
# cube_bce = utils.losses.binary_ce_loss(val, cube)
# cylinder_bce = utils.losses.binary_ce_loss(val, cylinder)
# sphere_bce = utils.losses.binary_ce_loss(val, sphere)
# print("cube", cube_bce)
# print("cylinder", cylinder_bce)
# print("sphere", sphere_bce)
valrange = [i * 0.05 for i in range(5, 20)]
print(valrange)
for i in valrange:
    name = "ab_{}.binvox".format(i)
    st()
    save_voxel(val, name, THRESHOLD=i)
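# Round-trip sanity check (not in the original; assumes the binvox-rw-py
# package, whose read_as_3d_array parses .binvox files back into a Voxels
# object). The filename matches the loop's first threshold, i = 0.25:
# with open("ab_0.25.binvox", "rb") as f:
#     model = binvox_rw.read_as_3d_array(f)
#     print(model.dims, model.data.shape)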
def index():
    st(context=21)
    if 'username' in session:
        return 'Logged in as %s' % escape(session['username'])
    return 'You are not logged in'
def login():
    st(context=21)
    if request.method == 'POST':
        session['username'] = request.form['username']
        return redirect(url_for('index'))
    # the returned string was truncated in the source; completed with the
    # standard Flask quickstart login form this snippet is based on
    return '''
        <form method="post">
            <p><input type=text name=username>
            <p><input type=submit value=Login>
        </form>
    '''
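# Not part of the original snippets: the minimal app setup the two views above
# rely on. Flask sessions require a secret key to sign the session cookie; the
# key value here is a placeholder.
from flask import Flask, session, redirect, url_for, escape, request

app = Flask(__name__)
app.secret_key = 'replace-with-a-random-secret-key'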
def run_cv():
    # read whole train / test data for extractor
    df_train = read_train_data()
    df_test = read_test_data()
    X_test = df_test[COMMENT_COL].values
    id_test = df_test[ID_COL].values.tolist()

    extractor_word = get_extractor('word')
    extractor_word.fit(pd.concat((df_train.loc[:, COMMENT_COL], df_test.loc[:, COMMENT_COL])))
    st(context=21)
    extractor_char = get_extractor('char')
    extractor_char.fit(pd.concat((df_train.loc[:, COMMENT_COL], df_test.loc[:, COMMENT_COL])))
    st(context=21)

    X_test_word = conduct_transform(extractor_word, X_test)
    X_test_char = conduct_transform(extractor_char, X_test)
    X_test_all = hstack([X_test_word, X_test_char])

    for fold in range(K):
        # read in fold data
        df_trn, df_val = read_data_in_fold(fold)
        X_trn = df_trn[COMMENT_COL].values
        X_trn_word = conduct_transform(extractor_word, X_trn)
        X_trn_char = conduct_transform(extractor_char, X_trn)
        X_trn_all = hstack([X_trn_word, X_trn_char])
        y_trn = df_trn[label_candidates].values
        print('\nFold {0} train data shape {1}'.format(fold, X_trn.shape))

        X_val = df_val[COMMENT_COL].values
        X_val_word = conduct_transform(extractor_word, X_val)
        X_val_char = conduct_transform(extractor_char, X_val)
        X_val_all = hstack([X_val_word, X_val_char])
        y_val = df_val[label_candidates].values
        id_val = df_val[ID_COL].values.tolist()
        print('Fold {0} valid data shape {1}'.format(fold, X_val.shape))

        # preds result array
        preds_test = np.zeros((X_test.shape[0], NUM_OF_LABEL))
        preds_valid = np.zeros((X_val.shape[0], NUM_OF_LABEL))

        # one binary model per label
        models = []
        for idx, label in enumerate(label_candidates):
            print('\nFold {0} label {1}'.format(fold, label))
            model = get_model()
            print(' train')
            model.fit(X=X_trn_all, y=y_trn[:, idx])
            models.append(model)
            print(' predict valid')
            preds_valid[:, idx] = model.predict_proba(X=X_val_all)[:, 1]

        # predict in fold
        print('Fold {0} predict test'.format(fold))
        for idx, model in enumerate(models):
            preds_test[:, idx] = model.predict_proba(X=X_test_all)[:, 1]

        # record preds result
        preds_test = preds_test.T
        df_preds_test = pd.DataFrame()
        df_preds_test[ID_COL] = id_test
        for idx, label in enumerate(label_candidates):
            df_preds_test[label] = preds_test[idx]
        df_preds_test.to_csv('../data/output/preds/lr/{0}fold_test.csv'.format(fold), index=False)

        preds_valid = preds_valid.T
        df_preds_val = pd.DataFrame()
        df_preds_val[ID_COL] = id_val
        for idx, label in enumerate(label_candidates):
            df_preds_val[label] = preds_valid[idx]
        df_preds_val.to_csv('../data/output/preds/lr/{0}fold_valid.csv'.format(fold), index=False)
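# get_extractor is not defined in this snippet. A plausible sketch, assuming
# the usual scikit-learn TF-IDF setup for a word+char pipeline like this one
# (the original hyperparameters are unknown):
from sklearn.feature_extraction.text import TfidfVectorizer

def get_extractor(level):
    if level == 'word':
        return TfidfVectorizer(analyzer='word', ngram_range=(1, 1), max_features=50000)
    return TfidfVectorizer(analyzer='char', ngram_range=(2, 5), max_features=50000)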
# encoding=utf8
"""
A data descriptor that upper-cases every value assigned to it.
"""
from ipdb import set_trace as st


class UpperString(object):

    def __init__(self):
        self._value = ''

    def __get__(self, instance, klass):
        return self._value

    def __set__(self, instance, value):
        self._value = value.upper()


class MyClass(object):
    attribute = UpperString()


st(context=21)
instance_of = MyClass()
instance_of.attribute
instance_of.attribute = 'my value'
instance_of.attribute
instance_of.__dict__ = {}
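# Expected behaviour of the descriptor session above:
#   instance_of.attribute              -> ''         (initial _value)
#   instance_of.attribute = 'my value'
#   instance_of.attribute              -> 'MY VALUE' (upper-cased by __set__)
# Two things worth noting: _value lives on the descriptor object, so it is
# shared by every MyClass instance; and because UpperString defines __set__ it
# is a data descriptor, so clearing instance_of.__dict__ does not bypass it.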
async def track_reference(self, Game, send_response_channel):
    now = trio.current_time()
    if self.depart_time <= now:
        # note: with depart_time <= now this delay is zero or negative,
        # as in the original
        self.delay = self.depart_time - now
    print('{0} - Tracking reference...'.format(self.name))
    self.close = False
    ck = 0
    dl = 1.0  # course tick
    if not self.status == 'Replan':
        try:
            self.check_if_car_is_in_spot(Game)
        except:
            st()
        if self.check_if_car_is_in_spot(Game):
            print('Car is in a parking spot')
            self.parked = True
            while not self.check_clear_before_unparking(Game):
                await trio.sleep(0.1)
            self.status = 'Driving'
            self.parked = False
    # including a failure in 20% of cars
    failidx = len(self.ref)
    chance = random.randint(1, 100)  # changed to 0!!!
    if not self.replan:
        if len(self.ref) - 1 > 4 and chance <= 0:
            failidx = np.random.randint(low=4, high=6, size=1)
            if self.parking:
                print('{0} will fail at acceptable spot: {1}'.format(self.name, failidx))
            else:
                print('{0} will fail in narrow path: {1}'.format(self.name, failidx))
        elif len(self.ref) - 1 > 10 and chance <= 0:
            failidx = np.random.randint(low=len(self.ref) - 5, high=len(self.ref) - 1, size=1)
            if self.parking:
                print('{0} will fail in narrow path: {1}'.format(self.name, failidx))
            else:
                print('{0} will fail at acceptable spot: {1}'.format(self.name, failidx))
    # start tracking segments
    for i in range(0, len(self.ref) - 1):
        # print('{0} self.unparking'.format(self.name))
        # print(self.unparking)
        if i == failidx:
            print('{0} Failing'.format(self.name))
            await self.failure(send_response_channel)
            return
        if i >= 1:
            self.unparking = False
        self.close = False
        if self.check_car_close_2_spot(Game):
            self.close = True
        self.status = 'Driving'
        path = self.ref[:][i]
        cx = path[:, 0] * SCALE_FACTOR_PLAN
        cy = path[:, 1] * SCALE_FACTOR_PLAN
        cyaw = np.deg2rad(path[:, 2]) * -1
        state = np.array([self.x, self.y, self.yaw])
        # check direction of the segment
        self.direction = tracking.check_direction(path)
        sp = tracking.calc_speed_profile(cx, cy, cyaw, TARGET_SPEED, TARGET_SPEED, self.direction)
        initial_state = State(x=state[0], y=state[1], yaw=state[2], v=self.v)
        await self.track_async(cx, cy, cyaw, ck, sp, dl, initial_state,
                               TARGET_SPEED, Game, send_response_channel)
        await trio.sleep(0)
        if self.status == 'Replan':
            return
    if not self.status == 'Failure':
        self.last_segment = True
        state = np.array([self.x, self.y, self.yaw])
        path = self.ref[:][-1]
        cx = path[:, 0] * SCALE_FACTOR_PLAN
        cy = path[:, 1] * SCALE_FACTOR_PLAN
        cyaw = np.deg2rad(path[:, 2]) * -1
        self.direction = tracking.check_direction(path)
        initial_state = State(x=state[0], y=state[1], yaw=state[2], v=self.v)
        sp = tracking.calc_speed_profile(cx, cy, cyaw, TARGET_SPEED / 2, 0.0, self.direction)
        await self.track_async(cx, cy, cyaw, ck, sp, dl, initial_state,
                               0.0, Game, send_response_channel)
        if self.status == 'Replan':
            return
        self.status = 'Completed'
        self.is_at_pickup = self.check_at_pickup(Game)
        if self.is_at_pickup:
            self.retrieving = False
        self.last_segment = False
        if self.check_if_car_is_in_spot(Game):
            self.parked = True
            self.parking = False
        await self.send_response(send_response_channel)