Example 1
 def oof(self, params, best_rounds, sub, do_logit=True):
     stacker_train = np.zeros((self.X.shape[0], 1))
     dtest = xgb.DMatrix(data=self.test.values)
     idx = []
     for index, (trn_idx, val_idx) in enumerate(self.skf.split(self.X, self.y)):
         idx.append(trn_idx)
         '''
         trn_x, val_x = self.X[trn_idx], self.X[val_idx]
         trn_y, val_y = self.y[trn_idx], self.y[val_idx]
         dtrn = xgb.DMatrix(data=trn_x, label=trn_y)
         dval = xgb.DMatrix(data=val_x, label=val_y)
         # train model
         logging.info('Train model in fold {0}'.format(index))
         cv_model = xgb.train(
             params=params,
             dtrain=dtrn,
             num_boost_round=best_rounds,
             verbose_eval=10,
         )
         logging.info('Predict in fold {0}'.format(index))
         prob = cv_model.predict(dtest, ntree_limit=best_rounds)
         stacker_train[val_idx,0] = cv_model.predict(dval, ntree_limit=best_rounds)
         sub['target'] += prob / self.N
         '''
     if do_logit:
         sub['target'] = 1 / (1 + np.exp(-sub['target']))
         stacker_train = 1 / (1 + np.exp(-stacker_train))
     logging.info('{0} of folds'.format(self.N))
     logging.info('Oof by single xgboost model Done')
     pickle.dump(idx, open('xgb.pkl', 'w'))
     st(context=21)
     return sub, stacker_train
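Throughout these examples, `st` is presumably ipdb's `set_trace` bound to a short alias, as Example 26 below imports it explicitly (`from ipdb import set_trace as st`). A minimal sketch of that setup, assuming ipdb is installed; the function name is only for illustration:

    from ipdb import set_trace as st

    def doubled(x):
        st(context=21)  # pause in the debugger here, showing 21 lines of surrounding source
        return x * 2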
Example 2
def df_filter_row_by_id():
    """
    Function
        Filter table B's IDs using the IDs provided by table A
    """
    st(context=21)
    ret = {}
    try:
        # fetch the required data
        store = pd.HDFStore(HDF5_path)
        df_name = request.args.get('origin_samples', None)
        assert HDF5_PREF+df_name+DATA_SUFF in store.keys(), \
            "dataframe %s not in store %s"%(HDF5_PREF+df_name+DATA_SUFF, store.filename)
        df_id_name = request.args.get('id_candicate', None)
        assert HDF5_PREF+df_id_name+DATA_SUFF in store.keys(), \
            "dataframe %s not in store %s"%(HDF5_PREF+df_id_name+DATA_SUFF, store.filename)
        new_dataframe_name = request.args.get('new_dataframe_name', None)
        df = store[df_name+DATA_SUFF]
        ix.tag_meta_auto(df)
        tmp1 = store[df_id_name+DATA_SUFF]["DESYNPUF_ID"]
        def f1(*args):
            id = args[0]
            return id in [a for a in tmp1]
        cnt_rows = ix.select_rows_by_function(df, f1)
        df = ix.filter_rows(df, cnt_rows)
        update_df_in_HDFStore_by_name(store, new_dataframe_name, df)
        # assert df_name!=new_dataframe_name, "df_name and new_dataframe_name cannot be the same"
        # store.put(new_dataframe_name+DATA_SUFF, df)
        ret['info'] = 'affects %s number of rows'%(str(sum(cnt_rows)))
        store.close()
    except Exception, e:
        store.close()
        return render_template('dc_error.html', e_message=e)
Example 3
def create_distribution_fig(df, df_name, col_name, dtype):
    """
    Description
        Generate distribution figures for a column of df based on the column's data type
    Parameters
        df : the dataframe
        df_name : string giving the name of df
        col_name : the name of a column in df
        dtype : the data type of that column
    Returns
        The figure names for that column joined into a string:
            figname1#figname2...
        Empty if there is nothing to return
    """
    ret = []
    for figtype in map_dtype_figtype.get(dtype):
        try:
            ax = df[col_name].plot(kind=figtype) if figtype not in need_value_count_figtype \
                    else df[col_name].value_counts().plot(kind=figtype)
            fig = ax.get_figure()
            # since this will be used in a URL, special characters in col_name (such as '.') must be replaced
            # st(context=21)
            fig_name = str(df_name)+'_'+str(col_name.replace('.', '_'))+'_'+str(figtype)+'.png'
            fig.savefig(fig_dir+fig_name)
            print fig_name
            ret.append(fig_name)
            fig.clf()
        except Exception,e:
            st(context=21)
Example 4
    def display_images(self, visuals, epoch, table=True, phase='train'):
        idx = self._get_display_id(phase)
        if self.display_id > 0:
            if table:
                for i, (label, image_numpy) in enumerate(visuals.items()):
                    if i == 0:
                        image_conc = self.tensor2im(image_numpy)
                        # input_shape = image_conc.shape
                        # image_conc = image_conc.resize(self.outputSize)
                        label_conc = label
                    else:
                        if 'sem' in label:
                            from .util import labels_to_colors
                            image = labels_to_colors(image_numpy, self.opt.color_palette).astype(np.uint8).transpose([2,0,1])

                            image_conc = np.concatenate((image_conc, image), axis=1)
                            label_conc += ('\t' + label)
                        else:
                            image = self.tensor2im(image_numpy)  # , imtype=np.uint16, convert_value=(pow(2, 16) - 1))
                            image_conc = np.concatenate((image_conc, image), axis=1)
                            # if input_shape != image_conc.shape:
                            #     image_conc = imresize(image_conc, input_shape[0], interp='bilinear')
                            label_conc += ('\t' + label)

                self.vis.image(image_conc,
                               opts=dict(title='{} Epoch[{}] '.format(self.name, epoch) + label_conc), win=self.display_id + idx)

            else:
                st()
                for label, image_numpy in visuals.items():
                    self.vis.image((self.tensor2im(image_numpy)), opts=dict(title='{} Epoch[{}] '.format(self.name, epoch) + label), win=self.display_id + idx)

                    idx += 1
Example 5
def test_clip_augment(path):
    base_name = os.path.basename(path)
    usr_id = re.sub(r'_nohash_.*$', '', base_name)

    data = read_raw_wav(path)

    shift_size_ms = 100
    data_shifted = Augmentataion.shifts_in_time(data, shift_size_ms)
    pickle.dump(data_shifted, open('../data/input/tmp/' + usr_id + '_shifted.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)

    stretch_rate = 0.8
    data_stretch = Augmentataion.stretch(data, stretch_rate)
    pickle.dump(data_stretch, open('../data/input/tmp/' + usr_id + '_stretch.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)

    noise_weight = 0.05 # 0.01 - 0.05
    noise_type = RUNNING_TAP
    data_noising = Augmentataion.adds_background_noise(data, noise_type, noise_weight)
    pickle.dump(data_noising, open('../data/input/tmp/' + usr_id + '_noising.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)

    n_steps = 6
    data_pitched = Augmentataion.shifts_in_pitch(data, n_steps)
    pickle.dump(data_pitched, open('../data/input/tmp/' + usr_id + '_pitched.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)

    _,  _, spec = Augmentataion.calculates_spectrogram(data)

    st(context=21)
Example 6
    def get_null_list_for_idx(self, idx):
        a_list = []
        for i_null in self.null_N_set:
            tmp_a = []
            if i_null == 1:
                tmp = [ bX==idx for bX in range(self.N) ]
                tmp_a.append(tmp)

            elif i_null ==2:
                for i_in in range(self.N):
                    if not i_in==idx:
                        tmp = [ bX in [i_in, idx] for bX in range(self.N) ]
                        tmp_a.append(tmp)
            
            elif i_null ==3:
                for i_in in range(self.N):
                    for i2_in in range(self.N):
                        if not (i_in==i2_in or (i_in==idx or i2_in==idx)):
                            tmp = [ ( bX in [i_in, i2_in, idx]) for bX in range(self.N) ]
                            tmp_a.append(tmp)
            elif i_null ==4:
                for i4_in in range(self.N):
                    if not (i4_in==idx):
                        tmp = [ (bX==idx or (not bX==i4_in)) for bX in range(self.N) ]
                        tmp_a.append(tmp)
            else:
                st()
            
            a_list.append(tmp_a)

        return a_list 
Example 7
def print_weights(network):
    first = True
    for m in network.modules():
        if isinstance(m, nn.Conv2d):
            if first:
                first = False
                print('weight: {}'.format(m.weight.data))
                st()
Example 8
def ri2ssos(inp):
    st()
    sz   = inp.shape
    nCh  = int(int(sz[3])/2)
    if nCh == 1:
        out  = tf.sqrt(tf.square(inp[:,:,:,0:nCh])+tf.square(inp[:,:,:,nCh:]))
        return out
    else:
        st()
Example 9
def dataset_std(root, data_split, tasks):
    input_list = sorted(glob.glob(join(root, 'rgb', data_split, '*.jpg')))
    st()
    targets_list = []
    for task in tasks:
        targets_list.append(
            sorted(glob.glob(join(root, task, data_split, '*.png'))))
    # return list(zip(input_list, targets_list))
    return input_list, targets_list
Example 10
def get_grad_flow(named_parameters):
    ave_grads = []
    layers = []
    for n, p in named_parameters:
        st()
        if (p.requires_grad) and ("bias" not in n):
            layers.append(n)
            ave_grads.append(p.grad.abs().mean())

    return ave_grads
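A hedged usage sketch for get_grad_flow above, assuming a plain PyTorch model; the toy network and loss below are made up for illustration, and the st() call inside the loop will pause in the debugger once per parameter:

    import torch
    import torch.nn as nn

    net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
    loss = net(torch.randn(16, 4)).sum()
    loss.backward()  # populate .grad so the helper has gradients to average
    ave_grads = get_grad_flow(net.named_parameters())  # mean |grad| of each non-bias parameter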
Example 11
def generate_pyc(name):
    st(context=21)
    fp, pathname, description = imp.find_module(name)
    print fp
    print pathname
    print description
    try:
        imp.load_module(name, fp, pathname, description)
    finally:
        if fp:
            fp.close()
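generate_pyc above relies on the Python 2-era imp module. On Python 3 a rough equivalent (a sketch under that assumption, not the original author's code) goes through importlib, which caches the compiled bytecode under __pycache__ as a side effect of the import:

    import importlib

    def generate_pyc_py3(name):
        # importing the module compiles it and writes the cached .pyc under __pycache__
        return importlib.import_module(name)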
Example 12
 def __init__(self, name, G, nCh_out, nCh=16, use_1x1Conv=False, w_decay=0):
     if G == 'UnetINDiv4_CCAM':
         self.net = UnetINDiv4_CCAM
     else:
         st()
     self.name = name
     self.nCh = nCh
     self.nCh_out = nCh_out
     self.reuse = False
     self.use_1x1Conv = use_1x1Conv
     self.w_decay = w_decay
Example 13
 def get_path(self, start, end): 
     #self.get_current_planning_graph()
     try:
         traj, weight = path_planner.get_mpc_path(start,end,self.planning_graph)
     except:
         st()
     if traj and weight < 600:
         return traj, weight
     else: 
         traj = False
         weight = None
         return traj, weight
Example 14
def dc_dataset_register():
    st(context=27)
    paras = {}
    csv_form = ReadCSVForm()
    df_form = ReadDFForm()
    paras['csv_form'] = csv_form
    paras['df_form'] = df_form
    if request.method=='POST':
        if request.form['submit']=='csv':
            paras['file_list'] = filter(None, request.form['file_list'].strip().split(';'))
            paras['nrows_list'] = filter(None, request.form['nrows_list'].strip().split(';'))
            try:
                # make sure file_list and nrows_list have the same length
                assert len(paras['file_list'])==len(paras['nrows_list']), u'file_list and nrows_list differ in length'
                # df_l [(df_name1, df1), (df_name2, df2), ...]
                df_l = [
                        ( 'df_'+str(f.split('.')[0]), ix.read_csv_file_to_df(P(f), sep=',', nrows=int(n)) ) if f else None 
                        for (f,n) in zip(paras['file_list'], paras['nrows_list'])
                        ]
                # save locally in HDF5 format (for now a suffix distinguishes data from meta information)
                # also compute each dataframe's md5 and store it in the data table
                with pd.HDFStore(HDF5_path) as store:
                    df_md5_tmp = {}
                    for df in df_l:
                        md5 = calculate_dataframe_md5(df[1])
                        df_name = df[0]+DATA_SUFF
                        df_md5_tmp[df_name] = md5
                        store.put(df_name, df[1])
                    # merge with the existing (df_name, md5) records
                    # merge_dataframe_md5(df_md5_tmp)
                paras['df_l_from_csv'] = df_l
                paras['df_nrow'] = 20
                return render_template('dc_dataset_register.html', **paras)
            except Exception,e:
                return render_template('dc_error.html', e_message=e)
        elif  request.form['submit']=='df':
            # df_l [(df_name1, df1), (df_name2, df2), ...]
            df_l = []
            with pd.HDFStore(request.form['store_path'].strip()) as store:
                for s_k in store.keys():
                    if isinstance(store.get(s_k), pd.DataFrame):
                        if s_k.endswith(DATA_SUFF):
                            df_l.append( (extract_dataframe_name(s_k, HDF5_PREF, ''), store.get(s_k)) )
                        elif s_k.endswith(META_SUFF):
                            df_l.append( (extract_dataframe_name(s_k, HDF5_PREF, ''), store.get(s_k)) )
                        else:
                            pass
            paras['df_l_from_hdf5'] = df_l
            paras['df_nrow'] = 100
            return render_template('dc_dataset_register.html', **paras)
        else:
            return render_template('dc_dataset_register.html', **paras)
Example 15
 def load_opts(opt, exp_name):
     #optLists = ['model','dataroot','savepath','nEpoch','lr','disp_div_N','batchSize','input_nc','gpu_ids','name','use_residual','no_flip','lambda_cost','weight_decay','use_dropout','optimizer','ri','normalize']
     exp_dir = os.path.join(opt.savepath, exp_name)
     with open(os.path.join(exp_dir, 'opt.txt'), 'r') as opt_file:
         for aLine in opt_file.readlines():
             idx = aLine.find(':')
             if idx == -1:
                 continue
             else:
                 cur_opt = aLine[:idx]
                 cur_val = aLine[idx + 2:-1]
                 if cur_opt == 'model':
                     opt.model = cur_val
                 elif cur_opt == 'dataroot':
                     opt.dataroot = cur_val
                 elif cur_opt == 'savepath':
                     opt.savepath = cur_val
                 elif cur_opt == 'nEpoch':
                     opt.nEpoch = int(cur_val)
                 elif cur_opt == 'lr':
                     opt.lr = float(cur_val)
                 elif cur_opt == 'disp_div_N':
                     opt.disp_div_N = int(cur_val)
                 elif cur_opt == 'batchSize':
                     opt.batchSize = int(cur_val)
                 elif cur_opt == 'input_nc':
                     opt.input_nc = int(cur_val)
                 elif cur_opt == 'gpu_ids':
                     cur_val = cur_val[1:-1]
                     opt.gpu_ids = [int(cur_val)]
                     print('Use GPU id......')
                 elif cur_opt == 'name':
                     opt.name = cur_val
                 elif cur_opt == 'use_residual':
                     opt.use_residual = (cur_val == 'True')
                 elif cur_opt == 'no_flip':
                     opt.no_flip = (cur_val == 'True')
                 elif cur_opt == 'lambda_cost':
                     opt.lambda_cost = float(cur_val)
                 elif cur_opt == 'weight_decay':
                     opt.weight_decay = float(cur_val)
                 elif cur_opt == 'use_dropout':
                     opt.use_dropout = (cur_val == 'True')
                 elif cur_opt == 'optimizer':
                     opt.optimizer = cur_val
                 elif cur_opt == 'ri':
                     opt.ri = (cur_val == 'True')
                 elif cur_opt == 'normalize':
                     opt.normalize = (cur_val == 'True')
                 else:
                     st()
     return opt
Example 16
 def __init__(self, name, G, nCh_out, nCh_seg, nCh=16, w_decay=0):
     if G == 'NVDLMED':
         self.net = NVDLMED
     else:
         st()
     self.name = name
     self.nCh = nCh
     self.nCh_out = nCh_out
     self.nCh_seg = nCh_seg
     self.reuse = False
     self.w_decay = w_decay
     self.reg_ = tf.contrib.layers.l2_regularizer(
         scale=self.w_decay) if self.w_decay > 0 else None
Example 17
 def value_and_gradient(self, X):
     id1 = (X @ self.u1[:-1]) + self.u1[-1]
     id2 = (X @ self.u2[:-1]) + self.u2[-1]
     if self.abs_act:
         id1 = torch.sign(id1)
         id2 = torch.sign(id2)
     else:
         id1 = id1.gt(0) * 1.
         id2 = id2.gt(0) * 1.
     # dy (X @ self.u0[:-1]) + self.u0[-1] + (id1 * sc1).sum(1, keepdims=True) - (id2 * sc2).sum(1, keepdims=True)
     st()
     dy = self.u0.T + id1 @ self.u1.T - id2 @ self.u2.T
     y = (X * dy[:, :-1]).sum(1) + dy[:, -1]
     return y, dy[:, :-1]
Example 18
 def onclick(event):
     global ix, iy, clicks, coords, ps, clickok
     if clickok:
         clickok = False
         ix, iy = event.xdata, event.ydata
         clicks += 1
         coords.append((ix, iy))
         if clicks % 2: # if odd
             print('x = %d, y = %d'%( ix, iy))
             print('click on another point to set desired heading')
             clickok = True
         else:
             try:
                 dys = coords[1][1] - coords[0][1]
                 dxs = coords[1][0] - coords[0][0]
                 theta = np.arctan2(-dys, dxs) / np.pi * 180
                 print('theta = %d'%(theta))
                 ps.append((coords[0][0], coords[0][1], theta, 0))
                 coords = []
                 start = ps[-2]
                 end = ps[-1]
                 traj, weight = astar_trajectory(simple_graph, start, end)
                 #print(traj)
                 print(weight)
                 st()
                 # while not complete_path_is_safe(traj):
                 #     safe_subpath, safe_start = longest_safe_subpath(traj)
                 #      # TODO: not sure how to generate the path
                 #     new_subpath = astar_trajectory(simple_graph, safe_start, end)
                 #     traj = safe_subpath + new_subpath
                 for start, end in zip(traj, traj[1:]):
                     #print('Start'+str(start))
                     #print(end)
                     segment = segment_to_mpc_inputs(start, end, edge_info)
                     print(segment)
                     plt.plot(segment[0,0], segment[0,1], 'b.')
                     plt.plot(segment[-1,0], segment[-1,1], 'rx')
                     plt.plot(segment[:,0], segment[:,1], 'k--')
                     plt.pause(0.1)
                 print('trajectory plotted!')
                 print('click to set desired xy')
                 clickok = True
                 plt.show()
             except:
                 clickok = True
                 print('CANNOT FIND TRAJECTORY: click again to set xy!')
                 if len(ps) > 1:
                     ps = ps[:-1]
Example 19
 def oof(self, model, params, sub):
     stacker_train = np.zeros((self.X.shape[0], 1))
     for index, (trn_idx, val_idx) in enumerate(self.skf.split(self.X, self.y)):
         trn_x, val_x = self.X[trn_idx], self.X[val_idx]
         trn_y, val_y = self.y[trn_idx], self.y[val_idx]
         # train model
         logging.info('Train model in fold {0}'.format(index))
         history = model.fit(x=trn_x, y=trn_y, validation_data=(val_x, val_y), shuffle=True)
         st(context=21)
         logging.info('Predict in fold {0}'.format(index))
         prob = model.predict(x=self.test)
         stacker_train[val_idx,0] = model.predict(val_x)
         sub['target'] += prob / self.N
     logging.info('{0} of folds'.format(self.N))
     logging.info('Oof by nn model Done')
     return sub, stacker_train
Example 20
 def __iter__(self):
     for df in pd.read_csv(self.input_file_path, chunksize=30000, sep=self.sep):
          # keep only rows whose keshi (department) is in the target set
         if self.target_keshi:
             df = df[df['keshi'].isin(self.target_keshi)]
         df = df.reset_index(drop=True)
         # cut fields
         for col in self.cut_columns:
             df[col + '_cut'] = df[col].astype('object').apply(self.__conduct_jieba_cut)
         # yield sentences
         try:
             for i in df.index:
                 for col in self.wanted_columns:
                     yield df.iloc[i][col + '_cut']
         except Exception,e:
             st(context=21)
             print 'ee'
Example 21
def addNoiseFromPath(path):
    rate,sample = wavfile.read(path)
    if type(sample[0])==np.ndarray:
        sample = sample[:,0]
    levelOfNoise = 0.1*np.average(abs(sample))
    noise = np.random.normal(0,levelOfNoise,len(sample))
    sample += noise
    Fe = 44100
    f, t, Sxx = signal.spectrogram(sample, Fe,nfft=511,nperseg=len(sample)//225)
    st()
    Sxx = np.resize(Sxx, (256,256))
    f = np.resize(f,256)
    t = np.resize(t,256)
    print(Sxx.shape)
    norm = cls.Normalize(vmin=-1.,vmax=1.)
    norm = cls.LogNorm(vmin=Sxx.min(), vmax=Sxx.max())
    img = plt.pcolormesh(t, f, Sxx,norm=norm,cmap='jet')
    return img    
Example 22
 def __init__(self,
              name,
              G,
              nCh_out,
              nCh=16,
              use_1x1Conv=False,
              w_decay=0,
              resid=False):
     if G == 'UnetINMultiDiv8':
         self.net = UnetINMultiDiv8
     else:
         st()
     self.name = name
     self.nCh = nCh
     self.nCh_out = nCh_out
     self.reuse = False
     self.use_1x1Conv = use_1x1Conv
     self.w_decay = w_decay
     self.resid = resid
Example 23
def dc_feature_engineering_datetime():
    """
    Function
        Data reaching this point is guaranteed to be of datetime type.
        Convert the datetime columns to the given format and derive numeric values from them.
    """
    st(context=21)
    ret = {}
    try:
        store = pd.HDFStore(HDF5_path)
        df_name = request.args.get('df_name', None)
        new_dataframe_name = request.args.get('new_dataframe_name',None)
        value_as_base = pd.to_datetime(request.args.get('value_as_base'))
        derive_prefix = request.args.get('derive_prefix',None)
        assert HDF5_PREF+df_name+DATA_SUFF in store.keys(), \
            "dataframe %s not in store %s"%(HDF5_PREF+df_name+DATA_SUFF, store.filename)
        df = store[df_name+DATA_SUFF]
        col_name_list = []
        for col_name in request.args.getlist('col_names',None):
            col_name_list.append(col_name.split('.')[1])
            print col_name
        # convert the format of the selected datetime columns
        ix.tag_meta_auto(df)
        ix.update_meta(df, col_name_list, "col_datatype","datetime")
        ix.type_casting(df, col_name_list, dt_format="%Y%m%d") 
        store[df_name+DATA_SUFF] = df
        # store the converted data in a new dataframe
        t_df = ix.derive_columns_from_datetime(
                df, 
                col_name_list, 
                value_as_base=value_as_base,
                inverse=True,
                derive_prefix=derive_prefix)
        if new_dataframe_name!='':
            update_df_in_HDFStore_by_name(store, new_dataframe_name, t_df) 
        # no need to update the md5 here since the format has already changed
        ret['impact_columns'] = str(len(col_name_list))
        store.close()
        return json.dumps(ret)
    except Exception,e:
        return render_template('dc_error.html', e_message=e)
Example 24
    def display_images(self, visuals, epoch, table=True, phase='train'):
        idx = self._get_display_id(phase)
        if self.display_id > 0:
            if table:
                for i, (label, image_numpy) in enumerate(visuals.items()):
                    if i == 0:
                        image_conc = self.tensor2im(image_numpy)
                        label_conc = label
                    else:
                        if 'sem' in label:
                            from .util import labels_to_colors
                            image = labels_to_colors(
                                image_numpy, self.opt.color_palette).astype(
                                    np.uint8).transpose([2, 0, 1])

                            image_conc = np.concatenate((image_conc, image),
                                                        axis=1)
                            label_conc += ('\t' + label)
                        else:
                            image = self.tensor2im(image_numpy)
                            image_conc = np.concatenate((image_conc, image),
                                                        axis=1)
                            label_conc += ('\t' + label)

                self.vis.image(
                    image_conc,
                    opts=dict(title='{} Epoch[{}] '.format(self.name, epoch) +
                              label_conc),
                    win=self.display_id + idx)

            else:
                st()
                for label, image_numpy in visuals.items():
                    self.vis.image(
                        (self.tensor2im(image_numpy)),
                        opts=dict(
                            title='{} Epoch[{}] '.format(self.name, epoch) +
                            label),
                        win=self.display_id + idx)

                    idx += 1
Example 25
def train_cvx(filename, input_variables, M=20000, abs_act=False, tol=1e-8, beta=1e-4, nesting=0):
    # Load data
    (fs, ifs, ifs_star, out, _) = process_data(filename, 
                                output_regex='^sqJ$', 
                                input_columns=[v for v in input_variables])

    if ifs.shape[0] ==0:
        u1 = torch.zeros((3,1))
        u2 = torch.zeros((3,1))
        u2[-1] = 1. # output will be -1 for every x -> always feasible
        return CvxModel(u1, u2, abs_act=False)

    # Scale
    n = (fs.shape[0] + 2*ifs.shape[0])
    mean = (ifs.sum(0, keepdims=True) + ifs_star.sum(0, keepdims=True) + fs.sum(0, keepdims=True)) / n
    std = (((ifs - mean).square().sum(0, keepdims=True) + 
            (ifs_star - mean).square().sum(0, keepdims=True) +
            (fs - mean).square().sum(0, keepdims=True)) / (n-1)).sqrt()
    std = std.mean()  # to preserve 1-Lipschitz continuity, all dimensions must be scaled by the same factor.
    u1, u2 = train_network((fs - mean)/std, 
                            (ifs - mean)/std, 
                            (ifs_star - mean)/std, 
                            out / std, abs_act=abs_act, beta=beta)
    
    # Sparsify
    # u1 = u1[:, u1.norm(dim=0)>tol]
    # u2 = u2[:, u2.norm(dim=0)>tol]
    
    # Create model
    model = CvxModel(u1, u2, abs_act=abs_act, mean=mean, std=std)
    loss = ((model(ifs).squeeze() - out.squeeze()).abs().sum() + 
            model(ifs_star).abs().sum() + model(fs).relu().sum()) / n

    if loss >= 1e-6:
        if nesting <= 5:
            st()
            print(f"Loss is too high when training on {filename}. Increasing M, lower beta, and retrying.")
            return train_cvx(filename, input_variables, M=2*M, beta=beta/2, abs_act=abs_act, tol=tol, nesting=nesting+1)
        else:
            raise RecursionError("Maximum recursion depth for failed training reached.")
    return model
Example 26
def func(a, b=[]):
    print(f"{a=}")
    try:
        frame = inspect.currentframe()
        code = frame.f_code
        avs = inspect.getargvalues(frame)
        return avs
        argcount = code.co_argcount  # 2
        freevars = code.co_freevars  # ()
        cellvars = code.co_cellvars  # ()
        nlocals = code.co_nlocals  # 6
        stacksize = code.co_stacksize  # 6
        consts = code.co_consts  # (None, 'a=', 2, 0, ('set_trace',))
        flags = code.co_flags  # 67
        lnotab = code.co_lnotab  # b'\x00\x01\x0e\x01\x02\x01\x08\x01\n\x01\x04\x01\x0c\x00\x06\x01\x06\x01\x06\x01'
        names = code.co_names  # ('print', 'inspect', 'currentframe', 'getargvalues', 'ipdb', 'set_trace')

        gls, lcs = frame.f_globals, frame.f_locals

        funcname = frame.f_code.co_name  # func
        funcscope = frame.f_back
        func = funcscope[funcname]

        filename = code.co_filename  # '/Users/alberthan/VSCodeProjects/vytd/src/youtube-dl/DELETE.py'
        funclineno = code.co_firstlineno  # 4
        line = linecache.get(filename, funclineno)

        empty_tuple_or_zero = [
            code.co_freevars,
            code.co_posonlyargcount,
            code.co_kwonlyargcount,
        ]

        x = 1 + 1
        from ipdb import set_trace as st
        st()
        return x
    except:
        return b
Example 27
 def grid_search_tuning(self, cat_param, cat_param_grid, f_score, n_jobs):
     cat_estimator = cat.CatBoostClassifier(**cat_param)
     cat_gs = GridSearchCV(
         estimator=cat_estimator,
         param_grid=cat_param_grid,
         cv=self.skf,
         scoring=make_scorer(f_score, greater_is_better=True, needs_proba=True),
         verbose=2,
         n_jobs=n_jobs,
         refit=False
     )
     time_begin = time.time()
     cat_gs.fit(self.X, self.y)
     st(context=21)
     time_end = time.time()
     logging.info('Grid search eat time {0} : params {1}'.format(time_end - time_begin, cat_param_grid))
     logging.info('best_score_ : {0}'.format(cat_gs.best_score_))
     logging.info('best_params_ : {0}'.format(cat_gs.best_params_))
     for score in cat_gs.grid_scores_:
         logging.info('grid_scores_ : {0}'.format(score))
     gc.collect()
     return cat_gs.best_score_, cat_gs.best_params_, cat_gs.grid_scores_
Example 28
def dc_select_row_by_expr():
    """
    Function
        Filter rows according to an expression
    """
    st(context=21)
    ret = {}
    try:
        store = pd.HDFStore(HDF5_path)
        # read the parameters passed back via ajax
        df_name = request.args.get('df_name', None)
        assert HDF5_PREF+df_name+DATA_SUFF in store.keys(), \
            "dataframe %s not in store %s"%(HDF5_PREF+df_name+DATA_SUFF, store.filename)
        new_dataframe_name = request.args.get('new_dataframe_name', None)
        rval_expr = request.args.get('rval_expr', None)
        # non_NA_percent = request.args.get('non_NA_percent',None)
        non_NA_percent = 0
        # expr_symbol = urllib2.unquote(request.args.get('expr_symbol', None))
        expr_symbol = '\$' 
        # filter the data by the given condition
        df = store[df_name+DATA_SUFF]
        ix.tag_meta_auto(df)
        cnt_rows = ix.select_rows_by_expr(
                    df, 
                    expr_symbol=expr_symbol,
                    non_NA_percent=non_NA_percent,
                    rval_expr=rval_expr
                    )
        df = ix.filter_rows(df, cnt_rows)
        # create a new dataframe
        update_df_in_HDFStore_by_name(store, new_dataframe_name, df)
        ret['info'] = 'affects %s number of rows'%(str(sum(cnt_rows)))
        store.close()
    except Exception, e:
        store.close()
        return render_template('dc_error.html', e_message=e)
Example 29
    def get_null_list_for_idx(self, idx):
        a_list = []
        for i_null in self.null_N_set:
            tmp_a = []
            if i_null == 1:
                tmp = [bX == idx for bX in range(self.N)]
                tmp_a.append(tmp)

            elif i_null == 2:
                for i_in in range(self.N):
                    if not i_in == idx:
                        tmp = [bX in [i_in, idx] for bX in range(self.N)]
                        tmp_a.append(tmp)

            elif i_null == 3:
                for i_in in range(self.N):
                    for ii_in in range(self.N):
                        if not (i_in == ii_in or
                                (i_in == idx or ii_in == idx)):
                            tmp = [(bX in [i_in, ii_in, idx])
                                   for bX in range(self.N)]
                            tmp_a.append(tmp)

            elif i_null == 4:
                for i_in in range(self.N):
                    for ii_in in range(self.N):
                        for iii_in in range(self.N):
                            if not ((i_in == ii_in or i_in == iii_in
                                     or ii_in == iii_in) or
                                    (i_in == idx or ii_in == idx
                                     or iii_in == idx)):
                                tmp = [(bX in [i_in, ii_in, iii_in, idx])
                                       for bX in range(self.N)]
                                tmp_a.append(tmp)
            elif i_null == 5:
                for i4_in in range(self.N):
                    for i5_in in range(self.N):
                        for i6_in in range(self.N):
                            if not ((idx in [i4_in, i5_in, i6_in]) or
                                    (i4_in == i5_in or i4_in == i6_in
                                     or i5_in == i6_in)):
                                tmp = [(bX == idx)
                                       or not (bX in [i4_in, i5_in, i6_in])
                                       for bX in range(self.N)]
                                tmp_a.append(tmp)
            elif i_null == 6:
                for i5_in in range(self.N):
                    for i6_in in range(self.N):
                        if not (idx == i5_in or idx == i6_in
                                or i5_in == i6_in):
                            tmp = [(bX == idx or not (bX in [i5_in, i6_in]))
                                   for bX in range(self.N)]
                            tmp_a.append(tmp)
            elif i_null == 7:
                for i6_in in range(self.N):
                    if not (i6_in == idx):
                        tmp = [(bX == idx or (not bX == i6_in))
                               for bX in range(self.N)]
                        tmp_a.append(tmp)
            else:
                st()

            a_list.append(tmp_a)

        return a_list
Example 30
    def getBatch_RGB_varInp_tarid_missid(self, start, end, tar_id, miss_id):

        nB = end - start
        end = min([end, self.len])
        start = end - nB
        batch = self.flist[start:end]
        # channel First :
        sz_a = [nB, self.nCh_out, self.nY, self.nX]
        sz_M = [nB, 1, self.nY, self.nX]

        target_class_idx = np.empty([nB, 1], dtype=np.uint8)
        a_img = np.empty(sz_a, dtype=np.float32)
        b_img = np.empty(sz_a, dtype=np.float32)
        c_img = np.empty(sz_a, dtype=np.float32)
        d_img = np.empty(sz_a, dtype=np.float32)
        e_img = np.empty(sz_a, dtype=np.float32)
        f_img = np.empty(sz_a, dtype=np.float32)
        g_img = np.empty(sz_a, dtype=np.float32)
        n_img = np.empty(sz_a, dtype=np.float32)
        target_img = np.empty(sz_a, dtype=np.float32)

        a_mask = np.zeros(sz_M, dtype=np.float32)
        b_mask = np.zeros(sz_M, dtype=np.float32)
        c_mask = np.zeros(sz_M, dtype=np.float32)
        d_mask = np.zeros(sz_M, dtype=np.float32)
        e_mask = np.zeros(sz_M, dtype=np.float32)
        f_mask = np.zeros(sz_M, dtype=np.float32)
        g_mask = np.zeros(sz_M, dtype=np.float32)
        n_mask = np.zeros(sz_M, dtype=np.float32)

        targ_idx = tar_id  #random.randint(0,self.N-1)
        # Here, choose the random null idx in the set, and which is not in the target
        if tar_id == miss_id:
            N_for_null = 0
        else:
            N_for_null = 1  # random.randint(0,len(self.null_N_set)-1)
        # 0: 7-->1 map
        # 6: 1-->1 map
        cur_list = self.list_for_null[targ_idx][N_for_null]

        if len(cur_list) == 1:
            tar_class_bools = cur_list[0]
        else:
            if miss_id > tar_id:
                s = -1
            else:
                s = 0
            tar_class_bools = cur_list[miss_id +
                                       s]  #random.randint(0,len(cur_list)-1)]

        for iB, aFname in enumerate(batch):
            aug_idx = random.randint(0, 1)
            a_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[0] + '.png')),
                                      dtype=np.float32)
            b_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[1] + '.png')),
                                      dtype=np.float32)
            c_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[2] + '.png')),
                                      dtype=np.float32)
            d_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[3] + '.png')),
                                      dtype=np.float32)
            e_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[4] + '.png')),
                                      dtype=np.float32)
            f_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[5] + '.png')),
                                      dtype=np.float32)
            g_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[6] + '.png')),
                                      dtype=np.float32)
            n_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[7] + '.png')),
                                      dtype=np.float32)

            if self.use_aug:
                if aug_idx == 1:
                    a_tmp = np.flip(a_tmp, axis=3)
                    b_tmp = np.flip(b_tmp, axis=3)
                    c_tmp = np.flip(c_tmp, axis=3)
                    d_tmp = np.flip(d_tmp, axis=3)
                    e_tmp = np.flip(e_tmp, axis=3)
                    f_tmp = np.flip(f_tmp, axis=3)
                    g_tmp = np.flip(g_tmp, axis=3)
                    n_tmp = np.flip(n_tmp, axis=3)
            if self.use_norm_std:
                a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / np.std(a_tmp)
                b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / np.std(b_tmp)
                c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / np.std(c_tmp)
                d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / np.std(d_tmp)
                e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / np.std(e_tmp)
                f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / np.std(f_tmp)
                g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / np.std(g_tmp)
                n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / np.std(n_tmp)
            else:
                scale = 255.0
                a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / scale
                b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / scale
                c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / scale
                d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / scale
                e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / scale
                f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / scale
                g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / scale
                n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / scale

            if targ_idx == 0:
                target_img[iB, :, :, :] = a_img[iB, :, :, :]
                a_mask[iB, 0, :, :] = 1.
            elif targ_idx == 1:
                target_img[iB, :, :, :] = b_img[iB, :, :, :]
                b_mask[iB, 0, :, :] = 1.
            elif targ_idx == 2:
                target_img[iB, :, :, :] = c_img[iB, :, :, :]
                c_mask[iB, 0, :, :] = 1.
            elif targ_idx == 3:
                target_img[iB, :, :, :] = d_img[iB, :, :, :]
                d_mask[iB, 0, :, :] = 1.
            elif targ_idx == 4:
                target_img[iB, :, :, :] = e_img[iB, :, :, :]
                e_mask[iB, 0, :, :] = 1.
            elif targ_idx == 5:
                target_img[iB, :, :, :] = f_img[iB, :, :, :]
                f_mask[iB, 0, :, :] = 1.
            elif targ_idx == 6:
                target_img[iB, :, :, :] = g_img[iB, :, :, :]
                g_mask[iB, 0, :, :] = 1.
            elif targ_idx == 7:
                target_img[iB, :, :, :] = n_img[iB, :, :, :]
                n_mask[iB, 0, :, :] = 1.
            else:
                st()
            target_class_idx[iB] = targ_idx
        return target_class_idx, a_img, b_img, c_img, d_img, e_img, f_img, g_img, n_img, a_mask, b_mask, c_mask, d_mask, e_mask, f_mask, g_mask, n_mask, tar_class_bools, target_img
Example 31
    def getBatch_RGB(self, start, end):
        end = min([end, self.len])
        batch = self.flist[start:end]
        # channel First :
        sz_a = [end - start, self.nCh_out, self.nY, self.nX]
        sz_M = [end - start, 1, self.nY, self.nX]

        target_class_idx = np.empty([end - start, 1], dtype=np.uint8)
        a_img = np.empty(sz_a, dtype=np.float32)
        b_img = np.empty(sz_a, dtype=np.float32)
        c_img = np.empty(sz_a, dtype=np.float32)
        d_img = np.empty(sz_a, dtype=np.float32)
        e_img = np.empty(sz_a, dtype=np.float32)
        f_img = np.empty(sz_a, dtype=np.float32)
        g_img = np.empty(sz_a, dtype=np.float32)
        n_img = np.empty(sz_a, dtype=np.float32)
        target_img = np.empty(sz_a, dtype=np.float32)

        a_mask = np.zeros(sz_M, dtype=np.float32)
        b_mask = np.zeros(sz_M, dtype=np.float32)
        c_mask = np.zeros(sz_M, dtype=np.float32)
        d_mask = np.zeros(sz_M, dtype=np.float32)
        e_mask = np.zeros(sz_M, dtype=np.float32)
        f_mask = np.zeros(sz_M, dtype=np.float32)
        g_mask = np.zeros(sz_M, dtype=np.float32)
        n_mask = np.zeros(sz_M, dtype=np.float32)
        targ_idx = random.randint(0, self.N - 1)
        tar_class_bools = [x == targ_idx for x in range(self.N)]

        for iB, aFname in enumerate(batch):
            aug_idx = random.randint(0, 1)
            a_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[0] + '.png')),
                                      dtype=np.float32)
            b_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[1] + '.png')),
                                      dtype=np.float32)
            c_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[2] + '.png')),
                                      dtype=np.float32)
            d_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[3] + '.png')),
                                      dtype=np.float32)
            e_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[4] + '.png')),
                                      dtype=np.float32)
            f_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[5] + '.png')),
                                      dtype=np.float32)
            g_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[6] + '.png')),
                                      dtype=np.float32)
            n_tmp = np.ndarray.astype(self.read_png(
                join(self.root, aFname + self.fExp[7] + '.png')),
                                      dtype=np.float32)

            if self.use_aug:
                if aug_idx == 1:
                    a_tmp = np.flip(a_tmp, axis=3)
                    b_tmp = np.flip(b_tmp, axis=3)
                    c_tmp = np.flip(c_tmp, axis=3)
                    d_tmp = np.flip(d_tmp, axis=3)
                    e_tmp = np.flip(e_tmp, axis=3)
                    f_tmp = np.flip(f_tmp, axis=3)
                    g_tmp = np.flip(g_tmp, axis=3)
                    n_tmp = np.flip(n_tmp, axis=3)
            if self.use_norm_std:
                a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / np.std(a_tmp)
                b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / np.std(b_tmp)
                c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / np.std(c_tmp)
                d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / np.std(d_tmp)
                e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / np.std(e_tmp)
                f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / np.std(f_tmp)
                g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / np.std(g_tmp)
                n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / np.std(n_tmp)
            else:
                scale = 255.0
                a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / scale
                b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / scale
                c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / scale
                d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / scale
                e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / scale
                f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / scale
                g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / scale
                n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / scale

            if targ_idx == 0:
                target_img[iB, :, :, :] = a_img[iB, :, :, :]
                a_mask[iB, 0, :, :] = 1.
            elif targ_idx == 1:
                target_img[iB, :, :, :] = b_img[iB, :, :, :]
                b_mask[iB, 0, :, :] = 1.
            elif targ_idx == 2:
                target_img[iB, :, :, :] = c_img[iB, :, :, :]
                c_mask[iB, 0, :, :] = 1.
            elif targ_idx == 3:
                target_img[iB, :, :, :] = d_img[iB, :, :, :]
                d_mask[iB, 0, :, :] = 1.
            elif targ_idx == 4:
                target_img[iB, :, :, :] = e_img[iB, :, :, :]
                e_mask[iB, 0, :, :] = 1.
            elif targ_idx == 5:
                target_img[iB, :, :, :] = f_img[iB, :, :, :]
                f_mask[iB, 0, :, :] = 1.
            elif targ_idx == 6:
                target_img[iB, :, :, :] = g_img[iB, :, :, :]
                g_mask[iB, 0, :, :] = 1.
            elif targ_idx == 7:
                target_img[iB, :, :, :] = n_img[iB, :, :, :]
                n_mask[iB, 0, :, :] = 1.
            else:
                st()
            target_class_idx[iB] = targ_idx
        return target_class_idx, a_img, b_img, c_img, d_img, e_img, f_img, g_img, n_img, a_mask, b_mask, c_mask, d_mask, e_mask, f_mask, g_mask, n_mask, tar_class_bools, target_img
Example 32
def run_one_fold(fold):
    # read whole train / test data for tokenizer
    df_train = read_train_data()
    df_test = read_test_data()

    # fit tokenizer
    tokenizer = get_fitted_tokenizer(df_train, df_test)
    word_index = tokenizer.word_index
    transformers_count = 0
    all_words = set(word_index.keys())
    for toxic, transformers in toxicIndicator_transformers.items():
        for transformer in transformers:
            if transformer == toxic:
                continue
            if transformer in all_words:
                transformers_count += tokenizer.word_counts[transformer]
    print('toxic transformer count : {0}'.format(transformers_count))
    print('unique token : {0}'.format(len(word_index)))

    # get embedding lookup table
    embedding_dim = 300
    embedding_path = '../data/input/glove_dir/glove.840B.300d.txt'
    # embedding_path = '../data/input/fasttext_dir/fasttext.300d.txt'
    embedding_lookup_table = get_embedding_lookup_table(word_index, embedding_path, embedding_dim)

    # read in fold data
    df_trn, df_val = read_data_in_fold(fold)

    # prepare data : pre truncating and post truncating
    X_test_pre = get_padded_pre_sequence(tokenizer, df_test[COMMENT_COL].astype('str').values.tolist())
    X_test_post = get_padded_post_sequence(tokenizer, df_test[COMMENT_COL].astype('str').values.tolist())
    id_test = df_test[ID_COL].values.tolist()
    print('Test data pre shape {0}'.format(X_test_pre.shape))
    print('Test data post shape {0}'.format(X_test_post.shape))

    if PRE_OR_POST=='pre':
        X_trn = get_padded_pre_sequence(tokenizer, df_trn[COMMENT_COL].astype('str').values.tolist())
        y_trn = df_trn[label_candidates].values
        print('Fold {0} train data pre shape {1} '.format(fold, X_trn.shape))
        X_val = get_padded_pre_sequence(tokenizer, df_val[COMMENT_COL].astype('str').values.tolist())
        y_val = df_val[label_candidates].values
        id_val = df_val[ID_COL].values.tolist()
        print('Fold {0} valid data pre shape {1} '.format(fold, X_val.shape))

    if PRE_OR_POST=='post':
        X_trn = get_padded_post_sequence(tokenizer, df_trn[COMMENT_COL].astype('str').values.tolist())
        y_trn = df_trn[label_candidates].values
        print('Fold {0} train data post shape {1} '.format(fold, X_trn.shape))
        X_val = get_padded_post_sequence(tokenizer, df_val[COMMENT_COL].astype('str').values.tolist())
        y_val = df_val[label_candidates].values
        id_val = df_val[ID_COL].values.tolist()
        print('Fold {0} valid data post shape {1} '.format(fold, X_val.shape))

    # preds result array
    preds_test_pre = np.zeros((X_test_pre.shape[0], NUM_OF_LABEL))
    preds_test_post = np.zeros((X_test_post.shape[0], NUM_OF_LABEL))
    assert preds_test_pre.shape == preds_test_post.shape, 'test data pre and post shapes do not match'
    preds_valid = np.zeros((X_val.shape[0], NUM_OF_LABEL))

    # train model
    for run in range(RUNS_IN_FOLD):
        print('\nFold {0} run {1} begin'.format(fold, run))

        # model
        model = get_model(embedding_lookup_table, float(FLAGS.dp), float(FLAGS.sdp))
        # print(model.summary())

        if mode == 'try':
            st(context=3)

        # callbacks
        val_auc = RocAucMetricCallback()
        es = EarlyStopping(monitor=VAL_AUC, mode='max', patience=5)
        bst_model_path = \
            '../data/output/model/{0}fold_{1}run_{2}dp_{3}sdp_pool_cnn.h5'.format(
                fold, run, FLAGS.dp, FLAGS.sdp)
        mc = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)
        rp = ReduceLROnPlateau(
            monitor=VAL_AUC, mode='max',
            patience=2,
            cooldown=1,
            factor=np.sqrt(0.1),
            min_lr=0.0006,
            verbose=1
        )
        # train
        hist = model.fit(
            x=X_trn, y=y_trn,
            validation_data=(X_val, y_val),
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            shuffle=True,
            callbacks=[val_auc, es, mc, rp]
        )
        model.load_weights(bst_model_path)
        bst_val_score = max(hist.history[VAL_AUC])
        print('\nFold {0} run {1} best val score : {2}'.format(fold, run, bst_val_score))

        # predict
        print('\nFold {0} run {1} predict on test pre truncating'.format(fold, run))
        preds_test_pre += model.predict(X_test_pre, batch_size=256, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} predict on test post truncating'.format(fold, run))
        preds_test_post += model.predict(X_test_post, batch_size=256, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} predict on valid'.format(fold, run))
        preds_valid += model.predict(X_val, batch_size=256, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} done'.format(fold, run))

        del model
        gc.collect()

    # record preds result
    preds_test_avg = ( preds_test_pre + preds_test_post ) / 2.0
    preds_test = preds_test_avg.T
    df_preds_test = pd.DataFrame()
    df_preds_test[ID_COL] = id_test
    for idx, label in enumerate(label_candidates):
        df_preds_test[label] = preds_test[idx]
    df_preds_test.to_csv(
        '../data/output/preds/pool_cnn/{0}/{1}/{2}fold_test.csv'.format(FLAGS.dp, FLAGS.sdp, fold), index=False)

    preds_valid = preds_valid.T
    df_preds_val = pd.DataFrame()
    df_preds_val[ID_COL] = id_val
    for idx, label in enumerate(label_candidates):
        df_preds_val[label] = preds_valid[idx]
    df_preds_val.to_csv(
        '../data/output/preds/pool_cnn/{0}/{1}/{2}fold_valid.csv'.format(FLAGS.dp, FLAGS.sdp, fold), index=False)
Example 33
        print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'batch_valid ' + str(i)
    # update better performance model
    global best_score
    if top1.avg > best_score:
        torch.save(model, args.save)
        print 'save model'
        best_score = top1.avg
    print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'epoch_valid ' + str(epoch)


# Loop over epochs.
lr = args.lr
best_val_loss = None

# At any point you can hit Ctrl + C to break out of training early.
st(context=27)
best_score = 0
try:
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    for epoch in range(1, args.epochs + 1):
        train(epoch, optimizer, questrainfealistShu, labeltrainlistShu, lengthtrainlistShu)
        valid(epoch, questrainfealistShu_valid, labeltrainlistShu_valid, lengthtrainlistShu_valid)
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')


def test(model, quesfeaShu, labelShu, lengthShu):

    model.eval()
Example 34
    for c in connected_clients:
        if (c not in [client, server]):
            c.sendall(data)
            
if __name__ == '__main__':
    # create the server's listening socket
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # enable port reuse before bind
    # server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server_socket.bind((HOST, PORT))
    server_socket.listen(LISTEN)
    print 'Server listening on port %s ' % (PORT)
    connected_clients.append(server_socket)
    while 1:
        r, w, e = select.select(connected_clients, [], [], 20)
        st(context=17)
        for s in r:
            if s is server_socket:
                conn, addr = server_socket.accept()
                connected_clients.append(conn)
            else:
                data = s.recv(4096)
                if not data:
                    connected_clients.remove(s)
                else:
                    print '<(%s, %s)> : ' % addr, data
                    broadcast_all(data, s, server_socket)
        for s in w:
            pass
        for s in e:
            pass
Example 35
def run_one_fold(fold):
    # read whole train / test data for tokenizer
    df_train = read_train_data()
    df_test = read_test_data()

    # fit tokenizer
    tokenizer = get_fitted_tokenizer(df_train, df_test)
    word_index = tokenizer.word_index
    transformers_count = 0
    all_words = set(word_index.keys())
    for toxic, transformers in toxicIndicator_transformers.items():
        for transformer in transformers:
            if transformer==toxic:
                continue
            if transformer in all_words:
                transformers_count += tokenizer.word_counts[transformer]
                # print(transformer)
    print('toxic transformer count : {0}'.format(transformers_count))
    print('unique token : {0}'.format(len(word_index)))

    # get embedding lookup table
    embedding_dim = 300
    # fasttext_path = '../data/input/fasttext_dir/fasttext.300d.txt'
    # embedding_lookup_table = get_embedding_lookup_table(word_index, fasttext_path, embedding_dim)
    glove_path = '../data/input/glove_dir/glove.840B.300d.txt'
    # glove_path = '../data/input/glove_dir/glove.6B.{0}d.txt'.format(embedding_dim)
    embedding_lookup_table = get_embedding_lookup_table(word_index, glove_path, embedding_dim)

    # read in fold data
    df_trn, df_val = read_data_in_fold(fold)

    # prepare data
    X_test = get_padded_sequence(tokenizer, df_test[COMMENT_COL].astype('str').values.tolist())
    id_test = df_test[ID_COL].values.tolist()
    print('Test data shape {0}'.format(X_test.shape))

    X_trn = get_padded_sequence(tokenizer, df_trn[COMMENT_COL].astype('str').values.tolist())
    y_trn = df_trn[label_candidates].values
    print('Fold {0} train data shape {1} '.format(fold, X_trn.shape))

    X_val = get_padded_sequence(tokenizer, df_val[COMMENT_COL].astype('str').values.tolist())
    y_val = df_val[label_candidates].values
    id_val = df_val[ID_COL].values.tolist()
    print('Fold {0} valid data shape {1} '.format(fold, X_val.shape))

    # preds result array
    preds_test = np.zeros((X_test.shape[0], NUM_OF_LABEL))
    preds_valid = np.zeros((X_val.shape[0], NUM_OF_LABEL))

    # train model
    for run in range(RUNS_IN_FOLD):
        print('\nFold {0} run {1} begin'.format(fold, run))

        # model
        model = get_model(embedding_lookup_table, float(FLAGS.dp), float(FLAGS.sdp))
        print(model.summary())
        st()

        # callbacks
        # es = EarlyStopping(monitor='val_acc', mode='max', patience=3)
        val_auc = RocAucMetricCallback()
        es = EarlyStopping(monitor=VAL_AUC, mode='max', patience=3)
        bst_model_path = '../data/output/model/{0}fold_{1}run_{2}dp_{3}sdp_glove_gru.h5'.format(fold, run, FLAGS.dp, FLAGS.sdp)
        mc = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)

        # train
        hist = model.fit(
            x=X_trn, y=y_trn,
            validation_data=(X_val, y_val),
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            shuffle=True,
            callbacks=[val_auc, es, mc]
        )
        model.load_weights(bst_model_path)
        # bst_val_score = max(hist.history['val_acc'])
        bst_val_score = max(hist.history[VAL_AUC])
        print('\nFold {0} run {1} best val score : {2}'.format(fold, run, bst_val_score))

        # predict
        preds_test += model.predict(X_test, batch_size=1024, verbose=1) / RUNS_IN_FOLD
        preds_valid += model.predict(X_val, batch_size=1024, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} done'.format(fold, run))

        del model
        gc.collect()

    # record preds result
    preds_test = preds_test.T
    df_preds_test = pd.DataFrame()
    df_preds_test[ID_COL] = id_test
    for idx, label in enumerate(label_candidates):
        df_preds_test[label] = preds_test[idx]
    df_preds_test.to_csv('../data/output/preds/glove_gru/{0}/{1}/{2}fold_test.csv'.format(FLAGS.dp, FLAGS.sdp, fold), index=False)

    preds_valid = preds_valid.T
    df_preds_val = pd.DataFrame()
    df_preds_val[ID_COL] = id_val
    for idx, label in enumerate(label_candidates):
        df_preds_val[label] = preds_valid[idx]
    # df_preds_val.to_csv('../data/output/preds/fasttext_gru/{0}fold_valid.csv'.format(fold), index=False)
    df_preds_val.to_csv('../data/output/preds/glove_gru/{0}/{1}/{2}fold_valid.csv'.format(FLAGS.dp, FLAGS.sdp, fold), index=False)
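# Note (added sketch, not part of the original example): RocAucMetricCallback and
# VAL_AUC are used above but not defined in this snippet. A minimal version,
# assuming Keras 2.x (where callbacks see self.validation_data during fit) and
# scikit-learn's roc_auc_score, could look like this:
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

VAL_AUC = 'val_auc'  # assumed name of the monitored quantity

class RocAucMetricCallback(Callback):
    """Write the mean column-wise validation ROC AUC into `logs` after each epoch,
    so that EarlyStopping / ModelCheckpoint can monitor it."""
    def on_epoch_end(self, epoch, logs=None):
        logs = logs if logs is not None else {}
        X_val, y_val = self.validation_data[0], self.validation_data[1]
        y_pred = self.model.predict(X_val, verbose=0)
        logs[VAL_AUC] = roc_auc_score(y_val, y_pred, average='macro')
        print(' - {0}: {1:.6f}'.format(VAL_AUC, logs[VAL_AUC]))
# Because val_auc is placed before EarlyStopping in the callbacks list above,
# the fresh AUC value is already in `logs` when EarlyStopping runs.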
Esempio n. 36
0
    def _build_model(self):
        with tf.device(self.GPU):
            if self.generator == 'unet':
                self.generatorA2B = UnetGenerator(self.opt,
                                                  name='generatorA2B')
                self.generatorB2A = UnetGenerator(self.opt,
                                                  name='generatorB2A')
            elif self.generator == 'unet_residual':
                self.generatorA2B = UnetGenerator(self.opt,
                                                  res=True,
                                                  name='generatorA2B')
                self.generatorB2A = UnetGenerator(self.opt,
                                                  res=True,
                                                  name='generatorB2A')
            else:
                st()

            if self.discriminator == 'basic':
                self.discriminatorA = Discriminator(
                    self.opt,
                    name='discriminatorA',
                    use_sigmoid=self.use_sigmoid)
                self.discriminatorB = Discriminator(
                    self.opt,
                    name='discriminatorB',
                    use_sigmoid=self.use_sigmoid)
            else:
                st()

            self.real_A = tf.placeholder(dtype,
                                         [1, None, self.nX, self.nC * 2],
                                         name='real_A')
            self.real_B = tf.placeholder(dtype,
                                         [1, None, self.nX, self.nC * 2],
                                         name='real_B')

            self.fake_B = self.generatorA2B(self.real_A)
            self.fake_A = self.generatorB2A(self.real_B)
            self.recon_B = self.generatorA2B(self.fake_A)
            self.recon_A = self.generatorB2A(self.fake_B)

            self.iden_B = self.generatorA2B(self.real_B)
            self.iden_A = self.generatorB2A(self.real_A)

            self.DA_fake = self.discriminatorA(self.fake_A)
            self.DB_fake = self.discriminatorB(self.fake_B)

            if self.use_wgan:
                self.gan_loss = -tf.reduce_mean(self.DB_fake) - tf.reduce_mean(
                    self.DA_fake)
                self.cycle_loss = mse_criterion(self.real_A,
                                                self.recon_A) + mse_criterion(
                                                    self.real_B, self.recon_B)
                self.g_loss_a2b = -tf.reduce_mean(
                    self.DB_fake) + self.cyc_lambda * self.cycle_loss
                self.g_loss_b2a = -tf.reduce_mean(
                    self.DA_fake) + self.cyc_lambda * self.cycle_loss
                self.g_loss = self.gan_loss + self.cyc_lambda * self.cycle_loss

            else:
                self.gan_loss = self.criterionGAN(self.DA_fake, tf.ones_like(self.DA_fake)) \
                                + self.criterionGAN(self.DB_fake, tf.ones_like(self.DB_fake))
                self.cycle_loss = mae_criterion(self.real_A,
                                                self.recon_A) + mae_criterion(
                                                    self.real_B, self.recon_B)
                self.g_loss_a2b = self.criterionGAN(self.DB_fake, tf.ones_like(self.DB_fake)) \
                                  + self.cyc_lambda * self.cycle_loss
                self.g_loss_b2a = self.criterionGAN(self.DA_fake, tf.ones_like(self.DA_fake)) \
                                  + self.cyc_lambda * self.cycle_loss
                self.g_loss = self.gan_loss + self.cyc_lambda * self.cycle_loss

            if self.use_identity:
                self.iden_loss = mae_criterion(self.real_A,
                                               self.iden_A) + mae_criterion(
                                                   self.real_B, self.iden_B)
                self.g_loss_a2b = self.g_loss_a2b + self.iden_lambda * self.iden_loss
                self.g_loss_b2a = self.g_loss_b2a + self.iden_lambda * self.iden_loss
                self.g_loss = self.g_loss + self.iden_lambda * self.iden_loss

            self.fake_A_sample = tf.placeholder(
                dtype, [1, None, self.nX, self.nC * 2], name='fake_A_sample')
            self.fake_B_sample = tf.placeholder(
                dtype, [1, None, self.nX, self.nC * 2], name='fake_B_sample')
            self.DA_real = self.discriminatorA(self.real_A)
            self.DB_real = self.discriminatorB(self.real_B)
            self.DA_fake_sample = self.discriminatorA(self.fake_A_sample)
            self.DB_fake_sample = self.discriminatorB(self.fake_B_sample)

            if self.use_wgan:
                self.da_loss_real = -tf.reduce_mean(self.DA_real)
                self.da_loss_fake = tf.reduce_mean(self.DA_fake_sample)
                self.da_loss_GP = gradient_penalty(self.fake_A_sample,
                                                   self.real_A, 1,
                                                   self.discriminatorA)
                self.da_loss = (self.da_loss_real + self.da_loss_fake +
                                self.GP_lambda * self.da_loss_GP) / 2
                self.db_loss_real = -tf.reduce_mean(self.DB_real)
                self.db_loss_fake = tf.reduce_mean(self.DB_fake_sample)
                self.db_loss_GP = gradient_penalty(self.fake_B_sample,
                                                   self.real_B, 1,
                                                   self.discriminatorB)
                self.db_loss = (self.db_loss_real + self.db_loss_fake +
                                self.GP_lambda * self.db_loss_GP) / 2
                self.d_loss = self.da_loss + self.db_loss
            else:
                self.da_loss_real = self.criterionGAN(
                    self.DA_real, tf.ones_like(self.DA_real))
                self.da_loss_fake = self.criterionGAN(
                    self.DA_fake_sample, tf.zeros_like(self.DA_fake_sample))
                self.da_loss = (self.da_loss_real + self.da_loss_fake) / 2
                self.db_loss_real = self.criterionGAN(
                    self.DB_real, tf.ones_like(self.DB_real))
                self.db_loss_fake = self.criterionGAN(
                    self.DB_fake_sample, tf.zeros_like(self.DB_fake_sample))
                self.db_loss = (self.db_loss_real + self.db_loss_fake) / 2
                self.d_loss = self.da_loss + self.db_loss

            self.gan_loss_sum = tf.summary.scalar('generator/gan_loss',
                                                  self.gan_loss)
            self.cycle_loss_sum = tf.summary.scalar('generator/cycle_loss',
                                                    self.cycle_loss)
            self.g_loss_a2b_sum = tf.summary.scalar('generator/g_loss_a2b',
                                                    self.g_loss_a2b)
            self.g_loss_b2a_sum = tf.summary.scalar('generator/g_loss_b2a',
                                                    self.g_loss_b2a)
            self.g_loss_sum = tf.summary.scalar('generator/g_loss',
                                                self.g_loss)

            if self.use_identity:
                self.iden_loss_sum = tf.summary.scalar(
                    'generator/identity_loss', self.iden_loss)
                self.g_sum = tf.summary.merge([
                    self.gan_loss_sum, self.cycle_loss_sum, self.iden_loss_sum,
                    self.g_loss_a2b_sum, self.g_loss_b2a_sum, self.g_loss_sum
                ])
            else:
                self.g_sum = tf.summary.merge([
                    self.gan_loss_sum, self.cycle_loss_sum,
                    self.g_loss_a2b_sum, self.g_loss_b2a_sum, self.g_loss_sum
                ])

            self.da_loss_real_sum = tf.summary.scalar(
                'discriminator/da_loss_real', self.da_loss_real)
            self.da_loss_fake_sum = tf.summary.scalar(
                'discriminator/da_loss_fake', self.da_loss_fake)
            self.da_loss_sum = tf.summary.scalar('discriminator/da_loss',
                                                 self.da_loss)
            self.db_loss_real_sum = tf.summary.scalar(
                'discriminator/db_loss_real', self.db_loss_real)
            self.db_loss_fake_sum = tf.summary.scalar(
                'discriminator/db_loss_fake', self.db_loss_fake)
            self.db_loss_sum = tf.summary.scalar('discriminator/db_loss',
                                                 self.db_loss)
            self.d_loss_sum = tf.summary.scalar('discriminator/d_loss',
                                                self.d_loss)

            if self.use_wgan:
                self.da_loss_GP_sum = tf.summary.scalar(
                    'discriminator/da_loss_GP', self.da_loss_GP)
                self.db_loss_GP_sum = tf.summary.scalar(
                    'discriminator/db_loss_GP', self.db_loss_GP)
                self.d_sum = tf.summary.merge([
                    self.da_loss_real_sum, self.da_loss_fake_sum,
                    self.da_loss_GP_sum, self.da_loss_sum,
                    self.db_loss_real_sum, self.db_loss_fake_sum,
                    self.db_loss_GP_sum, self.db_loss_sum, self.d_loss_sum
                ])
            else:
                self.d_sum = tf.summary.merge([
                    self.da_loss_real_sum, self.da_loss_fake_sum,
                    self.da_loss_sum, self.db_loss_real_sum,
                    self.db_loss_fake_sum, self.db_loss_sum, self.d_loss_sum
                ])

            self.scale_A = tf.placeholder(dtype,
                                          [1, None, self.nX, self.nC * 2],
                                          name='scale_tensor_A')
            self.scale_B = tf.placeholder(dtype,
                                          [1, None, self.nX, self.nC * 2],
                                          name='scale_tensor_B')

            self.real_A_ssos_sum = tf.summary.image(
                'ssos/real_full',
                tf_imgri2ssos(self.real_A * self.scale_A),
                max_outputs=1)
            self.real_B_ssos_sum = tf.summary.image(
                'ssos/real_down',
                tf_imgri2ssos(self.real_B * self.scale_B),
                max_outputs=1)
            self.fake_A_ssos_sum = tf.summary.image(
                'ssos/fake_full',
                tf_imgri2ssos(self.fake_A * self.scale_B),
                max_outputs=1)
            self.fake_B_ssos_sum = tf.summary.image(
                'ssos/fake_down',
                tf_imgri2ssos(self.fake_B * self.scale_A),
                max_outputs=1)
            self.recon_A_ssos_sum = tf.summary.image(
                'ssos/recon_full',
                tf_imgri2ssos(self.recon_A * self.scale_A),
                max_outputs=1)
            self.recon_B_ssos_sum = tf.summary.image(
                'ssos/recon_down',
                tf_imgri2ssos(self.recon_B * self.scale_B),
                max_outputs=1)
            self.ssos_sum = tf.summary.merge([
                self.real_A_ssos_sum, self.real_B_ssos_sum,
                self.fake_A_ssos_sum, self.fake_B_ssos_sum,
                self.recon_A_ssos_sum, self.recon_B_ssos_sum
            ])

            self.test_real_A = tf.placeholder(dtype,
                                              [1, None, self.nX, self.nC * 2],
                                              name='test_real_A')
            self.test_real_B = tf.placeholder(dtype,
                                              [1, None, self.nX, self.nC * 2],
                                              name='test_real_B')

            self.test_fake_B = self.generatorA2B(self.test_real_A)
            self.test_fake_A = self.generatorB2A(self.test_real_B)

            self.test_fake_img_B = tf.squeeze(
                tf_ri2comp(self.test_fake_B * self.scale_A))
            self.test_fake_img_A = tf.squeeze(
                tf_ri2comp(self.test_fake_A * self.scale_B))

            self.lr = tf.placeholder(dtype, None, name='learning_rate')
            self.lr_sum = tf.summary.scalar('learning_rate', self.lr)

            self.ga2b_optim = tf.train.AdamOptimizer(self.lr, beta1=self.beta1, beta2=self.beta2). \
                minimize(self.g_loss_a2b, var_list=self.generatorA2B.variables)
            self.gb2a_optim = tf.train.AdamOptimizer(self.lr, beta1=self.beta1, beta2=self.beta2). \
                minimize(self.g_loss_b2a, var_list=self.generatorB2A.variables)
            self.da_optim = tf.train.AdamOptimizer(self.lr, beta1=self.beta1, beta2=self.beta2). \
                minimize(self.da_loss, var_list=self.discriminatorA.variables)
            self.db_optim = tf.train.AdamOptimizer(self.lr, beta1=self.beta1, beta2=self.beta2). \
                minimize(self.db_loss, var_list=self.discriminatorB.variables)
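# Note (added sketch, not part of the original example): mse_criterion,
# mae_criterion and gradient_penalty are called above but defined elsewhere in the
# repository, and criterionGAN is assumed to be set in __init__. A minimal
# TF1-style sketch matching the call signature
# gradient_penalty(fake, real, batch_size, discriminator) used above might be:
import tensorflow as tf

def mse_criterion(target, pred):
    # mean squared error between two tensors
    return tf.reduce_mean(tf.square(target - pred))

def mae_criterion(target, pred):
    # mean absolute error between two tensors
    return tf.reduce_mean(tf.abs(target - pred))

def gradient_penalty(fake, real, batch_size, discriminator):
    # WGAN-GP penalty: push the critic's gradient norm towards 1 on points
    # interpolated between real and fake samples (assumes the discriminator
    # object reuses its variables when called again).
    alpha = tf.random_uniform([batch_size, 1, 1, 1], 0., 1.)
    interpolated = real + alpha * (fake - real)
    critic_out = discriminator(interpolated)
    grads = tf.gradients(critic_out, [interpolated])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
    return tf.reduce_mean(tf.square(slopes - 1.))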
Esempio n. 37
0
def split_data_by_Kfold(K, silence_percentage, unknown_percentage):
    random.seed(RANDOM_SEED)
    ret = []
    uid_list = []
    for k in range(K):
        print('{0} fold'.format(k))
        wanted_data = {TRAIN: [], VALID: []}

        ## Step1
        ## ----Guarantee that the same 'unknown' words do not appear in both the train and valid sets
        random.shuffle(unknown_words)
        _unknown_words_train = unknown_words[:10]
        _unknown_words_valid = unknown_words[10:]
        unknown_data_train = []
        unknown_data_valid = []
        uid = {TRAIN: [], VALID: []}
        missing_unknown_counts = 0
        train_unknown_counts = 0
        valid_unknown_counts = 0

        ## Step2
        ## ----Split samples into the 10 known (wanted) words and the remaining unknown words
        for wav_path in gfile.Glob(search_path):
            _, word = os.path.split(os.path.dirname(wav_path))
            word = word.lower()
            if word == BACKGROUND_NOISE_DIR_NAME:
                continue
            set_index, usr_id = distribute_fold(wav_path=wav_path, fold=k, K=K)
            uid[set_index].append(usr_id)
            if word in wanted_words:
                wanted_data[set_index].append({'label': word, 'file': wav_path})
            else:
                if set_index==TRAIN and word in _unknown_words_train:
                    train_unknown_counts += 1
                    unknown_data_train.append({'label': UNKNOWN_WORD_LABEL, 'file': wav_path})
                elif set_index==VALID and word in _unknown_words_valid:
                    valid_unknown_counts += 1
                    unknown_data_valid.append({'label': UNKNOWN_WORD_LABEL, 'file': wav_path})
                else:
                    missing_unknown_counts += 1
                    pass
        print('valid unknown counts : {0}'.format(valid_unknown_counts))
        print('train unknown counts : {0}'.format(train_unknown_counts))

        ## Step3
        ## ----Add 'silence' and 'unknown' according to preset 'silence_percentage' and 'unknown_percentage'
        addition_silence_unknown_count = 0
        for set_index in [VALID, TRAIN]:
            set_size = len(wanted_data[set_index])
            # add silence data
            silence_size = int(math.ceil(set_size * silence_percentage / 100))
            print('silence size : {0}'.format(silence_size))
            addition_silence_unknown_count += silence_size
            for _ in range(silence_size):
                wanted_data[set_index].append({'label': SILENCE_LABEL, 'file': SILENCE_FILE})
            # add unknown data
            unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
            print('unknown size : {0}'.format(unknown_size))
            addition_silence_unknown_count += unknown_size
            if set_index==TRAIN:
                random.shuffle(unknown_data_train)
                st(context=21)
                wanted_data[set_index].extend(unknown_data_train[:unknown_size])
            else:
                random.shuffle(unknown_data_valid)
                wanted_data[set_index].extend(unknown_data_valid[:unknown_size])
        print('addition silence unknown count : {0}'.format(addition_silence_unknown_count))

        ## Step4
        ## ----Shuffle ordering
        for set_index in [VALID, TRAIN]:
            random.shuffle(wanted_data[set_index])
            uid[set_index] = list(set(uid[set_index]))
        ret.append(wanted_data)
        uid_list.append(uid)
        print('')
    return ret, uid_list
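# Note (added sketch, not part of the original example): distribute_fold is called
# above but not shown. A plausible implementation, assuming the Speech Commands
# file naming convention '<speaker>_nohash_<n>.wav' and a hash-based assignment so
# that each speaker falls into exactly one fold, could be:
import os
import re
import hashlib

def distribute_fold(wav_path, fold, K):
    # derive the speaker id from the file name and hash it into one of K buckets;
    # files whose bucket equals the current fold go to VALID, the rest to TRAIN
    base_name = os.path.basename(wav_path)
    usr_id = re.sub(r'_nohash_.*$', '', base_name)
    bucket = int(hashlib.sha1(usr_id.encode('utf-8')).hexdigest(), 16) % K
    set_index = VALID if bucket == fold else TRAIN
    return set_index, usr_id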
Esempio n. 38
0
    def check_car_path(self,car):
        conflict_cars = []
        failed = False
        blocked = False
        clear = True
        conflict = False
        mycone = self.get_vision_cone(car)
        self.car_boxes.clear()
        for cars in self.cars:
            box = Polygon([(cars.x-0.5,cars.y+1),(cars.x-0.5,cars.y-1),(cars.x+3,cars.y+1),(cars.x+3,cars.y-1)])
            rot_box = affinity.rotate(box, np.rad2deg(cars.yaw), origin = (cars.x,cars.y))
            self.car_boxes.update({cars.name: rot_box})
        #print(self.car_boxes)
        # Now check if car box and vision cone intersect
        if not car.last_segment and not car.close:
            myblocked_cone = self.get_vision_cone_blocked(car)
            for key,val in self.car_boxes.items():
                if key != car.name:
                    if myblocked_cone.intersects(val):
                        for cars in self.cars:
                            if cars.name == key:
                                if cars.status=='Failure' or cars.status =='Blocked':
                                    blocked = True 
                                    print('Failure or blocked car ahead')

        for key,val in self.car_boxes.items():
            if key != car.name:
                if mycone.intersects(val):
                    clear = False
                    #print('{0} stops because other car is in the path'.format(car.name))
                    for cars in self.cars:
                        if cars.name == key:
                            if cars.status=='Failure':
                                #failed_cars.append(cars)
                                failed = True
                                print('{0} blocked by a failed car'.format(car.name))
                            elif cars.parked:
                                print('Blocked by a parked car - Go on')
                            else:
                                conflict_cars.append(cars)
        # check if a pedestrian is in the cone
        mypedcone = self.get_vision_cone_pedestrian(car)
        for ped in self.peds:
            x_m = ped.state[0]/SCALE_FACTOR_SIM
            y_m = ped.state[1]/SCALE_FACTOR_SIM
            ped_point = Polygon([(x_m+2,y_m+1),(x_m+2,y_m-1),(x_m-1,y_m-1), (x_m-1,y_m+1),(x_m+2,y_m+1)])
            if ped_point.intersects(mypedcone):
                clear = False
                print('{0} stops because a pedestrian is in the path'.format(car.name))
        # check if they have a conflict with me
        mybox = self.car_boxes.get(car.name,0)
        for cars in conflict_cars:
            cone = self.get_vision_cone(cars)
            #print(cone)
            try:
                cone.intersects(mybox)
            except:
                st()
            if not cone.intersects(mybox):
                conflict_cars.remove(cars)
            else:
                conflict = True
                #print('There is a conflict')
        return clear, conflict_cars, failed, conflict, blocked
Esempio n. 39
0
def run_one_fold(fold):
    # read whole train / test data for tokenizer
    df_train = read_train_data()
    df_test = read_test_data()

    # fit tokenizer : word level
    tokenizer = get_fitted_tokenizer(df_train, df_test)
    word_index = tokenizer.word_index
    transformers_count = 0
    all_words = set(word_index.keys())
    for toxic, transformers in toxicIndicator_transformers.items():
        for transformer in transformers:
            if transformer==toxic:
                continue
            if transformer in all_words:
                transformers_count += tokenizer.word_counts[transformer]
                # print(transformer)
    print('toxic transformer count : {0}'.format(transformers_count))
    print('unique token : {0}'.format(len(word_index)))

    # fit tokenizer : char level
    tokenizer_char = get_fitted_tokenizer_charLevel(df_train, df_test)
    word_index_char = tokenizer_char.word_index
    print('unique token char : {0}'.format(len(word_index_char)))

    # get embedding lookup table word level / char level
    embedding_dim = 300
    glove_path = '../data/input/glove_dir/glove.840B.300d.txt'
    glove_embedding_lookup_table, glove_embedding_lookup_table_char = \
        get_embedding_lookup_table(word_index, word_index_char, glove_path, embedding_dim)

    # read in fold data
    df_trn, df_val = read_data_in_fold(fold)

    # prepare data : word level
    X_test_word = get_padded_sequence(tokenizer, df_test[COMMENT_COL].astype('str').values.tolist())
    id_test = df_test[ID_COL].values.tolist()
    print('Test data shape {0}'.format(X_test_word.shape))
    X_trn_word = get_padded_sequence(tokenizer, df_trn[COMMENT_COL].astype('str').values.tolist())
    y_trn = df_trn[label_candidates].values
    print('Fold {0} train data shape {1} '.format(fold, X_trn_word.shape))
    X_val_word = get_padded_sequence(tokenizer, df_val[COMMENT_COL].astype('str').values.tolist())
    y_val = df_val[label_candidates].values
    id_val = df_val[ID_COL].values.tolist()
    print('Fold {0} valid data shape {1} '.format(fold, X_val_word.shape))

    # prepare data : char level
    X_test_char = get_padded_sequence_charLevel(tokenizer_char, df_test[COMMENT_COL].astype('str').values.tolist())
    id_test = df_test[ID_COL].values.tolist()
    print('Test data shape {0}'.format(X_test_char.shape))
    X_trn_char = get_padded_sequence_charLevel(tokenizer_char, df_trn[COMMENT_COL].astype('str').values.tolist())
    y_trn = df_trn[label_candidates].values
    print('Fold {0} train data shape {1} '.format(fold, X_trn_char.shape))
    X_val_char = get_padded_sequence_charLevel(tokenizer_char, df_val[COMMENT_COL].astype('str').values.tolist())
    y_val = df_val[label_candidates].values
    id_val = df_val[ID_COL].values.tolist()
    print('Fold {0} valid data shape {1} '.format(fold, X_val_char.shape))

    # prepare word / char level data
    X_test = [X_test_word, X_test_char]
    X_trn = [X_trn_word, X_trn_char]
    X_val = [X_val_word, X_val_char]


    # preds result array
    preds_test = np.zeros((X_test_word.shape[0], NUM_OF_LABEL))
    preds_valid = np.zeros((X_val_word.shape[0], NUM_OF_LABEL))

    # train model
    for run in range(RUNS_IN_FOLD):
        print('\nFold {0} run {1} begin'.format(fold, run))

        # model
        model = get_model(glove_embedding_lookup_table, glove_embedding_lookup_table_char, float(FLAGS.dp))
        # print(model.summary())

        if mode == 'try':
            st(context=3)

        # callbacks
        es = EarlyStopping(monitor='val_acc', mode='max', patience=5)
        bst_model_path = '../data/output/model/{0}fold_{1}run_glove_cnn.h5'.format(fold, run)
        mc = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)
        rp = ReduceLROnPlateau(
            monitor='val_acc', mode='max',
            patience=3,
            factor=np.sqrt(0.1),
            verbose=1
        )

        # train
        hist = model.fit(
            x=X_trn, y=y_trn,
            validation_data=(X_val, y_val),
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            shuffle=True,
            callbacks=[es, mc, rp]
        )
        model.load_weights(bst_model_path)
        bst_val_score = max(hist.history['val_acc'])
        print('\nFold {0} run {1} best val score : {2}'.format(fold, run, bst_val_score))

        # predict
        preds_test += model.predict(X_test, batch_size=512, verbose=1) / RUNS_IN_FOLD
        preds_valid += model.predict(X_val, batch_size=512, verbose=1) / RUNS_IN_FOLD
        print('\nFold {0} run {1} done'.format(fold, run))

        del model
        gc.collect()

    # record preds result
    preds_test = preds_test.T
    df_preds_test = pd.DataFrame()
    df_preds_test[ID_COL] = id_test
    for idx, label in enumerate(label_candidates):
        df_preds_test[label] = preds_test[idx]
    df_preds_test.to_csv('../data/output/preds/glove_cnn/{0}/{1}fold_test.csv'.format(FLAGS.dp, fold), index=False)

    preds_valid = preds_valid.T
    df_preds_val = pd.DataFrame()
    df_preds_val[ID_COL] = id_val
    for idx, label in enumerate(label_candidates):
        df_preds_val[label] = preds_valid[idx]
    df_preds_val.to_csv('../data/output/preds/glove_cnn/{0}/{1}fold_valid.csv'.format(FLAGS.dp, fold), index=False)
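# Note (added sketch, not part of the original example): get_fitted_tokenizer_charLevel
# and get_padded_sequence_charLevel are used above but defined elsewhere. A minimal
# version, assuming Keras' Tokenizer with char_level=True and a hypothetical
# MAX_SEQUENCE_LENGTH_CHAR constant, could be:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

MAX_SEQUENCE_LENGTH_CHAR = 512  # assumption, tune to the comment length distribution

def get_fitted_tokenizer_charLevel(df_train, df_test):
    # fit a character-level tokenizer on the full train + test corpus
    texts = df_train[COMMENT_COL].astype('str').values.tolist() + \
            df_test[COMMENT_COL].astype('str').values.tolist()
    tokenizer = Tokenizer(char_level=True)
    tokenizer.fit_on_texts(texts)
    return tokenizer

def get_padded_sequence_charLevel(tokenizer, texts):
    # map each comment to a fixed-length sequence of character indices
    return pad_sequences(tokenizer.texts_to_sequences(texts),
                         maxlen=MAX_SEQUENCE_LENGTH_CHAR)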
Esempio n. 40
0
#encoding=utf8
"""
The case where the decorated function takes parameters
"""
from ipdb import set_trace as st

def print_debug(func):
    def __decorator(ser):
        print('enter the login')
        func(ser)
        print('exit the login')
    return __decorator

@print_debug
def login(user):
    # NOTE: this line was garbled in the source; reconstructed as a print of the
    # user argument followed by a call to the decorated function.
    print('in login:' + user)

login('jatsz')
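# Expected output of the call above (with the reconstructed login('jatsz') call):
#   enter the login
#   in login:jatsz
#   exit the login
#
# A common refinement (an addition, not part of the original) uses functools.wraps
# so the wrapper preserves the decorated function's name/docstring and forwards
# arbitrary arguments:
import functools

def print_debug_v2(func):
    @functools.wraps(func)
    def __decorator(*args, **kwargs):
        print('enter {0}'.format(func.__name__))
        result = func(*args, **kwargs)
        print('exit {0}'.format(func.__name__))
        return result
    return __decorator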
Esempio n. 41
0
    def getBatch_RGB_varInpID(self, start, end):
        end = min([end, self.len])
        batch = self.flist[start:end]

        # channel First :
        sz_a = [end - start, self.nCh_out, self.nY, self.nX]
        sz_M = [end - start, 1, self.nY, self.nX]

        target_class_idx = np.empty([end - start, 1], dtype=np.uint8)
        a_img = np.empty(sz_a, dtype=np.float32)
        b_img = np.empty(sz_a, dtype=np.float32)
        c_img = np.empty(sz_a, dtype=np.float32)
        d_img = np.empty(sz_a, dtype=np.float32)
        e_img = np.empty(sz_a, dtype=np.float32)
        f_img = np.empty(sz_a, dtype=np.float32)
        g_img = np.empty(sz_a, dtype=np.float32)
        n_img = np.empty(sz_a, dtype=np.float32)
        target_img = np.empty(sz_a, dtype=np.float32)

        a_mask = np.zeros(sz_M, dtype=np.float32)
        b_mask = np.zeros(sz_M, dtype=np.float32)
        c_mask = np.zeros(sz_M, dtype=np.float32)
        d_mask = np.zeros(sz_M, dtype=np.float32)
        e_mask = np.zeros(sz_M, dtype=np.float32)
        f_mask = np.zeros(sz_M, dtype=np.float32)
        g_mask = np.zeros(sz_M, dtype=np.float32)
        n_mask = np.zeros(sz_M, dtype=np.float32)

        targ_idx = random.randint(0, self.N - 1)
        tar_class_bools = [x == targ_idx for x in range(self.N)]

        # Choose a random file from the set and a random index that is not the target
        bFname = self.flist[random.randint(0, self.len - 1)]
        random_for_batch2 = []

        random.seed(2)
        a_rand = random.randint(0, self.N - 1)

        # change here to edit change ID N
        for i in range(1):
            while (a_rand in random_for_batch2) or (a_rand == targ_idx):
                a_rand = random.randint(0, self.N - 1)
            random_for_batch2.append(a_rand)
        random_for_batch2.sort()
        random_bools = [x in random_for_batch2 for x in range(self.N)]

        for iB, aFname in enumerate(batch):
            aug_idx = random.randint(0, 1)
            lFname = bFname if random_bools[0] else aFname
            a_tmp = np.ndarray.astype(self.read_png(
                join(self.root, lFname + self.fExp[0] + '.png')),
                                      dtype=np.float32)
            lFname = bFname if random_bools[1] else aFname
            b_tmp = np.ndarray.astype(self.read_png(
                join(self.root, lFname + self.fExp[1] + '.png')),
                                      dtype=np.float32)
            lFname = bFname if random_bools[2] else aFname
            c_tmp = np.ndarray.astype(self.read_png(
                join(self.root, lFname + self.fExp[2] + '.png')),
                                      dtype=np.float32)
            lFname = bFname if random_bools[3] else aFname
            d_tmp = np.ndarray.astype(self.read_png(
                join(self.root, lFname + self.fExp[3] + '.png')),
                                      dtype=np.float32)
            lFname = bFname if random_bools[4] else aFname
            e_tmp = np.ndarray.astype(self.read_png(
                join(self.root, lFname + self.fExp[4] + '.png')),
                                      dtype=np.float32)
            lFname = bFname if random_bools[5] else aFname
            f_tmp = np.ndarray.astype(self.read_png(
                join(self.root, lFname + self.fExp[5] + '.png')),
                                      dtype=np.float32)
            lFname = bFname if random_bools[6] else aFname
            g_tmp = np.ndarray.astype(self.read_png(
                join(self.root, lFname + self.fExp[6] + '.png')),
                                      dtype=np.float32)
            lFname = bFname if random_bools[7] else aFname
            n_tmp = np.ndarray.astype(self.read_png(
                join(self.root, lFname + self.fExp[7] + '.png')),
                                      dtype=np.float32)

            if self.use_aug:
                if aug_idx == 1:
                    a_tmp = np.flip(a_tmp, axis=3)
                    b_tmp = np.flip(b_tmp, axis=3)
                    c_tmp = np.flip(c_tmp, axis=3)
                    d_tmp = np.flip(d_tmp, axis=3)
                    e_tmp = np.flip(e_tmp, axis=3)
                    f_tmp = np.flip(f_tmp, axis=3)
                    g_tmp = np.flip(g_tmp, axis=3)
                    n_tmp = np.flip(n_tmp, axis=3)
            if self.use_norm_std:
                a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / np.std(a_tmp)
                b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / np.std(b_tmp)
                c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / np.std(c_tmp)
                d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / np.std(d_tmp)
                e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / np.std(e_tmp)
                f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / np.std(f_tmp)
                g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / np.std(g_tmp)
                n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / np.std(n_tmp)
            else:
                scale = 255.0
                a_img[iB, :, :, :] = a_tmp[:, :, :, 0] / scale
                b_img[iB, :, :, :] = b_tmp[:, :, :, 0] / scale
                c_img[iB, :, :, :] = c_tmp[:, :, :, 0] / scale
                d_img[iB, :, :, :] = d_tmp[:, :, :, 0] / scale
                e_img[iB, :, :, :] = e_tmp[:, :, :, 0] / scale
                f_img[iB, :, :, :] = f_tmp[:, :, :, 0] / scale
                g_img[iB, :, :, :] = g_tmp[:, :, :, 0] / scale
                n_img[iB, :, :, :] = n_tmp[:, :, :, 0] / scale

            if targ_idx == 0:
                target_img[iB, :, :, :] = a_img[iB, :, :, :]
                a_mask[iB, 0, :, :] = 1.
            elif targ_idx == 1:
                target_img[iB, :, :, :] = b_img[iB, :, :, :]
                b_mask[iB, 0, :, :] = 1.
            elif targ_idx == 2:
                target_img[iB, :, :, :] = c_img[iB, :, :, :]
                c_mask[iB, 0, :, :] = 1.
            elif targ_idx == 3:
                target_img[iB, :, :, :] = d_img[iB, :, :, :]
                d_mask[iB, 0, :, :] = 1.
            elif targ_idx == 4:
                target_img[iB, :, :, :] = e_img[iB, :, :, :]
                e_mask[iB, 0, :, :] = 1.
            elif targ_idx == 5:
                target_img[iB, :, :, :] = f_img[iB, :, :, :]
                f_mask[iB, 0, :, :] = 1.
            elif targ_idx == 6:
                target_img[iB, :, :, :] = g_img[iB, :, :, :]
                g_mask[iB, 0, :, :] = 1.
            elif targ_idx == 7:
                target_img[iB, :, :, :] = n_img[iB, :, :, :]
                n_mask[iB, 0, :, :] = 1.
            else:
                st()
            target_class_idx[iB] = targ_idx
        return target_class_idx, a_img, b_img, c_img, d_img, e_img, f_img, g_img, n_img, a_mask, b_mask, c_mask, d_mask, e_mask, f_mask, g_mask, n_mask, tar_class_bools, target_img
Esempio n. 42
0
        np.transpose(voxel_, [2, 1, 0]) >= THRESHOLD,
        dims = [S1, S2, S3],
        translate = [0.0, 0.0, 0.0],
        scale = 1.0,
        axis_order = 'xyz'
    )

    with open(filename, "wb") as f:
        binvox_obj.write(f)


val = pickle.load(open("vis_embeddings/gqn3d_cond_tree_vae_concat_end_inf_segmentation_test/test/sphere gray large metal/000003_segmentation_masks.png","rb"))

# cube = pickle.load(open("voxel_shapes/real_shapes/cube.p","rb"))
# cylinder = pickle.load(open("voxel_shapes/real_shapes/cylinder.p","rb"))
val = pickle.load(open("voxel_shapes/real_shapes/sphere.p","rb"))

# cube_bce = utils.losses.binary_ce_loss(val,cube)
# cylinder_bce = utils.losses.binary_ce_loss(val,cylinder)
# sphere_bce = utils.losses.binary_ce_loss(val,sphere)
# print("cube",cube_bce)
# print("cylinder",cylinder_bce)
# print("spher",sphere_bce)


valrange = [i * 0.05 for i in range(5, 20)]
print(valrange)
for i in valrange:
    name = "ab_{}.binvox".format(i)
    st()
    save_voxel(val, name, THRESHOLD=i)
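# Note (reconstruction sketch, not part of the original): the snippet above starts
# mid-expression -- the dims/translate/scale/axis_order lines are the tail of a
# binvox_rw.Voxels(...) call inside save_voxel, whose opening was truncated. A
# plausible full version, with the signature and shape handling as assumptions,
# might look like this:
import pickle
import numpy as np
import binvox_rw

def save_voxel(voxel_, filename, THRESHOLD=0.5):
    # threshold the predicted occupancy grid and write it out in binvox format
    S1, S2, S3 = voxel_.shape
    binvox_obj = binvox_rw.Voxels(
        np.transpose(voxel_, [2, 1, 0]) >= THRESHOLD,
        dims=[S1, S2, S3],
        translate=[0.0, 0.0, 0.0],
        scale=1.0,
        axis_order='xyz'
    )
    with open(filename, "wb") as f:
        binvox_obj.write(f)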
Esempio n. 43
0
def index():
    st(context=21)
    if 'username' in session:
        return 'Logged in as %s' % escape(session['username'])
    return 'You are not logged in'
Esempio n. 44
0
def login():
    st(context=21)
    if request.method == 'POST':
        session['username'] = request.form['username']
        return redirect(url_for('index'))
    return '''
Esempio n. 45
0
def run_cv():
    # read whole train / test data for extractor
    df_train = read_train_data()
    df_test = read_test_data()
    X_test = df_test[COMMENT_COL].values
    id_test = df_test[ID_COL].values.tolist()

    extractor_word = get_extractor('word')
    extractor_word.fit(pd.concat((df_train.loc[:, COMMENT_COL], df_test.loc[:, COMMENT_COL])))

    st(context=21)

    extractor_char = get_extractor('char')
    extractor_char.fit(pd.concat((df_train.loc[:, COMMENT_COL], df_test.loc[:, COMMENT_COL])))

    st(context=21)

    X_test_word = conduct_transform(extractor_word, X_test)
    X_test_char = conduct_transform(extractor_char, X_test)
    X_test_all = hstack([X_test_word, X_test_char])

    for fold in range(K):
        # read in fold data
        df_trn, df_val = read_data_in_fold(fold)

        X_trn = df_trn[COMMENT_COL].values
        X_trn_word = conduct_transform(extractor_word, X_trn)
        X_trn_char = conduct_transform(extractor_char, X_trn)
        X_trn_all = hstack([X_trn_word, X_trn_char])
        y_trn = df_trn[label_candidates].values
        print('\nFold {0} train data shape {1} '.format(fold, X_trn.shape))

        X_val = df_val[COMMENT_COL].values
        X_val_word = conduct_transform(extractor_word, X_val)
        X_val_char = conduct_transform(extractor_char, X_val)
        X_val_all = hstack([X_val_word, X_val_char])
        y_val = df_val[label_candidates].values
        id_val = df_val[ID_COL].values.tolist()
        print('Fold {0} valid data shape {1} '.format(fold, X_val.shape))

        # preds result array
        preds_test = np.zeros((X_test.shape[0], NUM_OF_LABEL))
        preds_valid = np.zeros((X_val.shape[0], NUM_OF_LABEL))

        models = []
        for idx, label in enumerate(label_candidates):
            print('\nFold {0} label {1}'.format(fold, label))
            model = get_model()
            print('   train')
            model.fit(X=X_trn_all, y=y_trn[:,idx])
            models.append(model)
            print('   predict valid')
            preds_valid[:,idx] = model.predict_proba(X=X_val_all)[:,1]

        # predict in fold
        print('Fold {0} predict test'.format(fold))
        for idx, model in enumerate(models):
            preds_test[:,idx] = model.predict_proba(X=X_test_all)[:,1]

        # record preds result
        preds_test = preds_test.T
        df_preds_test = pd.DataFrame()
        df_preds_test[ID_COL] = id_test
        for idx, label in enumerate(label_candidates):
            df_preds_test[label] = preds_test[idx]
        df_preds_test.to_csv('../data/output/preds/lr/{0}fold_test.csv'.format(fold), index=False)

        preds_valid = preds_valid.T
        df_preds_val = pd.DataFrame()
        df_preds_val[ID_COL] = id_val
        for idx, label in enumerate(label_candidates):
            df_preds_val[label] = preds_valid[idx]
        df_preds_val.to_csv('../data/output/preds/lr/{0}fold_valid.csv'.format(fold), index=False)
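# Note (added sketch, not part of the original example): get_extractor,
# conduct_transform and get_model are referenced above but defined elsewhere. A
# minimal version, assuming scikit-learn TF-IDF features and logistic regression
# (all hyperparameters below are assumptions), could be:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

def get_extractor(analyzer):
    # word-level unigrams or character n-grams, depending on the argument
    if analyzer == 'word':
        return TfidfVectorizer(analyzer='word', ngram_range=(1, 1),
                               sublinear_tf=True, max_features=50000)
    return TfidfVectorizer(analyzer='char', ngram_range=(2, 6),
                           sublinear_tf=True, max_features=50000)

def conduct_transform(extractor, texts):
    # transform raw comment strings into a sparse TF-IDF matrix
    return extractor.transform(texts)

def get_model():
    # one binary classifier per toxicity label
    return LogisticRegression(C=1.0, solver='liblinear')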
Esempio n. 46
0
#encoding=utf8
"""
"""
from ipdb import set_trace as st

class UpperString(object):
    def __init__(self):
        self._value = ''
    def __get__(self, instance, klass):
        return self._value
    def __set__(self, instance, value):
        self._value = value.upper()

class MyClass(object):
    attribute = UpperString()

st(context=21)

instance_of = MyClass()
instance_of.attribute

instance_of.attribute = 'my value'
instance_of.attribute

instance_of.__dict__ = {}
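# What the example above demonstrates (a short walkthrough, not part of the
# original): UpperString is a data descriptor, so MyClass.attribute stores its
# value on the descriptor object itself, not on the instance.
#
#   instance_of = MyClass()
#   instance_of.attribute            # -> '' (initial value)
#   instance_of.attribute = 'my value'
#   instance_of.attribute            # -> 'MY VALUE' (__set__ upper-cases it)
#   instance_of.__dict__ = {}        # has no effect: the value does not live in
#                                    # the instance dict, and data descriptors
#                                    # take precedence on attribute lookup
#
# This also means the value is shared by every instance of MyClass.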
Esempio n. 47
0
 async def track_reference(self, Game, send_response_channel):
     now = trio.current_time()
     if self.depart_time <= now:
         self.delay = self.depart_time - now
     print('{0} - Tracking reference...'.format(self.name))
     self.close = False
     ck = 0
     dl = 1.0  # course tick
     if not self.status == 'Replan':
         try:
             self.check_if_car_is_in_spot(Game)
         except:
             st()
         if self.check_if_car_is_in_spot(Game):
             print('Car is in a parking spot')
             self.parked = True
             while not self.check_clear_before_unparking(Game):
                 await trio.sleep(0.1)
     self.status = 'Driving'
     self.parked = False
     # including a failure in 20% of cars
     failidx = len(self.ref)
     chance = random.randint(1, 100)  # changed to 0!!!
     if not self.replan:
         if len(self.ref) - 1 > 4 and chance <= 0:
             failidx = np.random.randint(low=4, high=6, size=1)
             if self.parking:
                 print('{0} will fail at acceptable spot: {1}'.format(
                     self.name, failidx))
             else:
                 print('{0} will fail in narrow path: {1}'.format(
                     self.name, failidx))
         elif len(self.ref) - 1 > 10 and chance <= 0:
             failidx = np.random.randint(low=len(self.ref) - 5,
                                         high=len(self.ref) - 1,
                                         size=1)
             if self.parking:
                 print('{0} will fail in narrow path: {1}'.format(
                     self.name, failidx))
             else:
                 print('{0} will fail at acceptable spot: {1}'.format(
                     self.name, failidx))
     # start tracking segments
     for i in range(0, len(self.ref) - 1):
         #print('{0} self.unparking'.format(self.name))
         #print(self.unparking)
         if (i == failidx):
             print('{0} Failing'.format(self.name))
             await self.failure(send_response_channel)
             return
         if i >= 1:
             self.unparking = False
         self.close = False
         if self.check_car_close_2_spot(Game):
             self.close = True
         self.status = 'Driving'
         path = self.ref[:][i]
         cx = path[:, 0] * SCALE_FACTOR_PLAN
         cy = path[:, 1] * SCALE_FACTOR_PLAN
         cyaw = np.deg2rad(path[:, 2]) * -1
         state = np.array([self.x, self.y, self.yaw])
         #  check  direction of the segment
         self.direction = tracking.check_direction(path)
         sp = tracking.calc_speed_profile(cx, cy, cyaw, TARGET_SPEED,
                                          TARGET_SPEED, self.direction)
         initial_state = State(x=state[0],
                               y=state[1],
                               yaw=state[2],
                               v=self.v)
         await self.track_async(cx, cy, cyaw, ck, sp, dl, initial_state,
                                TARGET_SPEED, Game, send_response_channel)
         await trio.sleep(0)
         if self.status == 'Replan':
             return
     if not self.status == 'Failure':
         self.last_segment = True
         state = np.array([self.x, self.y, self.yaw])
         path = self.ref[:][-1]
         cx = path[:, 0] * SCALE_FACTOR_PLAN
         cy = path[:, 1] * SCALE_FACTOR_PLAN
         cyaw = np.deg2rad(path[:, 2]) * -1
         self.direction = tracking.check_direction(path)
         initial_state = State(x=state[0],
                               y=state[1],
                               yaw=state[2],
                               v=self.v)
         sp = tracking.calc_speed_profile(cx, cy, cyaw, TARGET_SPEED / 2,
                                          0.0, self.direction)
         await self.track_async(cx, cy, cyaw, ck, sp, dl, initial_state,
                                0.0, Game, send_response_channel)
         if self.status == 'Replan':
             return
         self.status = 'Completed'
         self.is_at_pickup = self.check_at_pickup(Game)
         if self.is_at_pickup:
             self.retrieving = False
         self.last_segment = False
         if self.check_if_car_is_in_spot(Game):
             self.parked = True
         self.parking = False
         await self.send_response(send_response_channel)