def run_step_encode(self, df_norm):
    """Encode ``df_norm`` into a stream, optionally saving it, and return it.

    The stream is produced by ``get_encode_stream`` using ``self.base`` and
    ``self.dtype``.  If ``self.save['stream']`` is truthy the whole stream is
    written via ``self.save_txt`` under the name ``'stream'``.  Otherwise,
    when ``self.idx`` is not ``None``, only the slice
    ``[self.idx[0]:self.idx[1]]`` is written, under a name suffixed with
    ``self.idx[-1]``.

    Returns:
        The full encoded stream, regardless of what (if anything) was saved.
    """
    encoded = get_encode_stream(df_norm, self.base, self.dtype)

    if self.save['stream']:
        self.save_txt(encoded, 'stream')
        return encoded

    if self.idx is not None:
        start, stop = self.idx[0], self.idx[1]
        self.save_txt(encoded[start:stop], f'stream{self.idx[-1]}')

    return encoded
def process_rebin(df_norm, base, dtype):
    """Return the encoded stream for ``df_norm``.

    Thin wrapper that forwards ``df_norm``, ``base`` and ``dtype`` unchanged
    to ``get_encode_stream`` and hands back its result.
    """
    return get_encode_stream(df_norm, base, dtype)
def main():
    """Run the photo/spec pre-training pipeline, one flag-gated stage at a time.

    Each stage is controlled by a module-level flag (``PREPRO_*``,
    ``MAP_PHOTO``).  When a flag is set the stage is recomputed and its
    result written under ``PRETRAIN_PATH``; when it is not, the stage's
    previously written result is usually loaded from ``PRETRAIN_PATH``
    instead.

    Stages, in order:
      1. Photo heavy hitters (``photoHH_pd``): normalise, encode, count.
      2. Spec stream: encode the normalised spec table and attach the
         encoding to the label table as column ``'encode'``.
      3. Spec heavy hitters (``specHH_pd``).
      4. Either project the photo heavy hitters through a spec-fitted
         transform (``MAP_PHOTO``), or project the spec table through a
         photo-fitted transform and join it with the labels.

    NOTE(review): several variables (``dfspec``, ``df_lbl``, ``spec_stream``)
    are only bound for particular flag combinations; an inconsistent
    combination still fails with ``NameError``.
    """
    try:
        os.mkdir(PRETRAIN_PATH)
    except FileExistsError:
        # Re-running against an existing output directory is the normal case.
        print('here we go!')

    # ------------------------------------------------------------------
    # Stage 1: photo heavy hitters
    # ------------------------------------------------------------------
    if PREPRO_HH_PHOTO:
        if PREPRO_STREAM_PHOTO:
            if PREPRO_NORM_PHOTO:
                if PREPRO_NORM_PARAMS:
                    print('=====================PREPRO SPECS====================')
                    dfspec, vmin, vrng, df_lbl = prepro_specs(
                        SPEC_DATA, ftr, r=0.01, w=True, wpath=PRETRAIN_PATH)
                else:
                    # Normalisation parameters were saved by an earlier run.
                    vmin = np.loadtxt(f'{PRETRAIN_PATH}/vmin.txt')
                    vrng = np.loadtxt(f'{PRETRAIN_PATH}/vrng.txt')
                print('=====================PREPRO PHOTO====================')
                dfphoto = prepro_photos(
                    PHOTO_DATA, vmin, vrng, base, ftr, w=True, wpath=PRETRAIN_PATH)
            else:
                print('=====================LOADING PHOTO NORM ====================')
                # read_csv has no ``index`` keyword (that belongs to to_csv);
                # passing it raised TypeError.
                dfphoto = pd.read_csv(f'{PRETRAIN_PATH}/photo_norm_{base}.csv')
            print('=====================ENCODE PHOTO ====================')
            photo_stream = get_encode_stream(dfphoto, base, dtype)
            np.savetxt(f'{PRETRAIN_PATH}/photo_stream.txt', photo_stream)
        else:
            print('=====================LOADING PHOTO STREAM ====================')
            photo_stream = np.loadtxt(f'{PRETRAIN_PATH}/photo_stream.txt')
        print('===================== COUNTING PHOTO HH==================')
        photoHH_pd = get_HH_pd(
            photo_stream, base, ftr_len, dtype, EXACT_COUNTING, topk,
            r=16, d=1000000, c=None, device=None)
        photoHH_pd.to_csv(f'{PRETRAIN_PATH}/photo_HH.csv', index=False)
    else:
        photoHH_pd = pd.read_csv(f'{PRETRAIN_PATH}/photo_HH.csv')
    print('photoHH_pd', photoHH_pd)

    # ------------------------------------------------------------------
    # Stage 2: spec stream + label encoding
    # ------------------------------------------------------------------
    if PREPRO_STREAM_SPEC:
        if not PREPRO_NORM_PARAMS:
            dfspec = pd.read_csv(f'{PRETRAIN_PATH}/spec_norm.csv')
            df_lbl = pd.read_csv(f'{PRETRAIN_PATH}/spec_lbl.csv')
        print('=====================ENCODING SPEC ====================')
        spec_stream = get_encode_stream(dfspec, base, dtype)
        np.savetxt(f'{PRETRAIN_PATH}/spec_stream.txt', spec_stream)
        df_lbl['encode'] = spec_stream
        df_lbl.to_csv(f'{PRETRAIN_PATH}/spec_lbl_encode.csv', index=False)
    else:
        df_lbl = pd.read_csv(f'{PRETRAIN_PATH}/spec_lbl_encode.csv')
        if PREPRO_HH_SPEC:
            # The stream is only needed when heavy hitters are recounted.
            spec_stream = np.loadtxt(f'{PRETRAIN_PATH}/spec_stream.txt')

    # ------------------------------------------------------------------
    # Stage 3: spec heavy hitters
    # ------------------------------------------------------------------
    if PREPRO_HH_SPEC:
        # This banner previously said "PHOTO HH" although it counts the spec stream.
        print('=====================COUNTING SPEC HH====================')
        specHH_pd = get_HH_pd(spec_stream, base, ftr_len, dtype, True, topk)
        print('=====================UMAPPING SPEC ====================')
        specHH_pd.to_csv(f'{PRETRAIN_PATH}/specHH_pd.csv', index=False)
    elif MAP_PHOTO:
        specHH_pd = pd.read_csv(f'{PRETRAIN_PATH}/specHH_pd.csv')
    if PREPRO_HH_SPEC or MAP_PHOTO:
        # specHH_pd is bound only in the two branches above; guard the debug
        # print so other flag combinations do not raise NameError.
        print('specHH_pd', specHH_pd)

    # ------------------------------------------------------------------
    # Stage 4: projection
    # ------------------------------------------------------------------
    if MAP_PHOTO:
        print('=============MAPPING PHOTO============')
        if PREPRO_SPEC_UMAP:
            HH_pdQS, umapT_spec = get_spec_mapping(
                specHH_pd, ftr, df_lbl, base, name, umap_comp, HH_cut=20000)
            print('HH_pdQS', HH_pdQS)
            joblib.dump(umapT_spec, f'{PRETRAIN_PATH}/umap_spec_b{base}.sav')
            HH_pdQS.to_csv(f'{PRETRAIN_PATH}/spec_HHQS.csv', index=False)
        else:
            umapT_spec = joblib.load(f'{PRETRAIN_PATH}/umap_spec_b{base}.sav')
        print('=====================UMAP PROJECTING PHOTO ====================')
        photo_mapped = get_mapping_pd(photoHH_pd, umapT_spec, list(range(ftr_len)))
        print('=====================SAVING SMAPPED PHOTO ====================')
        photo_mapped.to_csv(
            f'{PRETRAIN_PATH}/photoUTe{EXACT_COUNTING}.csv', index=False)
    else:
        if not PREPRO_HH_SPEC:
            df_lbl = pd.read_csv(f'{PRETRAIN_PATH}/spec_lbl_encode.csv')
        print('=============MAPPING SPEC============')
        if PREPRO_SPEC_UT:
            if PREPRO_PHOTO_UMAP:
                photoHH_pdh = photoHH_pd[:topk]
                print(photoHH_pdh)
                try:
                    umapT_photo = get_umap_pd(
                        photoHH_pdh, list(range(ftr_len)), umap_comp)
                except Exception:
                    # Fall back to the string column labels (ftr_str) when the
                    # integer labels are rejected.
                    # NOTE(review): presumably the CSV-loaded table has string
                    # column names - confirm.
                    umapT_photo = get_umap_pd(photoHH_pdh, ftr_str, umap_comp)
                joblib.dump(umapT_photo, f'{PRETRAIN_PATH}/umap_photo_b{base}.sav')
                photoHH_pdh.to_csv(f'{PRETRAIN_PATH}/photoHH_pdh.csv', index=False)
            else:
                # Load from the same directory the dump above writes to (this
                # was a hard-coded 'pretrain/' prefix).
                umapT_photo = joblib.load(f'{PRETRAIN_PATH}/umap_photo_b{base}.sav')
            if not PREPRO_NORM_PARAMS:
                dfspec = pd.read_csv(f'{PRETRAIN_PATH}/spec_norm.csv')
            # Scale by (base - 1) and round before projecting.
            dfspec = (dfspec * (base - 1)).round()
            spec_pm = get_mapping_pd(dfspec, umapT_photo, dfspec.keys())
            spec_pm.to_csv(
                f'{PRETRAIN_PATH}/spec_pm_e{EXACT_COUNTING}.csv', index=False)
        else:
            spec_pm = pd.read_csv(f'{PRETRAIN_PATH}/spec_pm_e{EXACT_COUNTING}.csv')
        specUT_lbled = pd.concat([spec_pm, df_lbl], axis=1)
        specUT_lbled.to_csv(
            f'{PRETRAIN_PATH}/spec_pm_e{EXACT_COUNTING}_lbl.csv', index=False)
def main():
    """Run the joint photo/spec pipeline, one flag-gated stage at a time.

    Stages, each controlled by a module-level flag and writing its output
    under ``PRETRAIN``:
      * ``PRE_NORM``: normalise photo and spec data via ``prepro_photo_spec``.
      * ``PRE_HH``: encode both tables, store the spec encoding as the
        ``'encode'`` column of the label table, and compute heavy hitters
        for both streams.  When ``PRE_HH`` is off but ``PRE_UMAP`` or
        ``MAP_SPEC`` is on, the saved heavy hitters and labels are loaded.
      * ``PRE_UMAP``: fit a transform on the photo heavy hitters and save it;
        otherwise (``MAP_SPEC``) load the saved transform.
      * ``MAP_SPEC``: project the spec table through the transform; otherwise
        load the saved projection.  The projection is then joined with the
        labels and written out.
      * ``UPLOAD_SCI``: upload ``photo_HH`` as a table via ``CasJobs``.

    NOTE(review): ``dfphoto``/``dfspec``/``df_lbl``/``photo_HH`` are only
    bound for particular flag combinations; inconsistent combinations still
    fail with ``NameError``.
    """
    try:
        os.mkdir(PRETRAIN)
    except FileExistsError:
        # Re-running against an existing output directory is the normal case.
        print('here we go!')

    if PRE_NORM:
        dfphoto, dfspec, df_lbl = prepro_photo_spec(
            PHOTO_DATA, SPEC_DATA, base, ftr, wpath=PRETRAIN)

    if PRE_HH:
        print('=====================ENCODE PHOTO ====================')
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        spec_stream = get_encode_stream(dfspec, base, dtype)
        df_lbl['encode'] = spec_stream
        df_lbl.to_csv(f'{PRETRAIN}/spec_lbl_encode.csv', index=False)

        photo_HH = get_HH_pd(
            photo_stream, base, ftr_len, dtype, EXACT, topk,
            r=16, d=1000000, c=None, device=None)
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            # Exact counting is truncated here to the first topk rows.
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)

        spec_HH = get_HH_pd(spec_stream, base, ftr_len, dtype, True, topk)
        spec_HH.to_csv(f'{PRETRAIN}/spec_HH.csv', index=False)
    elif PRE_UMAP or MAP_SPEC:
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')
        spec_HH = pd.read_csv(f'{PRETRAIN}/spec_HH.csv')
        df_lbl = pd.read_csv(f'{PRETRAIN}/spec_lbl_encode.csv')

    if PRE_HH or PRE_UMAP or MAP_SPEC:
        # The heavy-hitter tables are bound only in the branches above; guard
        # the debug prints so other flag combinations do not raise NameError.
        print('photo_HH', photo_HH)
        print('spec_HH', spec_HH)

    if PRE_UMAP:
        print('=============GETTING UMAP============')
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except Exception:
            # Fall back to the string column labels (ftr_str) when the integer
            # labels are rejected.
            # NOTE(review): presumably the CSV-loaded table has string column
            # names - confirm.
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT_b{base}.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # Load from the same directory the dump above writes to (this was a
        # hard-coded 'pretrain/' prefix).
        photo_uT = joblib.load(f'{PRETRAIN}/photo_uT_b{base}.sav')

    if MAP_SPEC:
        if not PRE_NORM:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        # Scale by (base - 1) and round; the assert checks the result stays
        # within [0, base - 1].
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <= base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        # NOTE(review): the literal '******' values look like redacted
        # placeholders - supply real credentials (e.g. via getpass.getpass())
        # before enabling UPLOAD_SCI.
        username = '******'
        password = '******'
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")
def main():
    """Run the standardised photo/spec pipeline, one flag-gated stage at a time.

    Stages, each controlled by a module-level flag and writing its output
    under ``PRETRAIN``:
      * ``PRE_SPEC``: standardise the spec data via ``prepro_std_specs``,
        which also yields ``vmean``/``vstd`` and the label table.  When off
        and ``PRE_PHOTO_HH`` is on, ``vmean``/``vstd`` are loaded from text
        files instead.
      * ``PRE_PHOTO_HH``: standardise and encode the photo data and compute
        its heavy hitters; otherwise (``PRE_UMAP``) load the saved table.
      * ``PRE_UMAP``: fit a transform on the photo heavy hitters and save it;
        otherwise (``MAP_SPEC``) load the saved transform.
      * ``MAP_SPEC``: project the spec table through the transform; otherwise
        load the saved projection.  The projection is then joined with the
        labels and written out.
      * ``UPLOAD_SCI``: upload ``photo_HH`` as a table via ``CasJobs``.

    NOTE(review): ``df_lbl`` is only bound when ``PRE_SPEC`` is true, so the
    final label join raises ``NameError`` otherwise; which saved label file
    should be loaded in that case is not determinable from this function.
    """
    try:
        os.mkdir(PRETRAIN)
    except FileExistsError:
        # Re-running against an existing output directory is the normal case.
        print('here we go!')

    if PRE_SPEC:
        dfspec, vmean, vstd, df_lbl = prepro_std_specs(
            SPEC_DATA, ftr=ftr, sig=3.0, w=True, wpath=PRETRAIN)
    elif PRE_PHOTO_HH:
        # Use PRETRAIN like every other path in this function (this was
        # PRETRAIN_PATH).
        # NOTE(review): presumably prepro_std_specs(w=True, wpath=PRETRAIN)
        # writes these files - confirm.
        vmean = np.loadtxt(f'{PRETRAIN}/vmean.txt')
        vstd = np.loadtxt(f'{PRETRAIN}/vstd.txt')

    if PRE_PHOTO_HH:
        print('=====================PREPRO PHOTO====================')
        dfphoto = prepro_std_photos(PHOTO_DATA, vmean, vstd, ftr=ftr, sig=3.0)
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        photo_HH = get_HH_pd(
            photo_stream, base, ftr_len, dtype, EXACT, topk,
            r=16, d=1000000, c=None, device=None)
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            # Exact counting is truncated here to the first topk rows.
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif PRE_UMAP:
        # read_csv has no ``columns`` keyword; passing it raised TypeError.
        # The file is read with its own header and the column-label fallback
        # below handles the label type.
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')

    if PRE_UMAP:
        print('=============GETTING UMAP============')
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except Exception:
            # Fall back to the string column labels (ftr_str) when the integer
            # labels are rejected.
            # NOTE(review): presumably the CSV-loaded table has string column
            # names - confirm.
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # Load from the same directory the dump above writes to (this was a
        # hard-coded 'pretrain/' prefix).
        photo_uT = joblib.load(f'{PRETRAIN}/photo_uT.sav')

    if MAP_SPEC:
        if not PRE_SPEC:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        # Scale by (base - 1) and round; the assert checks the result stays
        # within [0, base - 1].
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <= base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    # NOTE(review): df_lbl is unbound here unless PRE_SPEC ran above.
    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        # NOTE(review): the literal '******' values look like redacted
        # placeholders - supply real credentials (e.g. via getpass.getpass())
        # before enabling UPLOAD_SCI.
        username = '******'
        password = '******'
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")