from collections import defaultdict

import numpy as np
import pandas as pd

import Sensitivity


def read_detection_rate(infilename):
    """
    Read in the detection rate information for a given star/date combination.

    :param infilename: The file to read in. It can be one of two things:

        1. A csv file (must have the extension .csv) containing the
           information for all the stars. This is the file output as
           Sensitivity_Dataframe.csv in Sensitivity.analyze_sensitivity.
        2. An hdf5 file containing the raw sensitivity information.

    :return: a nested dictionary of pandas dataframes, keyed first by
        'detrate'/'significance' and then by the star/date combinations
        the user chooses, holding the detection rate and average
        significance for each combination.
    """
    # Read in the data, however it was stored.
    if infilename.endswith('csv'):
        df = pd.read_csv(infilename)
    else:
        # Assume an HDF5 file. Anything else should eventually raise an
        # informative error inside read_hdf5.
        df = Sensitivity.read_hdf5(infilename)
        df.to_csv('temp.csv', index=False)

    # Group by primary star, date observed, and the way the CCFs were added.
    groups = df.groupby(['star', 'date', 'addmode', 'primary SpT'])

    # Have the user choose which groups to analyze.
    for i, key in enumerate(groups.groups.keys()):
        print('[{}]: {}'.format(i + 1, key))
    inp = input('Enter the numbers of the keys you want to plot (, or - delimited): ')
    chosen = Sensitivity.parse_input(inp)
    keys = [k for i, k in enumerate(groups.groups.keys()) if i + 1 in chosen]

    # Compile detection-rate and mean-significance dataframes for each chosen group.
    dataframes = defaultdict(lambda: defaultdict(pd.DataFrame))
    for key in keys:
        g = groups.get_group(key)
        detrate = g.groupby(['temperature', 'vsini', 'logL', 'contrast']).apply(
            lambda df: float(sum(df.significance.notnull())) / float(len(df)))
        significance = g.groupby(['temperature', 'vsini', 'logL', 'contrast']).apply(
            lambda df: np.nanmean(df.significance))
        dataframes['detrate'][key] = detrate.reset_index().rename(columns={0: 'detection rate'})
        dataframes['significance'][key] = significance.reset_index().rename(columns={0: 'significance'})

    return dataframes
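# Usage sketch (hedged): assumes a Sensitivity_Dataframe.csv produced by
# Sensitivity.analyze_sensitivity is present in the working directory; the
# filename is illustrative, and the function prompts interactively for the
# group keys to keep.
#
#   frames = read_detection_rate('Sensitivity_Dataframe.csv')
#   # frames['detrate'][key] has columns temperature/vsini/logL/contrast
#   # plus 'detection rate'; frames['significance'][key] is analogous.
#   for key, detrate_df in frames['detrate'].items():
#       print(key, detrate_df['detection rate'].mean())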
import codecs
import math

import Sensitivity


def calSensitiValues(file, max_s, EntCharDict, OutECDict):
    """Read a character-per-line file and return, for every sentence, a list
    of per-character sensitivity values padded/truncated to length max_s."""
    data_s_all = []
    f = codecs.open(file, 'r', encoding='utf-8')
    data_s = []
    for line in f.readlines():
        if len(line) <= 1:
            # Blank line: end of sentence. Truncate to max_s, then pad with
            # log(1e-5) so every sentence has exactly max_s values.
            data_s = data_s[0:min(len(data_s), max_s)] + \
                     [[math.log(1e-5)]] * max(0, max_s - len(data_s))
            data_s_all.append(data_s)
            data_s = []
            continue
        sent = line.strip('\r\n').rstrip('\n').split('\t')
        chara = sent[0]
        sv = Sensitivity.calSensitiValue1(chara, EntCharDict, OutECDict)
        data_s.append([sv])
    f.close()
    return data_s_all
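# Usage sketch (hedged): Sensitivity.GetVariousDist is assumed to build the
# two character-distribution dicts from the raw training text, as it is used
# in get_data below; the paths mirror the ones hard-coded in this project.
#
#   EntCharDict, OutECDict, count_allc, count_entc = \
#       Sensitivity.GetVariousDist('./data/subtask1_training_all.txt')
#   sv_all = calSensitiValues('./data/subtask1_training_all.conll.txt',
#                             max_s=136, EntCharDict=EntCharDict,
#                             OutECDict=OutECDict)
#   # sv_all: one entry per sentence, each a max_s-long list of [value].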
def get_data(trainfile, testfile, w2v_file, c2v_file, base_datafile,
             user_datafile, w2v_k, c2v_k=100, data_split=1, maxlen=50):
    """
    Entry point for data processing.
    Converts the input files into the model input formats.
    """
    # POS-feature pipeline, kept for reference but currently unused:
    # pos_vob, pos_idex_word = get_Feature_index([trainfile, devfile, testfile])
    # pos_train = make_idx_POS_index(trainfile, max_s, pos_vob)
    # pos_dev = make_idx_POS_index(devfile, max_s, pos_vob)
    # pos_test = make_idx_POS_index(testfile, max_s, pos_vob)
    # pos_W, pos_k = load_vec_character(pos_vob, 30)
    # # pos_k, pos_W = load_vec_onehot(pos_vob)
    # # print('entlabel vocab size:' + str(len(entlabel_vob)))
    # print('shape in pos_W:', pos_W.shape)

    if not os.path.exists(base_datafile):
        print("Process base data....")
        char_vob, idex_2char, target_vob, idex_2target, max_s = get_Character_index({trainfile})
        print("source char size: ", len(char_vob))
        print("max_s: ", max_s)
        # max_s = 136
        print("source char: ", len(idex_2char))
        print("target vocab size: ", len(target_vob), str(target_vob))
        print("target index size: ", len(idex_2target))

        if 'DoubleEmd' in c2v_file:
            char_k, char_W = load_vec_txt_DoubleEmd(c2v_file, char_vob, c2v_k)
        else:
            char_k, char_W = load_vec_txt(c2v_file, char_vob, c2v_k)
        print('character_W shape:', char_W.shape)

        print("base dataset created!")
        out = open(base_datafile, 'wb')
        pickle.dump([char_vob, target_vob, idex_2char, idex_2target,
                     char_W, char_k, max_s], out, 0)
        out.close()
    else:
        print("base data already exists ....")
        char_vob, target_vob, \
            idex_2char, idex_2target, \
            char_W, char_k, max_s = pickle.load(open(base_datafile, 'rb'))

    train_all, target_all = make_idx_Char_index(trainfile, max_s, char_vob, target_vob)

    file = './data/subtask1_training_all.txt'
    EntCharDict, OutECDict, count_allc, count_entc = Sensitivity.GetVariousDist(file)
    train_all_SensitiV = calSensitiValues(trainfile, max_s, EntCharDict, OutECDict)

    # Five-fold split: fold number data_split (1-5) is held out as the test set.
    extra_test_num = int(len(train_all) / 5)

    # Earlier variant that mixed in an external test file, kept for reference:
    # test_all, test_target_all = make_idx_Char_index(testfile, max_s, char_vob, target_vob)
    # test = train_all[:extra_test_num]
    # test_label = target_all[:extra_test_num]
    # train = train_all[extra_test_num:] + test_all[:]
    # train_label = target_all[extra_test_num:] + test_target_all[:]

    test = train_all[extra_test_num * (data_split - 1):extra_test_num * data_split]
    test_SensitiV = train_all_SensitiV[extra_test_num * (data_split - 1):extra_test_num * data_split]
    test_label = target_all[extra_test_num * (data_split - 1):extra_test_num * data_split]

    train = train_all[:extra_test_num * (data_split - 1)] + train_all[extra_test_num * data_split:]
    train_SensitiV = train_all_SensitiV[:extra_test_num * (data_split - 1)] + \
        train_all_SensitiV[extra_test_num * data_split:]
    train_label = target_all[:extra_test_num * (data_split - 1)] + target_all[extra_test_num * data_split:]

    print('extra_test_num....data_split', extra_test_num, data_split)
    print('train len ', len(train), len(train_label))
    print('test len ', len(test), len(test_label))

    print("dataset created!")
    out = open(user_datafile, 'wb')
    pickle.dump([train, train_SensitiV, train_label,
                 test, test_SensitiV, test_label], out, 0)
    out.close()
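# Usage sketch (hedged): the data paths are the ones hard-coded in __main__
# below, but base_datafile/user_datafile are hypothetical placeholders, and
# testfile/w2v_file are unused by the current split logic, so None is shown
# only for illustration.
#
#   get_data(trainfile='./data/subtask1_training_all.conll.txt',
#            testfile=None, w2v_file=None,
#            c2v_file='./data/preEmbedding/CCKS2019_DoubleEmd_Char2Vec.txt',
#            base_datafile='./model_data/base_data.pkl',     # hypothetical
#            user_datafile='./model_data/user_data.pkl',     # hypothetical
#            w2v_k=100, c2v_k=100, data_split=1)
#   # The pickle layout matches the dump order at the end of get_data:
#   train, train_SensitiV, train_label, test, test_SensitiV, test_label = \
#       pickle.load(open('./model_data/user_data.pkl', 'rb'))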
print("dataset created!") out = open(user_datafile, 'wb') pickle.dump( [train, train_SensitiV, train_label, test, test_SensitiV, test_label], out, 0) out.close() if __name__ == "__main__": print(20 * 2) trainfile = './data/subtask1_training_all.conll.txt' c2v_file = "./data/preEmbedding/CCKS2019_DoubleEmd_Char2Vec.txt" print("Precess base data....") char_vob, idex_2char, target_vob, idex_2target, max_s = get_Character_index( {trainfile}) print("source char size: ", char_vob.__len__()) print("max_s: ", max_s) max_s = 136 print("max_s: ", max_s) print("source char: ", len(idex_2char)) print("target vocab size: ", len(target_vob), str(target_vob)) print("target vocab size: ", len(idex_2target)) file = './data/subtask1_training_all.txt' EntCharDict, OutECDict = Sensitivity.GetVariousDist(file) train_all_SensitiV = calSensitiValues(trainfile, max_s, EntCharDict, OutECDict)
# Tail of the sensitivity-check script: new_file_list, new_prim_vsini,
# trimsize, badregions, and interp_regions are defined earlier (not shown).
Sensitivity.Analyze(new_file_list, new_prim_vsini,
                    hdf5_file='/media/ExtraSpace/PhoenixGrid/IGRINS_Grid.hdf5',
                    extensions=True,
                    resolution=None,
                    trimsize=trimsize,
                    badregions=badregions,
                    interp_regions=interp_regions,
                    metal_values=(0.0,),
                    vsini_values=(75, 100, 125, 150, 175, 200, 225, 250),
                    Tvalues=range(3000, 3300, 100),
                    debug=False,
                    addmode='all',
                    output_mode='hdf5',
                    output_file='Sensitivity.hdf5')


if __name__ == '__main__':
    # Avoid an IndexError when the script is run without arguments.
    mode = sys.argv[1] if len(sys.argv) > 1 else ''
    if '--analyze' in mode:
        # Make the 2d plots.
        df = Sensitivity.analyze_sensitivity(hdf5_file='Sensitivity.hdf5',
                                             interactive=False, update=False)
    elif '--marginalize' in mode:
        fig, ax = Sensitivity.marginalize_sensitivity(infilename='Sensitivity_Dataframe.csv')
        # plt.show()
        ensure_dir('Figures/')
        plt.savefig('Figures/Sensitivity_Marginalized.pdf')
    else:
        check_sensitivity()
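# Invocation sketch (the script name is hypothetical):
#   python check_sensitivity.py                # run the full sensitivity grid
#   python check_sensitivity.py --analyze      # make the 2d plots from Sensitivity.hdf5
#   python check_sensitivity.py --marginalize  # marginalize and save Figures/Sensitivity_Marginalized.pdf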
import numpy as np

import Sensitivity


def calc_Error(PS, k, z, flag1, flag2, PS_CII=0.0, PS_21=0.0):
    """Estimate the error on the power spectrum PS(k) at redshift z.

    flag1 selects the 21cm instrument ('SKA' or 'LOFAR'); flag2 selects the
    signal ('CII', 'cross', or '21'). PS_CII and PS_21 are only used for the
    cross-power error."""
    if flag1 == 'SKA':
        lamda1 = 21000.
        t_int1 = 6000.
        N_survey = 866
        delta_nu1 = 0.00025
        baseline = 40000.
        Aeff = 962.0
        d_antenna = 962.
        bmax, plm = 40286.83, 10**5
        theta1 = 3.14 * 0.21 / (baseline * 180)
    if flag1 == 'LOFAR':
        lamda1 = 21000.
        B_nu1 = 0.008
        Omega_S1 = 25.
        delta_nu1 = 0.00025
        t_int1 = 6000.
        Aeff = 526.0
        baseline = 3400.
        N_survey = 48.
        bmax, plm = 3475.584, 10**6
        theta1 = 3.14 * 0.21 / (baseline * 180)

    V_s = 3.3 * 10**7 * (2. / 16.0) * (80.0 / 20.0) * ((1. + z) / 8.0)**0.5  # survey volume, (cMpc/h)^3

    if flag2 == 'CII':
        aperture = 12.0        # 12 m CONCERTO - 6 m Stage 2
        transmission = 0.3
        A_survey = 2.0         # 2 deg^2 CONCERTO - 10 deg^2 Stage 2
        d_nu = 1500.           # 1.5 GHz CONCERTO - 0.4 Stage 2
        NEFD = 0.155           # 155 mJy CONCERTO - 31 mJy Stage 2
        N_pix = 1500.
        t_int = 1500. * 3600.  # 1500 hr CONCERTO - 1000 hr Stage 2
        theta_beam = 1.22 * 158.0 * (1 + z) / (aperture * 10**6)
        Omega_beam = 2. * 3.14 * (theta_beam / 2.355)**2
        print(Omega_beam)
        t_pix = t_int * N_pix * Omega_beam / (A_survey * 3.14 * 3.14 / (180.**2))
        print(t_pix)
        sigma_pix = NEFD / Omega_beam
        print(sigma_pix)
        V_pix_CII = 1.1 * (10**3) * (((1. + z) / 8.)**0.5) * (((theta_beam * 180 * 60.) / (10. * 3.14))**2) * (d_nu / 400.)  # (cMpc/h)^3
        print(V_pix_CII)
        PN_CII = (sigma_pix**2) * V_pix_CII / t_pix
        print(PN_CII)
        delta_k = k[2] - k[1]
        N_m = (V_s * delta_k * (k**2)) / (4. * 3.141 * 3.141)  # number of modes per k bin
        error = (PS + (PN_CII * (k**3) / (2. * 3.14 * 3.14))) / np.sqrt(N_m)

    if flag2 == 'cross':
        # Same CONCERTO setup as the 'CII' branch.
        aperture = 12.0        # 12 m CONCERTO - 6 m Stage 2
        transmission = 0.3
        A_survey = 2.          # 2 deg^2 CONCERTO - 10 deg^2 Stage 2
        d_nu = 1500.           # 1.5 GHz CONCERTO - 0.4 Stage 2
        NEFD = 0.155           # 155 mJy CONCERTO - 31 mJy Stage 2
        N_pix = 1500.
        t_int = 1500. * 3600.  # 1500 hr CONCERTO - 1000 hr Stage 2
        theta_beam = 1.22 * 158.0 * (1 + z) / (aperture * 10**6)
        Omega_beam = 2. * 3.14 * (theta_beam / 2.355)**2
        print(Omega_beam)
        t_pix = t_int * N_pix * Omega_beam / (A_survey * 3.14 * 3.14 / (180.**2))
        print(t_pix)
        sigma_pix = NEFD / Omega_beam
        print(sigma_pix)
        V_pix_CII = 1.1 * (10**3) * (((1. + z) / 8.)**0.5) * (((theta_beam * 180 * 60.) / (10. * 3.14))**2) * (d_nu / 400.)
        print(V_pix_CII)
        PN_CII = (sigma_pix**2) * V_pix_CII / t_pix
        print(PN_CII)
        delta_k = k[2] - k[1]
        N_m = (V_s * delta_k * (k**2)) / (4. * 3.141 * 3.141)
        bmin = 14.0  # m
        wl = 0.21 * (1.0 + z)  # m
        umin, umax = bmin / wl, bmax / wl
        c = 3.0e8  # m/s
        nu = c / wl  # Hz
        nu *= 1.0e-6  # MHz
        Tsys = 60.0 * (300.0 / nu)**2.55  # K
        sens_hera = Sensitivity.sens_Parsons2012(k, t_days=180.0, B=6.0, Tsys=Tsys,
                                                 N=N_survey, u_max=umax, Omega=wl**2 / Aeff)
        error = np.sqrt(PS**2 + (PS_21 + sens_hera * np.sqrt(N_m)) *
                        (PS_CII + (PN_CII * V_pix_CII * (k**3) / (2. * 3.14 * 3.14)))) / np.sqrt(N_m)

    if flag2 == '21':
        V_pix1 = 1.1 * (10**3) * (21000. / 158.) * (((1. + z) / 8)**0.5) * ((theta1 / 10.)**2) * (delta_nu1 / 400)  # (cMpc/h)^3
        delta_k = k[3] - k[2]
        N_m = (V_s * delta_k * (k**2)) / (4 * 3.141 * 3.141)
        bmin = 14.0  # m
        wl = 0.21 * (1.0 + z)  # m
        umin, umax = bmin / wl, bmax / wl
        c = 3.0e8  # m/s
        nu = c / wl  # Hz
        nu *= 1.0e-6  # MHz
        Tsys = 60.0 * (300.0 / nu)**2.55  # K
        sens_hera = Sensitivity.sens_Parsons2012(k, t_days=180.0, B=6.0, Tsys=Tsys,
                                                 N=N_survey, u_max=umax, Omega=wl**2 / Aeff)
        error = PS / np.sqrt(N_m) + sens_hera

    return error
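# Illustrative call (hedged: the k array and the flat power spectra below are
# made-up inputs, chosen only to show the expected shapes; k is in h/cMpc):
#
#   k = np.linspace(0.1, 1.0, 20)
#   err_21 = calc_Error(PS=np.full_like(k, 10.0), k=k, z=7.0,
#                       flag1='SKA', flag2='21')
#   err_x = calc_Error(PS=np.full_like(k, 5.0), k=k, z=7.0,
#                      flag1='LOFAR', flag2='cross',
#                      PS_CII=100.0, PS_21=10.0)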
import matplotlib.pyplot as plt

import HelperFunctions
import Sensitivity


def make_plot(filename, prim_spt, Tsec, instrument, vsini):
    """Plot the expected CCF for the given primary spectral type, secondary
    temperature, instrument, and vsini."""
    orders = HelperFunctions.ReadExtensionFits(filename)
    fig, ax, _ = Sensitivity.plot_expected(orders, prim_spt, Tsec, instrument, vsini=vsini)
    fig.subplots_adjust(left=0.2, bottom=0.18, right=0.94, top=0.94)
    plt.show()
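# Example call (the FITS path and parameter values are hypothetical):
#   make_plot('data/HIP12345_IGRINS.fits', prim_spt='A0V', Tsec=3400,
#             instrument='IGRINS', vsini=150.0)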