def read_detection_rate(infilename):
    """
    Read in the detection rate information for a given star/date combination.
    :param infilename: The file to read in. It can be one of two things:
                       1. A csv file (must have the extension .csv) containing the information for all the stars.
                          This is the file output as Sensitivity_Dataframe.csv in Sensitivity.analyze_sensitivity.
                       2. An hdf5 file containing the raw sensitivity information.
    :return: a nested dictionary of pandas dataframes holding the detection rate
             and average significance for each star/date combination the user chooses,
             keyed first by 'detrate' or 'significance' and then by the group key.
    """

    # Read in the data, however it was stored.
    if infilename.endswith('.csv'):
        df = pd.read_csv(infilename)
    else:
        # Assume an HDF5 file. Eventually, I should have it throw an informative error...
        df = Sensitivity.read_hdf5(infilename)
        df.to_csv('temp.csv', index=False)

    # Group by primary star, date observed, and the way the CCFs were added.
    groups = df.groupby(['star', 'date', 'addmode', 'primary SpT'])

    # Have the user choose which groups to analyze
    for i, key in enumerate(groups.groups.keys()):
        print('[{}]: {}'.format(i + 1, key))
    inp = raw_input('Enter the numbers of the keys you want to plot (, or - delimited): ')
    chosen = Sensitivity.parse_input(inp)
    keys = [k for i, k in enumerate(groups.groups.keys()) if i + 1 in chosen]

    # Compile dataframes for each star
    dataframes = defaultdict(lambda: defaultdict(pd.DataFrame))
    for key in keys:
        g = groups.get_group(key)
        detrate = g.groupby(['temperature', 'vsini', 'logL', 'contrast']).apply(
            lambda df: float(sum(df.significance.notnull())) / float(len(df)))
        significance = g.groupby(['temperature', 'vsini', 'logL', 'contrast']).apply(
            lambda df: np.nanmean(df.significance))
        dataframes['detrate'][key] = detrate.reset_index().rename(columns={0: 'detection rate'})
        dataframes['significance'][key] = significance.reset_index().rename(columns={0: 'significance'})

    return dataframes
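
# A minimal usage sketch, assuming the CSV written by
# Sensitivity.analyze_sensitivity is available and that the interactive prompt
# is answered at the console.
dataframes = read_detection_rate('Sensitivity_Dataframe.csv')
for key, detrate_df in dataframes['detrate'].items():
    # key is a (star, date, addmode, primary SpT) tuple chosen by the user.
    print(key)
    print(detrate_df.head())
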
Example #2
def calSensitiValues(file, max_s, EntCharDict, OutECDict):
    """
    Compute a per-character sensitivity value for every sentence in a
    CoNLL-style file (one "character<TAB>label" pair per line, sentences
    separated by blank lines), truncating or padding each sentence to max_s.
    """
    data_s_all = []

    f = codecs.open(file, 'r', encoding='utf-8')
    data_s = []
    for line in f.readlines():

        # A blank line marks the end of a sentence: truncate to max_s and pad
        # with log(1e-5) placeholders, then start collecting the next sentence.
        if len(line) <= 1:
            data_s = data_s[0:min(len(data_s), max_s)] + [[math.log(
                1e-5)]] * max(0, max_s - len(data_s))
            data_s_all.append(data_s)
            data_s = []
            continue

        # Each non-blank line is "character<TAB>label"; score the character.
        sent = line.strip('\r\n').rstrip('\n').split('\t')
        chara = sent[0]
        sv = Sensitivity.calSensitiValue1(chara, EntCharDict, OutECDict)
        data_s.append([sv])

    f.close()

    return data_s_all
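
# A minimal usage sketch, assuming a CoNLL-style input file where each line is
# "character<TAB>label" and sentences are separated by blank lines. The file
# paths and max_s value mirror those used in the examples below; the two-value
# unpacking follows the __main__ block further down (get_data unpacks four
# values from the same call).
EntCharDict, OutECDict = Sensitivity.GetVariousDist('./data/subtask1_training_all.txt')
sensiti_values = calSensitiValues('./data/subtask1_training_all.conll.txt', 136,
                                  EntCharDict, OutECDict)
# One entry per sentence; each entry holds 136 single-element lists, padded with
# [math.log(1e-5)] where the sentence is shorter than max_s.
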
Example #3
def get_data(trainfile,
             testfile,
             w2v_file,
             c2v_file,
             base_datafile,
             user_datafile,
             w2v_k,
             c2v_k=100,
             data_split=1,
             maxlen=50):
    """
    数据处理的入口函数
    Converts the input files  into the model input formats

    """
    '''
    pos_vob, pos_idex_word = get_Feature_index([trainfile,devfile,testfile])
    pos_train = make_idx_POS_index(trainfile, max_s, pos_vob)
    pos_dev = make_idx_POS_index(devfile, max_s, pos_vob)
    pos_test = make_idx_POS_index(testfile, max_s, pos_vob)
    pos_W, pos_k = load_vec_character(pos_vob, 30)
    # pos_k, pos_W = load_vec_onehot(pos_vob)

    # print('entlabel vocab size:'+str(len(entlabel_vob)))
    print('shape in pos_W:', pos_W.shape)
    '''

    if not os.path.exists(base_datafile):

        print("Precess base data....")
        char_vob, idex_2char, target_vob, idex_2target, max_s = get_Character_index(
            {trainfile})
        print("source char size: ", char_vob.__len__())
        print("max_s: ", max_s)
        # max_s = 136
        # print("max_s: ", max_s)
        print("source char: ", len(idex_2char))
        print("target vocab size: ", len(target_vob), str(target_vob))
        print("target vocab size: ", len(idex_2target))

        if 'DoubleEmd' in c2v_file:
            char_k, char_W = load_vec_txt_DoubleEmd(c2v_file, char_vob, c2v_k)
        else:
            char_k, char_W = load_vec_txt(c2v_file, char_vob, c2v_k)
        print('character_W shape:', char_W.shape)

        print("base dataset created!")
        out = open(base_datafile, 'wb')
        pickle.dump([
            char_vob, target_vob, idex_2char, idex_2target, char_W, char_k,
            max_s
        ], out, 0)
        out.close()

    else:
        print("base data has existed ....")
        char_vob, target_vob,\
        idex_2char, idex_2target,\
        char_W,\
        char_k,\
        max_s = pickle.load(open(base_datafile, 'rb'))

    train_all, target_all = make_idx_Char_index(trainfile, max_s, char_vob,
                                                target_vob)

    file = './data/subtask1_training_all.txt'
    EntCharDict, OutECDict, count_allc, count_entc = Sensitivity.GetVariousDist(
        file)
    train_all_SensitiV = calSensitiValues(trainfile, max_s, EntCharDict,
                                          OutECDict)

    extra_test_num = int(len(train_all) / 5)
    # test_all, test_target_all = make_idx_Char_index(testfile, max_s, char_vob, target_vob)
    # test = train_all[:extra_test_num]
    # test_label = target_all[:extra_test_num]
    # train = train_all[extra_test_num:] + test_all[:]
    # train_label = target_all[extra_test_num:] + test_target_all[:]
    # print('extra_test_num', extra_test_num)

    # Hold out one fifth of the training data as the test split, selected by
    # the 1-based data_split index.
    split_start = extra_test_num * (data_split - 1)
    split_end = extra_test_num * data_split
    test = train_all[split_start:split_end]
    test_SensitiV = train_all_SensitiV[split_start:split_end]
    test_label = target_all[split_start:split_end]
    train = train_all[:split_start] + train_all[split_end:]
    train_SensitiV = train_all_SensitiV[:split_start] + train_all_SensitiV[split_end:]
    train_label = target_all[:split_start] + target_all[split_end:]
    print('extra_test_num....data_split', extra_test_num, data_split)

    print('train len  ', len(train), len(train_label))
    print('test len  ', len(test), len(test_label))

    print("dataset created!")
    out = open(user_datafile, 'wb')
    pickle.dump(
        [train, train_SensitiV, train_label, test, test_SensitiV, test_label],
        out, 0)
    out.close()
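
# A minimal sketch of reading back the pickle written above; the path is a
# hypothetical placeholder for whatever user_datafile was passed to get_data.
import pickle
with open('./data/model_input.pkl', 'rb') as f:
    train, train_SensitiV, train_label, test, test_SensitiV, test_label = pickle.load(f)
print('train/test sizes:', len(train), len(test))
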
Example #4
    print("dataset created!")
    out = open(user_datafile, 'wb')
    pickle.dump(
        [train, train_SensitiV, train_label, test, test_SensitiV, test_label],
        out, 0)
    out.close()


if __name__ == "__main__":
    print(20 * 2)

    trainfile = './data/subtask1_training_all.conll.txt'

    c2v_file = "./data/preEmbedding/CCKS2019_DoubleEmd_Char2Vec.txt"

    print("Precess base data....")
    char_vob, idex_2char, target_vob, idex_2target, max_s = get_Character_index(
        {trainfile})
    print("source char size: ", char_vob.__len__())
    print("max_s: ", max_s)
    max_s = 136
    print("max_s: ", max_s)
    print("source char: ", len(idex_2char))
    print("target vocab size: ", len(target_vob), str(target_vob))
    print("target vocab size: ", len(idex_2target))

    file = './data/subtask1_training_all.txt'
    EntCharDict, OutECDict = Sensitivity.GetVariousDist(file)
    train_all_SensitiV = calSensitiValues(trainfile, max_s, EntCharDict,
                                          OutECDict)
    Sensitivity.Analyze(new_file_list, new_prim_vsini,
                        hdf5_file='/media/ExtraSpace/PhoenixGrid/IGRINS_Grid.hdf5',
                        extensions=True,
                        resolution=None,
                        trimsize=trimsize,
                        badregions=badregions, interp_regions=interp_regions,
                        metal_values=(0.0,),
                        vsini_values=(75, 100, 125, 150, 175, 200, 225, 250),
                        Tvalues=range(3000, 3300, 100),
                        debug=False,
                        addmode='all',
                        output_mode='hdf5',
                        output_file='Sensitivity.hdf5')


if __name__ == '__main__':
    if '--analyze' in sys.argv[1]:
        # Make the 2d plots
        df = Sensitivity.analyze_sensitivity(hdf5_file='Sensitivity.hdf5', interactive=False, update=False)

    elif '--marginalize' in sys.argv[1]:
        fig, ax = Sensitivity.marginalize_sensitivity(infilename='Sensitivity_Dataframe.csv')
        # plt.show()
        ensure_dir('Figures/')
        plt.savefig('Figures/Sensitivity_Marginalized.pdf')


    else:
        check_sensitivity()
Example #6
def calc_Error(PS, k, z, flag1, flag2, PS_CII=0.0, PS_21=0.0):
  """
  Estimate the error on the power spectrum PS at wavenumbers k and redshift z,
  for a given 21cm instrument (flag1: 'SKA' or 'LOFAR') and signal type
  (flag2: 'CII', 'cross' or '21').
  """
  if flag1=='SKA':
    lamda1=21000.
    t_int1=6000.
    N_survey=866
    delta_nu1=0.00025
    baseline=40000.
    Aeff = 962.0
    d_antenna=962.
    bmax,plm = 40286.83, 10**5
    theta1= 3.14*0.21/(baseline*180)
  if flag1=='LOFAR':
    lamda1=21000.
    B_nu1=0.008
    Omega_S1=25.
    delta_nu1=0.00025
    t_int1=6000.
    Aeff = 526.0
    baseline=3400.
    N_survey=48.
    bmax,plm = 3475.584,10**6
    theta1= 3.14*0.21/(baseline*180)

  V_s=3.3*10**7*(2./16.0)*(80.0/20.0)*((1.+z)/8.0)**0.5 #cMpc/h
  
  if flag2=='CII':
    aperture=12.0  #12m CONCERTO - 6m Stage 2
    transmission=0.3
    A_survey=2.0 #2 deg^2 CONCERTO - 10 deg^2 Stage 2
    d_nu=1500.  #1.5GHz CONCERTO - 0.4 Stage 2
    NEFD=0.155 #155mJy CONCERTO 31mJy Stage 2
    N_pix=1500.
    t_int=1500.*3600. #1500hr CONCERTO - 1000hr Stage2
    theta_beam=1.22*158.0*(1+z)/(aperture*10**6)
    Omega_beam=2.*3.14*(theta_beam/2.355)**2
    print(Omega_beam)
    t_pix=t_int*N_pix*Omega_beam/(A_survey*3.14*3.14/(180.**2))
    print(t_pix)
    sigma_pix=NEFD/Omega_beam
    print(sigma_pix)
    V_pix_CII= 1.1*(10**3)*(((1.+z)/8.)**0.5)*(((theta_beam*180*60.)/(10.*3.14))**2)*(d_nu/400.)
    print(V_pix_CII)
    PN_CII=(sigma_pix**2)*V_pix_CII/t_pix
    print(PN_CII)
    delta_k=k[2]-k[1]
    N_m=(V_s*delta_k*(k**2))/(4.*3.141*3.141)
    error=(PS + (PN_CII*(k**3)/(2.*3.14*3.14)))/np.sqrt(N_m)

  if flag2=='cross':
    aperture=12.0  #12m CONCERTO - 6m Stage 2
    transmission=0.3
    A_survey=2. #2 deg^2 CONCERTO - 10 deg^2 Stage 2
    d_nu=1500.  #1.5GHz CONCERTO - 0.4 Stage 2
    NEFD=0.155 #155mJy CONCERTO 31mJy Stage 2
    N_pix=1500.
    t_int=1500.*3600. #1500hr CONCERTO - 1000hr Stage2
    theta_beam=1.22*158.0*(1+z)/(aperture*10**6)
    Omega_beam=2.*3.14*(theta_beam/2.355)**2
    print(Omega_beam)
    t_pix=t_int*N_pix*Omega_beam/(A_survey*3.14*3.14/(180.**2))
    print(t_pix)
    sigma_pix=NEFD/Omega_beam
    print(sigma_pix)
    V_pix_CII= 1.1*(10**3)*(((1.+z)/8.)**0.5)*(((theta_beam*180*60.)/(10.*3.14))**2)*(d_nu/400.)
    print(V_pix_CII)
    PN_CII=(sigma_pix**2)*V_pix_CII/t_pix
    print(PN_CII)
    delta_k=k[2]-k[1]
    N_m=(V_s*delta_k*(k**2))/(4.*3.141*3.141)
    bmin= 14.0# m
    wl = 0.21 * (1.0+z) # m
    umin, umax = bmin/wl, bmax/wl
    umax = bmax/wl
    c = 3.0e8# m
    nu = c/wl # Hz
    nu *= 1.0e-6 # MHz
    Tsys = 60.0*(300.0/nu)**2.55 # K
    sens_hera =Sensitivity.sens_Parsons2012(k, t_days=180.0, B=6.0, Tsys=Tsys,
                                N=N_survey, u_max=umax, Omega=wl**2/Aeff)
    error=np.sqrt(PS**2+(PS_21+sens_hera*np.sqrt(N_m))*(PS_CII+(PN_CII*V_pix_CII*(k**3)/(2.*3.14*3.14))))/np.sqrt(N_m)
    

  if flag2=='21':
    V_pix1=1.1*(10**3)*(21000./158.)*(((1.+z)/8)**0.5)*((theta1/10.)**2)*(delta_nu1/400)  #cMpc/h
    delta_k=k[3]-k[2]
    N_m=(V_s*delta_k*(k**2))/(4*3.141*3.141)
    bmin= 14.0 # m
    wl = 0.21 * (1.0+z) # m
    umin, umax = bmin/wl, bmax/wl
    umax = bmax/wl
    c = 3.0e8 # m
    nu = c/wl # Hz
    nu *= 1.0e-6 # MHz
    Tsys = 60.0*(300.0/nu)**2.55 # K
    sens_hera =Sensitivity.sens_Parsons2012(k, t_days=180.0, B=6.0, Tsys=Tsys,
                                N=N_survey, u_max=umax, Omega=wl**2/Aeff)
    error=(PS/np.sqrt(N_m) + sens_hera)

  return error
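
# A minimal usage sketch for calc_Error, assuming numpy is imported as np (as
# elsewhere in this snippet); k and PS below are placeholder arrays, not
# physical inputs.
k = np.linspace(0.05, 1.0, 20)
PS = np.ones_like(k)
err_21 = calc_Error(PS, k, z=7.0, flag1='SKA', flag2='21')
err_cross = calc_Error(PS, k, z=7.0, flag1='SKA', flag2='cross', PS_CII=PS, PS_21=PS)
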
def make_plot(filename, prim_spt, Tsec, instrument, vsini):
    """Read the spectral orders from filename and plot the expected signal for a companion of temperature Tsec."""
    orders = HelperFunctions.ReadExtensionFits(filename)
    fig, ax, _ = Sensitivity.plot_expected(orders, prim_spt, Tsec, instrument, vsini=vsini)
    fig.subplots_adjust(left=0.2, bottom=0.18, right=0.94, top=0.94)

    plt.show()
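
# A minimal usage sketch, assuming a multi-extension FITS file readable by
# HelperFunctions.ReadExtensionFits; the file name and parameter values are
# hypothetical placeholders.
make_plot('HIP_12345.fits', prim_spt='A0V', Tsec=3500, instrument='IGRINS', vsini=150)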