Example 1
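The snippets in all the examples below share module-level context that is not shown. A minimal sketch of the imports they appear to assume (fu, mypool, tweet_machine, main, and usage are project-local names whose definitions are not included here):

import glob
import os
import pprint
import subprocess
import sys
from os.path import join as pjoin

import matplotlib.pyplot as plt
import pandas as pd

# plus project-local helpers referenced below: fu (file utilities providing
# get_month_sample_path_from_mode, l1adatapath, and FileName), mypool (a Pool
# wrapper), tweet_machine, main, and usage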
def create_full_indexes(args):
    """Build a completely-sorted (full) index on 'clat' for one channel's HDF5 store."""
    mode, chno = args
    dirname = fu.get_month_sample_path_from_mode(mode)
    path = pjoin(dirname, 'C' + str(chno) + '.h5')
    store = pd.HDFStore(path)
    store.create_table_index('df', columns=['clat'], kind='full')
    store.close()
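The single args tuple suggests this function is driven by pool.map; a minimal driver sketch (the 'nominal' mode string is a placeholder, and the channel range 3-9 matches the thermal channels used in the other examples):

from multiprocessing import Pool

if __name__ == '__main__':
    jobs = [('nominal', ch) for ch in range(3, 10)]  # channels C3..C9
    with Pool(processes=4) as pool:
        pool.map(create_full_indexes, jobs)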
Example 2
def ptrepack_all(args):
    """Rewrite one channel's HDF5 file sorted by 'clat' via PyTables' ptrepack."""
    mode, chno = args
    dirname = fu.get_month_sample_path_from_mode(mode)
    fname_root = pjoin(dirname, 'C' + str(chno))
    infile = fname_root + '.h5'
    outfile = fname_root + '_sorted.h5'
    cmd = ['ptrepack', '--chunkshape=auto', '--sortby=clat', '--propindexes',
           infile, outfile]
    subprocess.call(cmd)
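ptrepack is the command-line repacking tool that ships with PyTables. --sortby requires a full (completely sorted) index on the sort column, which is exactly what create_full_indexes prepares, and --propindexes carries the existing indexes over to the sorted copy. For a hypothetical month directory, the subprocess call amounts to:

ptrepack --chunkshape=auto --sortby=clat --propindexes /data/month/C7.h5 /data/month/C7_sorted.h5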
Example 3
def get_percent_done(timestr, mode):
    """Check how many files have been processed.

    Used by status_checker to tweet progress regularly:

        status_checker timestr mode 60[min]

    timestr determines which folders are being checked.
    """
    fnames_out = os.listdir(fu.get_month_sample_path_from_mode(mode))
    fnames = glob.glob(os.path.join(fu.l1adatapath, timestr + '*_L1A.TAB'))
    timestrs_done = [fu.FileName(i).timestr for i in fnames_out]
    # 7 == len([3, 4, 5, 6, 7, 8, 9]), i.e. the thermal channels:
    # an input file is only done once all 7 channel files exist for its timestr
    fnames_todo = [
        i for i in fnames if timestrs_done.count(fu.FileName(i).timestr) < 7
    ]
    total = len(fnames)  # 'total' rather than 'all', which shadows the builtin
    if total == 0:
        return 100  # nothing matched, so nothing is left to do
    left = len(fnames_todo)
    done = total - left
    return 100 * done / total
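The repeated list.count makes the to-do scan quadratic in the number of files; a collections.Counter variant with the same semantics is linear:

from collections import Counter

done_counts = Counter(fu.FileName(i).timestr for i in fnames_out)
fnames_todo = [i for i in fnames if done_counts[fu.FileName(i).timestr] < 7]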
Example 4
def store_channel_csv_to_h5(args):
    """Append all hourly CSVs of one channel into a single HDF5 store."""
    mode, ch = args
    dirname = fu.get_month_sample_path_from_mode(mode)
    searchpath = pjoin(dirname, '*_C' + str(ch) + '_*.csv')
    fnames = glob.glob(searchpath)
    if not fnames:
        print("No files found with searchpath\n", searchpath)
        return
    storepath = pjoin(dirname, 'C' + str(ch) + '.h5')
    store = pd.HDFStore(storepath, 'w')
    for i, fname in enumerate(fnames):
        print(100 * i / len(fnames))
        if i % 100 == 0:
            tweet_machine("C{0} conversion to HDF, {1:g}"
                          " % done.".format(ch, 100 * i / len(fnames)))
        df = pd.read_csv(fname)  # public API instead of internal pd.io.parsers
        if len(df) == 0:
            continue
        # index=False defers index building to create_full_indexes,
        # which keeps the appends fast
        store.append('df', df,
                     data_columns=['clat', 'clon', 'cloctime'],
                     index=False)
    store.close()
    print("C{0} done.".format(ch))
Example 5
def plot_channel_filesizes(mode, show=False):
    """Plot the size of every channel CSV file for the given mode."""
    dirname = fu.get_month_sample_path_from_mode(mode)

    fig, ax = plt.subplots(figsize=(12, 10))

    for ch in range(3, 10):
        searchpath = os.path.join(dirname, '*C' + str(ch) + '*.csv')
        fnames = glob.glob(searchpath)
        fnames.sort()
        sizes = [os.path.getsize(fname) for fname in fnames]
        ax.plot(sizes, label='Ch ' + str(ch))
    ax.legend(loc='best')
    ax.set_title("File sizes of {0} files.".format(mode))
    savefname = 'filesizes_' + mode + '.png'
    plt.savefig(savefname, dpi=300)
    print("Saving", savefname)
    if show:
        plt.show()
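A hypothetical invocation (mode strings are defined elsewhere in the project):

plot_channel_filesizes('nominal', show=True)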
Example 6
def process_one_channel(args):
    # unpack argument tuple
    fn, l1a_channel, rdr_channel, tbout_all, radout_all, rdrdf, dfdate, dfutc = args

    print("Processing channel", rdr_channel)

    # select the columns belonging to the channel of interest
    tbout = tbout_all.filter(regex=l1a_channel + '_')
    radout = radout_all.filter(regex=l1a_channel + '_')

    # rename detector names to rdr standard, and reverse detector numbering for
    # detectors of telescope B
    if l1a_channel.startswith('b'):
        tbout.columns = radout.columns = range(21, 0, -1)
    else:
        tbout.rename(columns=lambda x: int(x[3:]), inplace=True)
        radout.rename(columns=lambda x: int(x[3:]), inplace=True)

    rdrch = rdrdf[rdrdf.c == rdr_channel]

    rdrout = rdrch[[
        'date', 'utc', 'jdate', 'clat', 'clon', 'sclat', 'sclon', 'scrad',
        'orientlat', 'orientlon', 'sunlat', 'sunlon', 'sundist', 'orbit',
        'scalt', 'af', 'c', 'det', 'cemis', 'cloctime', 'qca', 'qge', 'qmi'
    ]]

    # rename orientlat/orientlon to vert_lat/vert_lon, the (incorrectly
    # named) columns that divdata produces
    rdrout.columns = [
        'date', 'utc', 'jdate', 'clat', 'clon', 'sclat', 'sclon', 'scrad',
        'vert_lat', 'vert_lon', 'sunlat', 'sunlon', 'sundst', 'orbit', 'scalt',
        'af', 'c', 'det', 'cemis', 'cloctime', 'qca', 'qge', 'qmi'
    ]

    # because the index of tbout is shorter than df's, only the dates
    # matching tbout's index should be picked out of df
    tbout['date'] = dfdate
    tbout['utc'] = dfutc
    radout['date'] = dfdate
    radout['utc'] = dfutc

    tbmolten = pd.melt(tbout, id_vars=['date', 'utc'], value_vars=range(1, 22))
    radmolten = pd.melt(radout,
                        id_vars=['date', 'utc'],
                        value_vars=range(1, 22))

    # the melting process left unhelpful column names; repair them
    tbmolten.columns = ['date', 'utc', 'det', 'tb']
    radmolten.columns = ['date', 'utc', 'det', 'radiance']

    data_out = tbmolten.merge(rdrout, on=['date', 'utc', 'det'])
    data_out = radmolten.merge(data_out, on=['date', 'utc', 'det'])
    print("Merged successfully. Writing out to csv now.")

    # create the output filename; note that 'mode' here is read from the
    # module namespace, not from the argument tuple
    basename = fn.timestr + '_C' + str(rdr_channel) + '_' + mode + '_RDR20.csv'
    dirname = fu.get_month_sample_path_from_mode(mode)
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    outfname = pjoin(dirname, basename)

    # don't write out the meaningless integer index
    data_out.to_csv(outfname, index=False)
    print("Finished", os.path.basename(outfname))
Example 7
if __name__ == '__main__':
    if len(sys.argv) < 3:
        usage()
    timestr = sys.argv[1]
    fnames = glob.glob(os.path.join(fu.l1adatapath, timestr + '*_L1A.TAB'))
    fnames.sort()
    if sys.argv[2] == 'test':
        pprint.pprint(fnames)
        sys.exit()
    if len(sys.argv) < 4:
        # the non-test path also needs the cpus argument
        usage()

    mode = sys.argv[2]
    cpus = sys.argv[3]

    # find outpaths that are done
    try:
        fnames_done = glob.glob(
            pjoin(fu.get_month_sample_path_from_mode(mode), '*.csv'))
        timestrs_done = [fu.FileName(i).timestr for i in fnames_done]
        # an hour of input is done once all 7 thermal-channel CSVs exist
        fnames_todo = [
            i for i in fnames
            if timestrs_done.count(fu.FileName(i).timestr) < 7
        ]
    except OSError:
        fnames_todo = fnames

    fnames_todo.sort()
    # bundle the arguments into tuples so pool.map needs only one parameter
    list_of_input_tuples = [(i, mode) for i in fnames_todo]

    pool = mypool.MyPool(int(cpus))
    pool.map(main, list_of_input_tuples)
    pool.close()
    pool.join()
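Given the argument parsing, the script is run with a timestr prefix, a mode, and a CPU count; hypothetically (script name invented):

python process_l1a.py 2010011 test       # just list the matching L1A files
python process_l1a.py 2010011 nominal 8  # process them on 8 workers

mypool.MyPool is a project-local Pool variant, presumably a non-daemonic pool so that workers may spawn their own child processes; that is an assumption, as its source is not shown.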
Example 8
def get_data_from_hour(mode, ch, timestr):
    """Read one hour's RDR20 CSV for a given channel and mode into a DataFrame."""
    ch = ch[:2]  # keep only e.g. 'C7' from a longer channel string
    dirname = fu.get_month_sample_path_from_mode(mode)
    fname = '_'.join([timestr, ch, mode, 'RDR20']) + '.csv'
    return pd.read_csv(os.path.join(dirname, fname))
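A hypothetical call, matching the filename scheme used when the CSVs were written (mode string and timestr are placeholders):

df = get_data_from_hour('nominal', 'C7', '2010011018')
# reads <month_sample_path>/2010011018_C7_nominal_RDR20.csv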