def main():
    """Run the monotonicity check over all L1A files of one year in parallel.

    Expects the 4-digit year as the first command-line argument; prints a
    usage message and exits when it is missing or the data pump cannot be
    constructed for it.
    """
    try:
        pump = fu.L1ADataPump(sys.argv[1])
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
    # still propagate; anything else is treated as a usage problem.
    except Exception:
        print("Use: {} year (4-digits)".format(sys.argv[0]))
        sys.exit()
    todo = pump.fnames
    # Fan the per-file check out over 12 worker processes.
    Parallel(n_jobs=12)(delayed(check_fname)(fname) for fname in todo)
    divtweet.tweet_machine('now really: {} checked for monotonicity.'.format(sys.argv[1]))
def produce_store_file_main():
    """Produce monthly metadata store files for every month of one year.

    The 4-digit year is read from the first command-line argument; a
    usage message is printed and the process exits when it is missing.
    """
    try:
        year = sys.argv[1]
    except IndexError:
        print("Usage: {0} year(yyyy)".format(sys.argv[0]))
        sys.exit()
    for month in range(1, 13):
        # Build the 'YYYYMM' stamp with a zero-padded month.
        timestr = year + str(month).zfill(2)
        divtweet.tweet_machine("Producing metadata for {0}".format(timestr))
        print("Producing", timestr)
        produce_store_file(timestr)
def main():
    """Check every L1A file of the year given on the command line.

    sys.argv[1] must be the 4-digit year; a usage message is printed and
    the process exits when it is missing or the pump cannot be built.
    """
    try:
        pump = fu.L1ADataPump(sys.argv[1])
    # Was a bare `except:`; narrowed so Ctrl-C / SystemExit propagate.
    except Exception:
        print("Use: {} year (4-digits)".format(sys.argv[0]))
        sys.exit()
    # Run the monotonicity check for each file on 12 parallel workers.
    Parallel(n_jobs=12)(delayed(check_fname)(fname) for fname in pump.fnames)
    divtweet.tweet_machine('now really: {} checked for monotonicity.'.format(
        sys.argv[1]))
def produce_store_file_main():
    """Create metadata store files for all twelve months of the given year.

    Reads the 4-digit year from sys.argv[1]; exits with a usage message
    when no argument was supplied.
    """
    try:
        year = sys.argv[1]
    except IndexError:
        print("Usage: {0} year(yyyy)".format(sys.argv[0]))
        sys.exit()
    # Pre-build all 'YYYYMM' stamps, then process them in order.
    timestrs = ["{0}{1:02d}".format(year, m) for m in range(1, 13)]
    for timestr in timestrs:
        divtweet.tweet_machine("Producing metadata for {0}".format(timestr))
        print("Producing", timestr)
        produce_store_file(timestr)
def divmetadata():
    """Run the external `divdata` pipeline once per month of the given
    range and redirect each month's metadata into a CSV file in `savedir`.
    """
    if len(sys.argv) < 3:
        print("Usage: {0} month_start month_end [YYYYMM]".format(sys.argv[0]))
        sys.exit()
    # Fixed middle part of the shell pipeline: field extraction + printing.
    pipeline = ("clat=-90,90 c=3,3 det=11,11 | pextract extract="
                "year,month,date,hour,minute,second,jdate,orbit,sundst,"
                "sunlat,sunlon,sclk,sclat,sclon,scrad,scalt,el_cmd,az_cmd,"
                "af,vert_lat,vert_lon,vlookx,vlooky,vlookz,clat,clon,cemis,"
                "csunzen,csunazi,cloctime,qca,qge,qmi,qual"
                " | pprint titles=0 > ")
    for month in pd.period_range(sys.argv[1], sys.argv[2], freq='M'):
        print("Producing metadata for month {0}".format(month))
        stamp = month.to_timestamp().strftime('%Y%m')
        outfname = os.path.join(savedir,
                                '{0}_divmetadata.csv'.format(stamp))
        # NOTE(review): command is built internally (no user-typed text
        # beyond the validated month range), so shell=True is tolerated here.
        command = 'divdata daterange={0} '.format(stamp) + pipeline + outfname
        call(command, shell=True)
        divtweet.tweet_machine("Metadata for month {0} finished.".format(month))
def resampler(year, interval):
    """Resample the full resolution metadata files of year <year> down
    to <interval>.

    <interval> can be format strings like '1d' for downsampling to days,
    '1h' for hours, '1min' for minutes, etc.

    <year> can be given as string or int.
    """
    fnames = sorted(glob.glob(pjoin(savedir, str(year) + '??.h5')))
    if not fnames:
        # pd.concat([]) raises ValueError; fail with a clear message instead.
        print("No monthly metadata files found for {0} in {1}".format(
            year, savedir))
        return
    resampled = []
    for fname in fnames:
        print("Reading {0}".format(fname))
        resampled.append(pd.read_hdf(fname, 'df').resample(interval,
                                                           kind='period'))
    df = pd.concat(resampled)
    # NOTE(review): output name says 'daily_means' regardless of <interval>;
    # kept unchanged for backward compatibility with downstream readers.
    hdf_fname = pjoin(savedir, str(year) + '_daily_means.h5')
    df.to_hdf(hdf_fname, 'df')
    print("Created {}.".format(hdf_fname))
    # Typo fixed in the status message ("Fininshed" -> "Finished").
    divtweet.tweet_machine("Finished resampling {0} to {1}".format(year, interval))
def store_channel_csv_to_h5(args):
    """Collect all per-month CSV files of one channel into one HDF5 store.

    Parameters
    ----------
    args : tuple
        (mode, ch) -- observation mode string and channel number; packed
        into one argument so the function can be mapped over a pool.
    """
    mode, ch = args
    dirname = fu.get_month_sample_path_from_mode(mode)
    searchpath = pjoin(dirname, '*_C' + str(ch) + '_*.csv')
    fnames = glob.glob(searchpath)
    if not fnames:
        print("No files found with searchpath\n", searchpath)
        return
    storepath = pjoin(dirname, 'C' + str(ch) + '.h5')
    store = pd.HDFStore(storepath, 'w')
    try:
        for i, fname in enumerate(fnames):
            print(100 * i / len(fnames))
            if i % 100 == 0:
                tweet_machine("C{0} conversion to HDF, {1:g}"
                              " % done.".format(ch, 100 * i / len(fnames)))
            df = pd.io.parsers.read_csv(fname)
            if len(df) == 0:
                continue
            store.append('df', df, data_columns=['clat', 'clon', 'cloctime'],
                         index=False)
    finally:
        # Close the store even when a CSV fails to parse/append, so the
        # HDF5 file on disk is not left open (previously leaked on error).
        store.close()
    print("C{0} done.".format(ch))
def resampler(year, interval):
    """Resample the full resolution metadata files of year <year> down
    to <interval>.

    Parameters
    ----------
    year : str or int
        4-digit year; monthly files named '<year><MM>.h5' are read
        from `savedir`.
    interval : str
        pandas offset alias such as '1d', '1h', '1min'.
    """
    fnames = glob.glob(pjoin(savedir, str(year) + '??.h5'))
    fnames.sort()
    if not fnames:
        # Guard: pd.concat raises ValueError when handed an empty list.
        print("No monthly metadata files for {0} in {1}".format(year, savedir))
        return
    monthly = []
    for fname in fnames:
        print("Reading {0}".format(fname))
        monthly.append(pd.read_hdf(fname, 'df').resample(interval, kind='period'))
    df = pd.concat(monthly)
    # NOTE(review): filename hard-codes 'daily_means' even for other
    # intervals; left as-is so existing consumers keep working.
    hdf_fname = pjoin(savedir, str(year) + '_daily_means.h5')
    df.to_hdf(hdf_fname, 'df')
    print("Created {}.".format(hdf_fname))
    # Fixed "Fininshed" typo in the emitted status message.
    divtweet.tweet_machine("Finished resampling {0} to {1}".format(
        year, interval))
def divmetadata():
    """Produce per-month divdata metadata CSV files for the month range
    given on the command line (month_start month_end)."""
    if len(sys.argv) < 3:
        print("Usage: {0} month_start month_end [YYYYMM]".format(sys.argv[0]))
        sys.exit()
    # Columns to pull out of the divdata stream, joined into the pextract
    # argument string.
    fields = ','.join(['year', 'month', 'date', 'hour', 'minute', 'second',
                       'jdate', 'orbit', 'sundst', 'sunlat', 'sunlon', 'sclk',
                       'sclat', 'sclon', 'scrad', 'scalt', 'el_cmd', 'az_cmd',
                       'af', 'vert_lat', 'vert_lon', 'vlookx', 'vlooky',
                       'vlookz', 'clat', 'clon', 'cemis', 'csunzen', 'csunazi',
                       'cloctime', 'qca', 'qge', 'qmi', 'qual'])
    cmd_middle = ("clat=-90,90 c=3,3 det=11,11 | pextract extract=" + fields +
                  " | pprint titles=0 > ")
    months = pd.period_range(sys.argv[1], sys.argv[2], freq='M')
    for month in months:
        print("Producing metadata for month {0}".format(month))
        ts = month.to_timestamp()
        outfname = os.path.join(
            savedir, '{0}_divmetadata.csv'.format(ts.strftime('%Y%m')))
        cmd = ('divdata daterange={0}'.format(ts.strftime('%Y%m ')) +
               cmd_middle + outfname)
        call(cmd, shell=True)
        divtweet.tweet_machine(
            "Metadata for month {0} finished.".format(month))
def store_channel_csv_to_h5(args):
    """Append every per-month CSV of one channel into a single HDF5 store.

    Parameters
    ----------
    args : tuple
        (mode, ch): observation mode string and channel number, packed
        into one tuple so the function is pool-map friendly.
    """
    mode, ch = args
    dirname = fu.get_month_sample_path_from_mode(mode)
    searchpath = pjoin(dirname, '*_C' + str(ch) + '_*.csv')
    fnames = glob.glob(searchpath)
    if not fnames:
        print("No files found with searchpath\n", searchpath)
        return
    storepath = pjoin(dirname, 'C' + str(ch) + '.h5')
    store = pd.HDFStore(storepath, 'w')
    try:
        for i, fname in enumerate(fnames):
            print(100 * i / len(fnames))
            # Tweet a progress update every 100 files (including the first).
            if i % 100 == 0:
                tweet_machine("C{0} conversion to HDF, {1:g}"
                              " % done.".format(ch, 100 * i / len(fnames)))
            df = pd.io.parsers.read_csv(fname)
            if len(df) == 0:
                continue
            store.append('df', df,
                         data_columns=['clat', 'clon', 'cloctime'],
                         index=False)
    finally:
        # Previously the store leaked when read_csv/append raised; always
        # close so the on-disk HDF5 file is flushed and released.
        store.close()
    print("C{0} done.".format(ch))
#!/usr/bin/env python # encoding: UTF-8 from __future__ import division, print_function import pandas as pd import glob from diviner.divtweet import tweet_machine # imports api handle fnames = glob.glob('/u/paige/maye/raid/rdr20_month_samples/nominal/*_C9_*.csv') store = pd.HDFStore('/u/paige/maye/raid/rdr20_month_samples/nominal/C9.h5') todo = fnames for i,fname in enumerate(todo): print(100*i/len(todo)) if i % 50 == 0: tweet_machine('Converting to h5, {0:g} % done.'.format(100*i/len(todo))) df = pd.io.parsers.read_csv(fname) if len(df) == 0: continue store.append('df',df,data_columns=['clat','clon','cloctime']) store.close()