def enqueue_blobs(time_series, queue):
    """config needs psql_connect, resolver"""
    config = get_config(CONFIG_FILE, time_series)
    feed = IfcbFeed(config.psql_connect)
    r = parse_stream(config.resolver)
    blob_resolver = r['mvco_blob']
    pid_resolver = r['pid']
    for lid in feed.latest_bins(n=10000):
        # any recent bin with no blob product yet gets queued for extraction
        if blob_resolver.resolve(pid=lid, time_series=time_series) is None:
            pid = pid_resolver.resolve(pid=lid, time_series=time_series).bin_pid
            print 'No blobs found for %s, enqueuing' % pid
            extract_blobs.apply_async(args=[time_series, pid], queue=queue)
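# For reference: enqueue_blobs only dispatches work. It assumes extract_blobs
# is a celery task (imported elsewhere in the original module) and that a
# worker is consuming `queue`. A minimal sketch of that worker-side contract
# follows; the app name, broker URL, and task body are assumptions, not the
# project's real configuration.
from celery import Celery

celery_sketch = Celery('ifcb_sketch', broker='amqp://localhost')

@celery_sketch.task
def extract_blobs_sketch(time_series, pid):
    # the real extract_blobs would run blob extraction for the single bin `pid`
    pass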
def list_new_filesets(time_series, psql_connect, resolver, after_year=2012):
    feed = IfcbFeed(psql_connect)
    r = parse_stream(resolver)
    for s in list_adcs(time_series, resolver, after_year):
        if feed.exists(s.pid):
            logging.info('%s EXISTS in time series %s' % (s.pid, time_series))
        else:
            logging.info('%s NEW, not already in time series %s' % (s.pid, time_series))
            fs = r['fileset'].resolve(pid=s.pid, product='raw', time_series=time_series, day_dir=s.day_dir)
            if fs is None:
                logging.warn('%s UNRESOLVABLE cannot find raw files' % s.pid)
            else:
                yield fs
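# Usage sketch: list_new_filesets is a generator, so callers can stream new
# filesets without materializing the whole feed. CONFIG_FILE, the 'mvco'
# default, and the helper name here are assumptions borrowed from nearby code.
def preview_new_filesets(time_series='mvco', limit=5):
    config = get_config(CONFIG_FILE, time_series)
    for i, fs in enumerate(list_new_filesets(time_series, config.psql_connect, config.resolver)):
        if i >= limit:
            break
        print '%s: %s %s %s' % (fs.pid, fs.hdr_path, fs.adc_path, fs.roi_path)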
def enqueue_features(time_series, queue):
    """config needs psql_connect, resolver"""
    config = get_config(CONFIG_FILE, time_series)
    feed = IfcbFeed(config.psql_connect)
    r = parse_stream(config.resolver)
    blob_resolver = r['mvco_blob']
    feature_resolver = r['features']
    pid_resolver = r['pid']
    for lid in feed.latest_bins(n=5000):
        if blob_resolver.resolve(pid=lid, time_series=time_series) is not None:
            pid = pid_resolver.resolve(pid=lid, time_series=time_series).bin_pid
            if feature_resolver.resolve(pid=lid, time_series=time_series) is None:
                print 'found blobs but no features for %s' % pid
                extract_features.apply_async(args=[time_series, pid], queue=queue)
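# Driver sketch: features are only extracted where blobs already exist, so a
# periodic job would run the two enqueue passes in order. The combined driver
# and its name are assumptions for illustration, not the project's code.
def enqueue_all(time_series, queue):
    enqueue_blobs(time_series, queue)     # first pass: bins missing blobs
    enqueue_features(time_series, queue)  # second pass: blobbed bins missing features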
def accede(config_file, time_series):
    config = get_config(config_file, time_series)
    logging.info('parsed config file %s:%s' % (config_file, time_series))
    fx = IfcbFixity(config.psql_connect)
    feed = IfcbFeed(config.psql_connect)
    try:
        year_pattern = config.year_pattern
    except AttributeError:
        year_pattern = '....'  # default: match any four-digit year
    with xa(config.psql_connect) as (c, db):
        for s in list_new_filesets(time_series, config.psql_connect, config.resolver, after_year=year_pattern): # FIXME hardcoded
            try:
                check_integrity(s.pid, s.hdr_path, s.adc_path, s.roi_path, s.schema_version)
            except Exception, e:
                logging.warn('%s FAIL integrity checks: %s' % (s.pid, e))
                continue
            # hot diggity, we've got some good data
            # compute fixity
            try:
                fx.fix(s.pid, s.hdr_path, cursor=db, filetype='hdr')
                logging.info('%s FIXITY computed for %s' % (s.pid, s.hdr_path))
                fx.fix(s.pid, s.adc_path, cursor=db, filetype='adc')
                logging.info('%s FIXITY computed for %s' % (s.pid, s.adc_path))
                fx.fix(s.pid, s.roi_path, cursor=db, filetype='roi')
                logging.info('%s FIXITY computed for %s' % (s.pid, s.roi_path))
            except Exception:
                logging.error('%s FAIL fixity cannot be computed!' % s.pid)
                c.rollback()
                continue
            # register bin
            try:
                ts = text2utcdatetime(s.date, s.date_format)
                feed.create(s.pid, ts, cursor=db)
                c.commit()
                logging.info('%s DONE' % s.pid)
            except Exception:
                logging.error('%s FAILED' % s.pid)
                c.rollback()  # discard this bin's uncommitted fixity rows
                continue
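# accede() leans on xa() yielding a (connection, cursor) pair whose commits
# and rollbacks bound each bin's accession. A minimal stand-in with that
# shape -- an assumption for illustration, not the oii implementation:
from contextlib import contextmanager
import psycopg2

@contextmanager
def xa_sketch(psql_connect):
    connection = psycopg2.connect(psql_connect)
    try:
        yield connection, connection.cursor()  # caller commits or rolls back
    finally:
        connection.close()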
def check_integrity(pid, hdr_path, adc_path, roi_path, schema_version):
    # assumed: an hdr-level check mirroring the adc/roi checks below; the
    # original fragment begins just after it
    integrity.check_hdr(LocalFileSource(hdr_path))
    logging.info('%s PASS integrity check %s' % (pid, hdr_path))
    targets = list(integrity.check_adc(LocalFileSource(adc_path), schema_version=schema_version))
    logging.info('%s PASS integrity check %s' % (pid, adc_path))
    integrity.check_roi(LocalFileSource(roi_path), targets)
    logging.info('%s PASS integrity check %s' % (pid, roi_path))

if __name__ == '__main__':
    try:
        time_series = sys.argv[2]
        config = get_config(sys.argv[1], time_series)
    except Exception:
        sys.stderr.write('usage: [python] oii/ifcb/accession.py [config file] [time series name]\n')
        sys.exit(-1)
    logging.basicConfig(level=logging.INFO)
    fx = IfcbFixity(config.psql_connect)
    feed = IfcbFeed(config.psql_connect)
    with xa(config.psql_connect) as (c, db):
        for s in list_new_filesets(time_series, config.psql_connect, config.resolver, after_year=2005): # FIXME hardcoded
            try:
                check_integrity(s.pid, s.hdr_path, s.adc_path, s.roi_path, s.schema_version)
            except Exception, e:
                logging.warn('%s FAIL integrity checks: %s' % (s.pid, e))
                continue
            # hot diggity, we've got some good data
            # compute fixity
            try:
                fx.fix(s.pid, s.hdr_path, cursor=db, filetype='hdr')
                logging.info('%s FIXITY computed for %s' % (s.pid, s.hdr_path))
                fx.fix(s.pid, s.adc_path, cursor=db, filetype='adc')
                logging.info('%s FIXITY computed for %s' % (s.pid, s.adc_path))
                fx.fix(s.pid, s.roi_path, cursor=db, filetype='roi')
                logging.info('%s FIXITY computed for %s' % (s.pid, s.roi_path))
            # the rest of the loop is assumed to mirror accede() above
            except Exception:
                logging.error('%s FAIL fixity cannot be computed!' % s.pid)
                c.rollback()
                continue
            # register bin
            try:
                ts = text2utcdatetime(s.date, s.date_format)
                feed.create(s.pid, ts, cursor=db)
                c.commit()
                logging.info('%s DONE' % s.pid)
            except Exception:
                logging.error('%s FAILED' % s.pid)
                continue
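# Invocation sketch for the script above, following its usage string (the
# config file name is hypothetical):
#
#   python oii/ifcb/accession.py ./ifcb.conf mvco
#
# For each new fileset this checks integrity, records hdr/adc/roi fixity,
# and registers the bin in the feed.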
import os
import sys
from time import strptime  # assumed source of strptime; not shown in the fragment
from oii.times import ISO_8601_FORMAT

# (get_config, parse_stream, IfcbFeed, and xa are imported earlier in the
# original file)

try:
    time_series = sys.argv[1]
except IndexError:
    time_series = 'mvco'

config = get_config('./db.conf', time_series)
outdir = config.outdir
psql_connect = '%s dbname=%s' % (config.psql_connect, config.dbname)
R = parse_stream(config.resolver)
NAMESPACE = 'http://demi.whoi.edu/mvco/'
feed = IfcbFeed(psql_connect)

start = strptime('2005-01-01T00:00:00Z', ISO_8601_FORMAT)
end = strptime('2014-01-01T00:00:00Z', ISO_8601_FORMAT)

with xa(psql_connect) as (c, db):
    bin_lids = list(feed.between(start, end))

N = 8  # number of worker processes
pids = []
for n in range(N):
    pid = os.fork()
    if pid == 0:
        # child n writes scores for every Nth bin to its own CSV
        outfile = os.path.join(outdir, 'scores_%d.csv' % n)
        with open(outfile, 'w') as of:
            for bin_lid in bin_lids[n::N]:
                pass  # per-bin scoring body is cut off in the source
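# The fork fan-out above is cut off before the bookkeeping that makes it
# safe: each child must exit explicitly after finishing its stripe, and the
# parent should reap all children. A generic sketch of the pattern (the
# work() callback is a placeholder, not the script's scoring code):
import os

def fan_out(items, n_workers, work):
    child_pids = []
    for n in range(n_workers):
        pid = os.fork()
        if pid == 0:
            for item in items[n::n_workers]:  # child n takes every n_workers-th item
                work(n, item)
            os._exit(0)  # never fall through into the parent's loop
        child_pids.append(pid)
    for pid in child_pids:
        os.waitpid(pid, 0)  # reap each child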
import sys
from time import strptime  # assumed source of strptime; not shown in the fragment

from oii.times import text2utcdatetime, ISO_8601_FORMAT

# (get_config, parse_stream, IfcbFeed, and xa are imported earlier in the
# original file)

try:
    time_series = sys.argv[1]
except IndexError:
    time_series = 'mvco'

config = get_config('./db.conf', time_series)
psql_connect = '%s dbname=%s' % (config.psql_connect, config.dbname)
R = parse_stream(config.resolver)
NAMESPACE = 'http://demi.whoi.edu/mvco/'
feed = IfcbFeed(psql_connect)

start = strptime('2005-01-01T00:00:00Z', ISO_8601_FORMAT)
end = strptime('2014-01-01T00:00:00Z', ISO_8601_FORMAT)

# parameterized SQL: s checks whether a bin already has scores, q inserts them
s = 'select count(*) from autoclass where bin_lid=%s'
q = 'insert into autoclass (bin_lid, class_label, roinums, scores) values (%s, %s, %s, %s)'

with xa(psql_connect) as (c, db):
    n = 0
    for bin_lid in feed.between(start, end):
        bin_pid = NAMESPACE + bin_lid
        db.execute(s, (bin_lid,))
        count = db.fetchone()[0]
        if count == 0:
            pass  # insert path is cut off in the source
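# Sketch of the insert step the truncated loop is building toward, based on
# the two SQL statements above: when a bin has no autoclass rows yet, insert
# one row per class label. The helper name and the shape of `rows` are
# assumptions inferred from q's column list.
def insert_autoclass_rows(db, bin_lid, rows):
    # rows: iterable of (class_label, roinums, scores) tuples for this bin
    for class_label, roinums, scores in rows:
        db.execute(q, (bin_lid, class_label, roinums, scores))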