def main(buckets, procs=2):
    """Run a derivatives check over every object in *buckets*.

    An empty/falsy *buckets* falls back to the ('images', 'sounds') pair.
    With procs > 1 the objects are fanned out to a gipc worker pool in
    groups of 1000; otherwise everything is processed in this process.
    Logs the total elapsed time on completion.
    """
    if not buckets:
        buckets = ('images', 'sounds')
    objs = objects_for_buckets(buckets)
    started = datetime.now()
    logger.info("Checking derivatives for %d objects", len(objs))
    if procs <= 1:
        process_objects(objs)
    else:
        # Pooled DB connections must not survive the fork into workers.
        apidbpool.closeall()
        workers = gipcpool.Pool(procs)
        batches = grouper(objs, 1000)
        finished = ilen(workers.imap_unordered(process_objects, batches))
        logger.debug("Finished %d subprocesses", finished)
    logger.info("Completed derivatives run in %s", (datetime.now() - started))
def start_all_procs(groups, running=None):
    """Start one gipc subprocess per (prefix, items) group.

    ``running`` maps prefix -> gipc process handle; it is created fresh
    when not supplied (avoids a mutable default) and returned so callers
    can track/reap the children.
    """
    if running is None:
        running = {}
    apidbpool.closeall()  # clean before proc fork
    for prefix, items in groups:
        if prefix in running:
            if prefix is None:
                # We can't disambiguate if we don't have a prefix; just
                # skip it until running[None] is empty again
                pass
            else:
                logger.critical("Trying to start second process for prefix %r", prefix)
            # NOTE(review): source formatting was mangled; `continue` is
            # placed to skip BOTH branches, matching the comment above —
            # confirm against version history.
            continue
        logger.debug("Starting subprocess for %s", prefix)
        running[prefix] = gipc.start_process(
            process_list, (items, ), {'forprefix': prefix},
            name="mediaing-{0}".format(prefix),
            daemon=False)
    return running
def allrsids(since=None, ingest=False):
    """Launch one child process per active recordset and summarize results.

    Fetches active (and logs paused) recordset ids, fans a child out per
    rsid via a gipc pool, counts non-zero exit codes, and finishes by
    writing summary.csv / suspects.csv via ds_sum_counts.
    """
    # Imports are function-local in this module to keep forked children light.
    from .db_rsids import get_active_rsids, get_paused_rsids
    rsids = get_active_rsids(since=since)
    logger.info("Checking %s recordsets", len(rsids))
    paused_recordsets = get_paused_rsids()
    # Lazy %-args (consistent with the rest of this module) instead of
    # eager str.format; rendered message is unchanged.
    logger.info("Paused recordsets: %s, rsids: %s",
                len(paused_recordsets), paused_recordsets)
    # Need to ensure all the connections are closed before multiprocessing forks
    apidbpool.closeall()
    pool = gipcpool.Pool()
    exitcodes = pool.imap_unordered(
        functools.partial(launch_child, ingest=ingest), rsids)
    badcount = ilen(e for e in exitcodes if e != 0)
    if badcount:
        logger.critical("%d children failed", badcount)
    from .ds_sum_counts import main as ds_sum_counts
    ds_sum_counts('./', sum_filename='summary.csv', susp_filename="suspects.csv")
"-f", required=False, default=False, action='store_true', help= "Force updating main table with new records even if changed records are found." ) args = argparser.parse_args() if args.bucket != "": BUCKETS = [args.bucket] if args.prefix != "": PREFIX = args.prefix build_temp_table(BUCKETS, PREFIX) new = flag_new_records(PREFIX) changed = flag_changed_records(PREFIX) logger.info("Found {0} new records".format(new)) if changed > 0: logger.error( "Found {0} changed records, inspect {1} for rows with ceph_status='changed'" .format(changed, TMP_TABLE)) if not args.force: raw_input("Press any key to continue or Ctl-C to cancel ") etagged = backfill_flagged_etags(PREFIX) copy_new_to_ceph_objects(PREFIX) apidbpool.closeall()