def get_hash(config):
    """Return the reduced config together with a digest identifying it.

    The config is first cut down with ``reducedict(config, KEYS)``.  The
    digest then covers that reduced dict, the file time of ``BIN`` and the
    file time of the reduced config's ``'filename'`` entry.

    Returns a ``(reduced_config, hexdigest)`` tuple.
    """
    reduced = reducedict(config, KEYS)
    # Same three inputs, fed in the same order: dict, BIN, input file.
    hasher = (
        Hasher()
        .hashdict(reduced)
        .hashfiletime(BIN)
        .hashfiletime(reduced['filename'])
    )
    return reduced, hasher.hexdigest()
def allplots(config, executor):
    """Plan the allplots pages, enqueueing one ``_ap`` job per page.

    The prerequisite steps (``extract``, ``nprofile``, ``acgt_gamma``) are
    planned first and whatever jobs they return are passed as ``after=`` to
    every page job.  The range ``startBase`` .. ``endBase`` is then split
    into pages of ``basesPerGraph * graphsPerPage`` bases; the last page is
    clipped to ``endBase``.

    Side effects on ``config``:
      * the ``parsing.*`` helpers are called on it (presumably they fill in
        derived values such as length, titles and ``endBase`` -- confirm
        against ``parsing``);
      * ``config['psnames']`` is set to the list of per-page output
        filenames, in page order.

    Returns the list built by extending with the result of each page's
    ``enqueue`` call.

    Raises ``ValueError`` if the computed bases-per-page is not positive.
    """
    after = []
    try:
        after.extend(extract.plan(config, executor) or [])
    except Exception:
        # A failure while planning extraction is deliberately non-fatal,
        # but record it instead of hiding it.  Catching ``Exception``
        # (not a bare ``except``) lets KeyboardInterrupt/SystemExit through.
        log.warning("extract.plan failed; continuing without it",
                    exc_info=True)
    # A planner may return None when it has nothing to schedule (already
    # planned, or skipped); treat that as "no jobs" rather than letting
    # ``extend(None)`` raise TypeError.
    after.extend(nprofile.plan(config, executor) or [])
    after.extend(acgt_gamma.plan(config, executor) or [])

    parsing.length(config)
    parsing.first_page_title(config)
    parsing.following_page_title(config)
    parsing.endBase(config)

    # Strip down to the config for this task only
    rconfig = reducedict(config, KEYS + FILE_KEYS)

    basesPerGraph = rconfig['basesPerGraph']
    graphsPerPage = rconfig['graphsPerPage']
    # start/end are per-page values, so take them out of the shared config.
    startBase = rconfig.pop('startBase')
    endBase = rconfig.pop('endBase')
    bp_per_page = rconfig['bp_per_page'] = basesPerGraph * graphsPerPage
    if bp_per_page <= 0:
        # Zero would divide by zero below; a negative step would make the
        # page loop run forever.
        raise ValueError(
            "basesPerGraph * graphsPerPage must be positive, got %r"
            % (bp_per_page,))

    page_count = math.ceil(float(endBase - startBase) / bp_per_page)
    log.info("Generating %d pages of allplots", page_count)

    page_num = 1  # page number offset
    filenames = []
    waiton = []
    # per-page loop
    while startBase < endBase:
        pconfig = dict(rconfig)
        pconfig['page_num'] = page_num
        pconfig['startBase'] = startBase
        # Clip the final page to endBase (endBase itself is used on a tie).
        pconfig['endBase'] = min(endBase, startBase + bp_per_page)

        # The digest covers the page config, the binary and this module, so
        # a change to any of them yields a different output filename.
        h = Hasher().hashdict(pconfig).hashfiletime(BIN).hashfiletime(__file__)
        psname = parsing.derive_filename(config, h.hexdigest(), 'ps')
        filenames.append(psname)
        waiton.extend(enqueue(_ap, executor, pconfig, psname, after=after))

        page_num += 1
        startBase += bp_per_page

    # Finally set the output filenames into the master config dict
    config['psnames'] = filenames
    return waiton
def plan(config, executor):
    """Identifying ORFs with significant 3-base periodicities.

    Plans the acgt_gamma prediction step.  Unless
    ``config['skip_prediction']`` is truthy, this derives an output
    directory name from a digest of the relevant config keys, the input
    file's time and ``BIN``'s file time, records the expected output file
    paths in ``config`` and enqueues ``_acgt_gamma``.

    Keys written to ``config``: ``NewOrfsFile`` / ``File_of_new_CDSs``,
    ``ModifiedOrfsFile`` / ``File_of_published_rejected_CDSs``,
    ``HitsFile`` / ``File_of_G+C_coding_potential_regions`` and
    ``config[OUTPUTKEY]`` (the output directory).

    Returns whatever ``enqueue`` returns (callers extend a list with it),
    or an empty list when prediction is skipped.

    Raises ``AssertionError`` if ``DATAPATH`` does not exist.
    """
    if config.get('skip_prediction', False):
        # Nothing to schedule.  Return an empty list (not None) so callers
        # that ``extend`` with the result keep working.
        return []

    # Explicit raise instead of ``assert`` so the check still runs under
    # ``python -O``; the exception type and message are unchanged.
    if not os.path.exists(DATAPATH):
        raise AssertionError(
            "Missing pynpact/data for acgt_gamma prediction. "
            "Expected at " + DATAPATH)

    rconfig = reducedict(config, [
        'filename', 'significance', 'GeneDescriptorSkip1', 'mycoplasma',
        'stderr'])
    h = Hasher().hashdict(rconfig)
    h.hashfiletime(config['filename'])
    h.hashfiletime(BIN)
    outdir = parsing.derive_filename(config, h.hexdigest(), '.predict')

    log.debug("Adding prediction filenames to config dict.")
    # strip 4 characters off here b/c that's how acgt_gamma does
    # it at about lines 262-270
    stem = os.path.basename(config['filename'])[:-4]

    def _outpath(ext):
        # Path of an output file with extension ``ext`` inside ``outdir``.
        return os.path.join(outdir, stem + ext)

    config['NewOrfsFile'] = config['File_of_new_CDSs'] = _outpath(".newcds")
    config['ModifiedOrfsFile'] = \
        config['File_of_published_rejected_CDSs'] = _outpath(".modified")
    config['HitsFile'] = \
        config['File_of_G+C_coding_potential_regions'] = _outpath('.profiles')
    config[OUTPUTKEY] = outdir
    return enqueue(_acgt_gamma, executor, rconfig, outdir)
def plan(config, executor):
    """Plan the nprofile step and its follow-up JSON conversion.

    If ``'nprofile'`` is already a key of ``config`` the step is treated as
    already planned and nothing is enqueued.  Otherwise the key is set,
    ``parsing.length(config)`` is called, and the output filename is derived
    from a digest of the reduced config and ``BIN``'s file time.

    Keys written to ``config``: ``'nprofile'`` (``True``),
    ``config[OUTPUTKEY]`` (the target file) and ``config[JSONOUTPUTKEY]``
    (the target with ``'.json'`` appended).

    Returns the result of enqueueing ``_nprofile``, or an empty list when
    the step was already planned.  The ``_nprofile_to_json`` job is
    enqueued to run after those jobs, but its own result is not returned.
    """
    if 'nprofile' in config:
        # Already planned.  Return an empty list (not None) so callers that
        # ``extend`` with the result keep working.
        return []
    config['nprofile'] = True
    parsing.length(config)

    rconfig = reducedict(config, KEYS)
    h = Hasher()
    h.hashdict(rconfig)
    h.hashfiletime(BIN)
    digest = h.hexdigest()  # named ``digest`` to avoid shadowing ``hash``

    target = parsing.derive_filename(config, digest, 'nprofile')
    json_target = target + '.json'
    config[OUTPUTKEY] = target
    config[JSONOUTPUTKEY] = json_target

    jobs = enqueue(_nprofile, executor, rconfig, target)
    # The JSON conversion only needs the profile output path, and must wait
    # for the profile jobs to finish.
    enqueue(_nprofile_to_json, executor, {OUTPUTKEY: target},
            json_target, after=jobs)
    return jobs