def plan(config, executor):
    """Identifying ORFs with significant 3-base periodicities.

    Schedule the ``_acgt_gamma`` job and record in ``config`` where its
    output is expected to be written.

    When ``config['skip_prediction']`` is truthy nothing is scheduled,
    ``config`` is left untouched and ``None`` is returned.

    Otherwise ``config`` is updated in place with:

    * ``OUTPUTKEY``: the derived output directory;
    * ``NewOrfsFile`` / ``File_of_new_CDSs``: ``<stem>.newcds``;
    * ``ModifiedOrfsFile`` / ``File_of_published_rejected_CDSs``:
      ``<stem>.modified``;
    * ``HitsFile`` / ``File_of_G+C_coding_potential_regions``:
      ``<stem>.profiles``;

    where ``<stem>`` is the basename of ``config['filename']`` minus its
    last four characters, placed inside the output directory.

    Returns whatever ``enqueue`` returns for the ``_acgt_gamma`` job.

    Raises ``AssertionError`` if ``DATAPATH`` does not exist.
    """
    if config.get('skip_prediction', False):
        return

    # Raised explicitly instead of via ``assert`` so that the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if not os.path.exists(DATAPATH):
        raise AssertionError(
            "Missing pynpact/data for acgt_gamma prediction. "
            "Expected at " + DATAPATH)

    rconfig = reducedict(config, [
        'filename', 'significance', 'GeneDescriptorSkip1', 'mycoplasma',
        'stderr'])

    # The output directory name is derived from a digest over the reduced
    # config plus Hasher.hashfiletime() of the input file and of BIN.
    h = Hasher().hashdict(rconfig)
    h.hashfiletime(config['filename'])
    h.hashfiletime(BIN)
    outdir = parsing.derive_filename(config, h.hexdigest(), '.predict')

    log.debug("Adding prediction filenames to config dict.")
    # Strip 4 characters off the basename here because that is how
    # acgt_gamma does it (at about lines 262-270).
    stem = os.path.basename(config['filename'])[:-4]

    def outpath(ext):
        # Path of an acgt_gamma output file with extension ``ext``.
        return os.path.join(outdir, stem + ext)

    # Each path is stored under two keys; the first key of each pair is
    # assigned first, matching the original assignment order.
    for primary, alias, ext in (
            ('NewOrfsFile', 'File_of_new_CDSs', ".newcds"),
            ('ModifiedOrfsFile', 'File_of_published_rejected_CDSs',
             ".modified"),
            ('HitsFile', 'File_of_G+C_coding_potential_regions',
             '.profiles')):
        config[primary] = config[alias] = outpath(ext)

    config[OUTPUTKEY] = outdir
    return enqueue(_acgt_gamma, executor, rconfig, outdir)
def get_hash(config):
    """Reduce ``config`` to ``KEYS`` and compute a digest for it.

    The digest is built from the reduced config, followed by
    ``Hasher.hashfiletime`` of ``BIN`` and of the reduced config's
    ``'filename'`` entry.

    Returns a ``(reduced_config, hexdigest)`` tuple.
    """
    reduced = reducedict(config, KEYS)

    hasher = Hasher()
    hasher.hashdict(reduced)
    hasher.hashfiletime(BIN)
    # 'filename' is read from the reduced dict, not the caller's config.
    hasher.hashfiletime(reduced['filename'])

    digest = hasher.hexdigest()
    return reduced, digest
def plan(config, executor):
    """Schedule the ``_nprofile`` job followed by its JSON conversion.

    If ``'nprofile'`` is already a key of ``config`` nothing is done and
    ``None`` is returned; otherwise the key is set to ``True`` so that a
    later call with the same config returns early.

    ``config`` is updated in place: ``OUTPUTKEY`` receives the derived
    target filename and ``JSONOUTPUTKEY`` the same name with ``'.json'``
    appended.

    Returns the result of enqueueing ``_nprofile``; the
    ``_nprofile_to_json`` job is enqueued with ``after=`` that result.
    """
    if 'nprofile' in config:
        return
    config['nprofile'] = True

    parsing.length(config)
    reduced = reducedict(config, KEYS)

    # The target name is derived from a digest over the reduced config
    # and Hasher.hashfiletime() of BIN.
    hasher = Hasher()
    hasher.hashdict(reduced)
    hasher.hashfiletime(BIN)
    target = parsing.derive_filename(config, hasher.hexdigest(), 'nprofile')

    config[OUTPUTKEY] = target
    json_target = target + '.json'
    config[JSONOUTPUTKEY] = json_target

    jobs = enqueue(_nprofile, executor, reduced, target)
    enqueue(_nprofile_to_json, executor, {OUTPUTKEY: target},
            json_target, after=jobs)
    return jobs