def img_resize_multi(
        tmp_dir: pathlib.Path,
        img: Image,
        sizes: List[Dict] = None,
        min_size_name: str = None,
        aspect_ratio: tuple = (16, 9),
        **kwargs) -> list:
    """
    Resize an original image into multiple sizes.
    Write the output files into a temporary directory.
    Return a list of available resolutions.
    """
    sizes = sizes or [
        {'name': 'large', 'size': (1280, 720)},
        {'name': 'medium', 'size': (640, 360)},
        {'name': 'small', 'size': (480, 270)},
        {'name': 'tiny', 'size': (320, 180)},
        {'name': 'nano', 'size': (160, 90)},
    ]

    available_sizes = []
    for size in filter(lambda x: larger_or_equal_size(img.size, x['size']), sizes):
        file_name = '%s.%s' % (size['name'], kwargs.get('output_ext', 'png'))
        available_sizes.append({**size, 'file': file_name})
        tmp_ = img_resize(img, size['size'], aspect_ratio=aspect_ratio)
        img_save(tmp_, str(tmp_dir / file_name))

    if min_size_name and min_size_name not in lpluck('name', available_sizes):
        raise MinResNotAvailableError

    return available_sizes
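# Hedged, self-contained sketch of the lpluck-based minimum-size check used
# above; it runs with only funcy installed, so the PIL helpers (img_resize,
# img_save, larger_or_equal_size) are not needed here.
from funcy import lpluck

available_sizes = [
    {'name': 'large', 'size': (1280, 720), 'file': 'large.png'},
    {'name': 'medium', 'size': (640, 360), 'file': 'medium.png'},
]
assert lpluck('name', available_sizes) == ['large', 'medium']
# A caller requiring min_size_name='small' would hit the
# MinResNotAvailableError branch, since 'small' was never produced.
assert 'small' not in lpluck('name', available_sizes)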
def post_processing(mongo, batch_size=100, max_workers=50):
    indexer = Indexer(mongo)
    start_block = indexer.get_checkpoint('post_processing')

    query = {
        "block_num": {
            "$gt": start_block,
            "$lte": start_block + batch_size,
        }
    }
    projection = {
        '_id': 0,
        'body': 0,
        'json_metadata': 0,
    }
    results = list(mongo.Operations.find(query, projection=projection))
    batches = map(parse_operation, results)

    # handle an edge case when we are too close to the head,
    # and the batch contains no work to do
    if not results and is_recent(start_block, days=1):
        return

    # squash duplicates
    def custom_merge(*args):
        return list(set(keep(flatten(args))))

    batch_items = merge_with(custom_merge, *batches)

    # only process accounts if the blocks are recent;
    # scrape_all_users should take care of stale updates
    if is_recent(start_block, days=10):
        accounts = set(batch_items['accounts_light'] + batch_items['accounts'])
        list(thread_multi(
            fn=update_account,
            fn_args=[mongo, None],
            dep_args=list(accounts),
            fn_kwargs=dict(load_extras=False),
            max_workers=max_workers,
            re_raise_errors=False,
        ))
        list(thread_multi(
            fn=update_account_ops_quick,
            fn_args=[mongo, None],
            dep_args=list(accounts),
            fn_kwargs=None,
            max_workers=max_workers,
            re_raise_errors=False,
        ))

    index = max(lpluck('block_num', results))
    indexer.set_checkpoint('post_processing', index)

    log.info("Checkpoint: %s - %s accounts (+%s full)" % (
        index,
        len(batch_items['accounts_light']),
        len(batch_items['accounts']),
    ))
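# Self-contained illustration of the merge_with/custom_merge squash above:
# merge_with hands custom_merge the list of per-batch values for each key,
# flatten() joins them, keep() drops falsy entries, and set() deduplicates.
# The batch dicts below are hand-made stand-ins for parse_operation output.
from funcy import merge_with, keep, flatten

def custom_merge(*args):
    return list(set(keep(flatten(args))))

batches = [
    {'accounts': ['alice'], 'accounts_light': ['bob', None]},
    {'accounts': ['alice', 'carol'], 'accounts_light': []},
]
batch_items = merge_with(custom_merge, *batches)
assert set(batch_items['accounts']) == {'alice', 'carol'}
assert set(batch_items['accounts_light']) == {'bob'}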
def decode_trc(self, actions):
    circ = self.aigbv
    # Keep only the system-controlled part of each (sys, env) action.
    sys_actions = [fn.project(a, self.inputs) for a in actions]

    # simulate() yields pairs per step; index 0 holds the output dicts.
    states = fn.lpluck(0, circ.simulate(actions))
    assert all(s['##valid'] for s in states)
    # Drop the internal validity flag before handing the states back.
    states = [fn.omit(s, {'##valid'}) for s in states]
    return sys_actions, states
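# fn.lpluck accepts any itemgetter-style key, so index 0 pulls the first
# element out of each simulation step pair. The pairs below are hand-made
# stand-ins, not real aigbv simulator output.
import funcy as fn

steps = [({'x': 0, '##valid': True}, None),
         ({'x': 1, '##valid': True}, None)]
states = fn.lpluck(0, steps)
assert [fn.omit(s, {'##valid'}) for s in states] == [{'x': 0}, {'x': 1}]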
def toggle(self, actions: Actions):
    """Toggles a sequence of (sys, env) actions."""
    assert len(actions) == self.horizon
    aps = fn.lpluck(0, self.dyn.simulate(actions))
    expr = preimage(aps=aps, mdp=self.dyn)
    bexpr, *_ = aiger_bdd.to_bdd(
        expr, manager=self.manager, renamer=lambda _, x: x
    )
    return attr.evolve(self, bexpr=xor(self.bexpr, bexpr))
def test_preimage():
    spec, mdp = scenario_reactive()

    sys1 = mdp.aigbv >> BV.sink(1, ['c_next', '##valid'])
    sys2 = sys1 >> BV.aig2aigbv(spec.aig)

    def act(action, coin):
        return {'a': (action,), 'c': (coin,)}

    actions = [act(True, True), act(True, False), act(True, True)]
    observations = fn.lpluck(0, sys1.simulate(actions))

    expr = preimage(observations, sys1)
    assert expr.inputs == {
        'c##time_0', 'c##time_1', 'c##time_2',
        'a##time_0', 'a##time_1', 'a##time_2',
    }

    bexpr1, manager, order = to_bdd2(sys2, horizon=3)

    def accepts(bexpr, actions):
        """Check if bexpr accepts an action sequence."""
        timed_actions = {}
        for t, action in enumerate(actions):
            c, a = action['c'], action['a']
            timed_actions.update({
                f'c##time_{t}[0]': c[0],
                f'a##time_{t}[0]': a[0],
            })
        assert timed_actions.keys() == manager.vars.keys()
        tmp = manager.let(timed_actions, bexpr)
        assert tmp in (manager.true, manager.false)
        return tmp == manager.true

    assert not accepts(bexpr1, actions)

    bexpr2, _, input2var = aiger_bdd.to_bdd(
        expr, manager=manager, renamer=lambda _, x: x
    )
    assert accepts(bexpr2, actions)
    assert not accepts(~bexpr2, actions)

    bexpr3 = xor(bexpr1, bexpr2)
    assert accepts(bexpr3, actions)
def create_dash_job(input_key, output_path, video_resolution: tuple,
                    has_audio: bool, segment_duration=6):
    segment_duration = str(segment_duration)

    # 720p input is minimum for DASH
    if video_resolution[0] < 1280 or video_resolution[1] < 720:
        return None, ''

    outputs = []
    if video_resolution[0] >= 1920 and video_resolution[1] >= 1080:
        outputs.append({
            'Key': '1080p',
            'ThumbnailPattern': '',
            'Rotate': 'auto',
            'PresetId': config['presets']['high']['1080p'],
            'SegmentDuration': segment_duration,
        })
    outputs.extend([
        {
            'Key': '480p',
            'ThumbnailPattern': '',
            'Rotate': 'auto',
            'PresetId': config['presets']['high']['480p'],
            'SegmentDuration': segment_duration,
        },
        {
            'Key': '720p',
            'ThumbnailPattern': '',
            'Rotate': 'auto',
            'PresetId': config['presets']['high']['720p'],
            'SegmentDuration': segment_duration,
        },
    ])
    if has_audio:
        outputs.append({
            'Key': 'audio',
            'PresetId': config['presets']['high']['audio'],
            'SegmentDuration': segment_duration,
        })

    et = boto3.client(
        'elastictranscoder',
        region_name=config['region_name'],
        aws_access_key_id=AWS_MANAGER_PUBLIC_KEY,
        aws_secret_access_key=AWS_MANAGER_PRIVATE_KEY,
    )
    response = et.create_job(
        PipelineId=config['pipeline_id'],
        Input={
            'Key': input_key,
            'FrameRate': 'auto',
            'Resolution': 'auto',
            'AspectRatio': 'auto',
            'Interlaced': 'auto',
            'Container': 'auto',
        },
        Outputs=outputs,
        OutputKeyPrefix=output_path,
        Playlists=[
            {
                'Name': 'main',
                'Format': 'MPEG-DASH',
                'OutputKeys': lpluck('Key', outputs),
            },
        ],
        UserMetadata={},
    )

    return response, f"{output_path}main.mpd"
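# Hedged usage sketch for create_dash_job; the S3 keys are hypothetical and
# the module-level config/credentials above must be in place. create_job
# responses carry the new job id under response['Job']['Id'].
response, playlist_url = create_dash_job(
    input_key='uploads/source.mp4',   # hypothetical bucket-relative key
    output_path='videos/abc123/',     # trailing '/' keeps the prefix clean
    video_resolution=(1920, 1080),    # (width, height), e.g. from ffprobe
    has_audio=True,
)
if response is None:
    print('input below 1280x720, DASH job skipped')
else:
    print('job id:', response['Job']['Id'])
    print('playlist will land at', playlist_url)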
def extract_errors(transcoder_job: dict):
    from funcy import merge, where, lpluck
    job = transcoder_job['Job']
    outputs = merge(job['Outputs'], job['Playlists'])
    return lpluck('StatusDetail', where(outputs, Status='Error'))
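# Self-contained check of extract_errors against a minimal, hand-built
# Elastic Transcoder read_job-style payload, trimmed to just the fields
# the function touches.
sample_job = {
    'Job': {
        'Outputs': [
            {'Key': '720p', 'Status': 'Complete', 'StatusDetail': ''},
            {'Key': '1080p', 'Status': 'Error',
             'StatusDetail': '4000: The input file format is not supported.'},
        ],
        'Playlists': [
            {'Name': 'main', 'Status': 'Error',
             'StatusDetail': 'One or more outputs failed.'},
        ],
    }
}
assert extract_errors(sample_job) == [
    '4000: The input file format is not supported.',
    'One or more outputs failed.',
]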
def scrape_comments(mongo, batch_size=250, max_workers=50):
    """ Parse operations and post-process for comment/post extraction. """
    indexer = Indexer(mongo)
    start_block = indexer.get_checkpoint('comments')

    query = {
        "type": "comment",
        "block_num": {
            "$gt": start_block,
            "$lte": start_block + batch_size,
        }
    }
    projection = {
        '_id': 0,
        'block_num': 1,
        'author': 1,
        'permlink': 1,
    }
    results = list(mongo.Operations.find(query, projection=projection))
    identifiers = set(f"{x['author']}/{x['permlink']}" for x in results)

    # handle an edge case when we are too close to the head,
    # and the batch contains no work to do
    if not results and is_recent(start_block, days=1):
        return

    # get Post.export() results in parallel
    raw_comments = thread_multi(
        fn=get_comment,
        fn_args=[None],
        dep_args=list(identifiers),
        max_workers=max_workers,
        yield_results=True,
    )
    raw_comments = lkeep(raw_comments)

    # split into root posts and comments
    posts = lfilter(lambda x: x['depth'] == 0, raw_comments)
    comments = lfilter(lambda x: x['depth'] > 0, raw_comments)

    # Mongo upsert many
    log_output = ''
    if posts:
        r = mongo.Posts.bulk_write(
            [UpdateOne({'identifier': x['identifier']},
                       {'$set': {**x, 'updatedAt': dt.datetime.utcnow()}},
                       upsert=True)
             for x in posts],
            ordered=False,
        )
        log_output += \
            f'(Posts: {r.upserted_count} upserted, {r.modified_count} modified) '
    if comments:
        r = mongo.Comments.bulk_write(
            [UpdateOne({'identifier': x['identifier']},
                       {'$set': {**x, 'updatedAt': dt.datetime.utcnow()}},
                       upsert=True)
             for x in comments],
            ordered=False,
        )
        log_output += \
            f'(Comments: {r.upserted_count} upserted, {r.modified_count} modified) '

    # We are only querying {type: 'comment'} blocks and sometimes
    # the gaps are larger than the batch_size.
    index = silent(max)(lpluck('block_num', results)) or (start_block + batch_size)
    indexer.set_checkpoint('comments', index)

    log.info(f'Checkpoint: {index} {log_output}')
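# Self-contained illustration of the checkpoint fallback above: silent()
# swallows the ValueError that max() raises on an empty batch, and `or`
# then advances the checkpoint by a full batch_size.
from funcy import silent, lpluck

start_block, batch_size = 1000, 250
results = []  # a batch with no comment operations at all
index = silent(max)(lpluck('block_num', results)) or (start_block + batch_size)
assert index == 1250

results = [{'block_num': 1005}, {'block_num': 1010}]
index = silent(max)(lpluck('block_num', results)) or (start_block + batch_size)
assert index == 1010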
def has_audio_stream(ffprobe_out: dict) -> bool:
    # ffprobe reports streams as a list of dicts, so default to [] rather
    # than {} when the key is absent.
    streams = ffprobe_out.get('streams', [])
    return 'audio' in lpluck('codec_type', streams)
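# Self-contained check of has_audio_stream with trimmed ffprobe payloads
# (real `ffprobe -show_streams -print_format json` output has many more
# fields per stream); assumes `from funcy import lpluck` at module level.
video_only = {'streams': [{'codec_type': 'video'}]}
with_audio = {'streams': [{'codec_type': 'video'}, {'codec_type': 'audio'}]}
assert not has_audio_stream(video_only)
assert has_audio_stream(with_audio)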
def fill_probes(platform_id):
    platform = Platform.objects.get(pk=platform_id)
    gpl_name = platform.gpl_name
    cprint('%s %s %s' % (platform.pk, platform.gpl_name, platform.specie),
           attrs=['bold'])
    assert platform.specie

    platform.verdict = ''
    platform.probes_total = None
    platform.probes_matched = None
    platform.stats = {}
    platform.last_filled = timezone.now()

    annot_file = '/pub/geo/DATA/annotation/platforms/%s.annot.gz' % gpl_name
    family_file = '/pub/geo/DATA/SOFT/by_platform/%s/%s_family.soft.gz' % (
        gpl_name, gpl_name)
    files = [annot_file, family_file]
    tables = list(map(peek_platform, files))
    # Skip empty files
    files = list(compress(files, tables))
    tables = lkeep(tables)

    # TODO: check other supplementary file formats
    supplementary_dir = '/pub/geo/DATA/supplementary/platforms/%s/' % gpl_name
    _, supplementary_files = listdir(supplementary_dir)
    supplementary_files = [
        f for f in supplementary_files
        if f.endswith('.txt.gz') and not re_test(r'\.cdf\.', f, re.I)
    ]
    files.extend(supplementary_files)
    tables.extend(
        decompress(download('%s%s' % (supplementary_dir, f)))
        for f in supplementary_files
    )

    platform.stats['files'] = lkeep(files)

    if not any(tables):
        cprint('No data for %s' % gpl_name, 'red')
        platform.verdict = 'no data'
        platform.save()
        return

    # Read tables in
    df = pd.concat(read_table(table, file) for table, file in zip(tables, files))
    del tables  # free memory

    platform.probes_total = len(set(df.index))
    cprint('Found %d probes to match' % platform.probes_total, 'yellow')
    # import ipdb; ipdb.set_trace()  # noqa

    # Try to resolve probes starting from best scopes
    mygene_probes = []
    platform.stats['matches'] = []
    platform.verdict = 'no clue'
    for scopes, cols in SCOPE_COLUMNS:
        cols = list(set(cols) & set(df.columns))
        if not cols:
            continue
        cprint('> Looking into %s' % ', '.join(sorted(cols)), 'cyan')
        platform.verdict = 'nothing matched'

        probes = pd.concat(df[col].dropna() for col in cols)
        new_matches = mygene_fetch(platform, probes, scopes)
        mygene_probes.extend(new_matches)

        # Drop matched probes
        if new_matches:
            platform.stats['matches'].append({
                'scopes': scopes,
                'cols': cols,
                'found': len(new_matches),
            })
            df = df.drop(lpluck('probe', new_matches))
            if df.empty:
                break

    # Update stats and history
    platform.probes_matched = len(mygene_probes)
    platform.history.append({
        'time': timezone.now().strftime('%Y-%m-%d %T'),
        'probes_total': platform.probes_total,
        'probes_matched': platform.probes_matched,
    })

    # Insert found genes
    if mygene_probes:
        with transaction.atomic():
            platform.verdict = 'ok'
            platform.save()
            platform.probes.all().delete()
            PlatformProbe.objects.bulk_create([
                PlatformProbe(platform=platform, **probe_info)
                for probe_info in mygene_probes
            ])
        cprint('Inserted %d probes for %s' % (len(mygene_probes), gpl_name),
               'green')
    else:
        cprint('Nothing matched for %s' % gpl_name, 'red')
        platform.save()
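# Self-contained illustration of the probe-dropping step above: lpluck pulls
# the matched probe ids out of mygene-style records (any field other than
# 'probe' is illustrative), and df.drop removes those rows by index label.
import pandas as pd
from funcy import lpluck

df = pd.DataFrame({'symbol': ['TP53', 'BRCA1', 'EGFR']},
                  index=['p1', 'p2', 'p3'])
new_matches = [{'probe': 'p1'}, {'probe': 'p3'}]
df = df.drop(lpluck('probe', new_matches))
assert list(df.index) == ['p2']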