Example 1
def img_resize_multi(
        tmp_dir: pathlib.Path,
        img: Image,
        sizes: List[Dict] = None,
        min_size_name: str = None,
        aspect_ratio: tuple = (16, 9), **kwargs) -> list:
    """
    Resize an original image into multiple sizes.
    Write the output files to a temporary directory.

    Returns a list of available resolutions.
    """
    sizes = sizes or [
        {'name': 'large', 'size': (1280, 720)},
        {'name': 'medium', 'size': (640, 360)},
        {'name': 'small', 'size': (480, 270)},
        {'name': 'tiny', 'size': (320, 180)},
        {'name': 'nano', 'size': (160, 90)},
    ]

    available_sizes = []
    for size in filter(lambda x: larger_or_equal_size(img.size, x['size']), sizes):
        file_name = '%s.%s' % (size['name'], kwargs.get("output_ext", "png"))
        available_sizes.append({**size, 'file': file_name})
        tmp_ = img_resize(img, size['size'], aspect_ratio=aspect_ratio)
        img_save(tmp_, str(tmp_dir / file_name))

    if min_size_name \
            and min_size_name not in lpluck('name', available_sizes):
        raise MinResNotAvailableError

    return available_sizes
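
For reference, funcy's lpluck(key, mappings) does a key lookup across a sequence of mappings and returns the results as a list; that is what the min_size_name check above relies on. A minimal sketch with illustrative data:

from funcy import lpluck

available_sizes = [{'name': 'large', 'file': 'large.png'},
                   {'name': 'tiny', 'file': 'tiny.png'}]
lpluck('name', available_sizes)  # -> ['large', 'tiny']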
Example 2
def post_processing(mongo, batch_size=100, max_workers=50):
    indexer = Indexer(mongo)
    start_block = indexer.get_checkpoint('post_processing')

    query = {
        "block_num": {
            "$gt": start_block,
            "$lte": start_block + batch_size,
        }
    }
    projection = {
        '_id': 0,
        'body': 0,
        'json_metadata': 0,
    }
    results = list(mongo.Operations.find(query, projection=projection))
    batches = map(parse_operation, results)

    # handle an edge case when we are too close to the head,
    # and the batch contains no work to do
    if not results and is_recent(start_block, days=1):
        return

    # squash for duplicates
    def custom_merge(*args):
        return list(set(keep(flatten(args))))

    batch_items = merge_with(custom_merge, *batches)

    # only process accounts if the blocks are recent
    # scrape_all_users should take care of stale updates
    if is_recent(start_block, days=10):
        accounts = set(batch_items['accounts_light'] +
                       batch_items['accounts'])
        list(thread_multi(
            fn=update_account,
            fn_args=[mongo, None],
            dep_args=list(accounts),
            fn_kwargs=dict(load_extras=False),
            max_workers=max_workers,
            re_raise_errors=False,
        ))
        list(thread_multi(
            fn=update_account_ops_quick,
            fn_args=[mongo, None],
            dep_args=list(accounts),
            fn_kwargs=None,
            max_workers=max_workers,
            re_raise_errors=False,
        ))

    index = max(lpluck('block_num', results))
    indexer.set_checkpoint('post_processing', index)

    log.info("Checkpoint: %s - %s accounts (+%s full)" % (
        index,
        len(batch_items['accounts_light']),
        len(batch_items['accounts']),
    ))
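
The duplicate squashing relies on funcy's merge_with, which gathers the values of repeating keys into a list and passes it to the given function, so custom_merge receives a single positional argument per key. A minimal sketch with toy data (result order is not guaranteed because of the set):

from funcy import merge_with, flatten, keep

def custom_merge(*args):
    return list(set(keep(flatten(args))))

merge_with(custom_merge,
           {'accounts': ['alice', None]},
           {'accounts': ['bob', 'alice']})
# -> {'accounts': ['alice', 'bob']}  (keep drops the None, set drops the duplicate)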
Example 3
    def decode_trc(self, actions):
        circ = self.aigbv
        sys_actions = [fn.project(a, self.inputs) for a in actions]

        states = fn.lpluck(0, circ.simulate(actions))
        assert all(s['##valid'] for s in states)
        states = [fn.omit(s, {'##valid'}) for s in states]

        return sys_actions, states
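
Because funcy's pluck is built on operator.itemgetter, lpluck also works with positional indices, which is how the first element of each pair yielded by circ.simulate is pulled out above. A toy sketch with illustrative tuples:

import funcy as fn

trace = [('s0', 'aux0'), ('s1', 'aux1')]
fn.lpluck(0, trace)  # -> ['s0', 's1']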
Example 4
    def toggle(self, actions: Actions):
        """Toggles a sequence of (sys, env) actions."""
        assert len(actions) == self.horizon
        aps = fn.lpluck(0, self.dyn.simulate(actions))
        expr = preimage(aps=aps, mdp=self.dyn)
        bexpr, *_ = aiger_bdd.to_bdd(
            expr, manager=self.manager, renamer=lambda _, x: x
        )

        return attr.evolve(self, bexpr=xor(self.bexpr, bexpr))
Example 5
def test_preimage():
    spec, mdp = scenario_reactive()

    sys1 = mdp.aigbv >> BV.sink(1, ['c_next', '##valid'])
    sys2 = sys1 >> BV.aig2aigbv(spec.aig)

    def act(action, coin):
        return {'a': (action, ), 'c': (coin, )}

    actions = [act(True, True), act(True, False), act(True, True)]
    observations = fn.lpluck(0, sys1.simulate(actions))

    expr = preimage(observations, sys1)
    assert expr.inputs == {
        'c##time_0',
        'c##time_1',
        'c##time_2',
        'a##time_0',
        'a##time_1',
        'a##time_2',
    }

    bexpr1, manager, order = to_bdd2(sys2, horizon=3)

    def accepts(bexpr, actions):
        """Check if bexpr accepts action sequence."""
        timed_actions = {}
        for t, action in enumerate(actions):
            c, a = action['c'], action['a']
            timed_actions.update({
                f'c##time_{t}[0]': c[0],
                f'a##time_{t}[0]': a[0]
            })

        assert timed_actions.keys() == manager.vars.keys()
        tmp = manager.let(timed_actions, bexpr)
        assert tmp in (manager.true, manager.false)
        return tmp == manager.true

    assert not accepts(bexpr1, actions)

    bexpr2, _, input2var = aiger_bdd.to_bdd(expr,
                                            manager=manager,
                                            renamer=lambda _, x: x)

    assert accepts(bexpr2, actions)
    assert not accepts(~bexpr2, actions)

    bexpr3 = xor(bexpr1, bexpr2)

    assert accepts(bexpr3, actions)
Example 6
def create_dash_job(input_key,
                    output_path,
                    video_resolution: tuple,
                    has_audio: bool,
                    segment_duration=6):
    segment_duration = str(segment_duration)

    # 720p input is minimum for DASH
    if video_resolution[0] < 1280 or video_resolution[1] < 720:
        return None, ''

    outputs = []
    if video_resolution[0] >= 1920 and video_resolution[1] >= 1080:
        outputs.append({
            'Key': '1080p',
            'ThumbnailPattern': '',
            'Rotate': 'auto',
            'PresetId': config['presets']['high']['1080p'],
            'SegmentDuration': segment_duration,
        })

    outputs.extend([
        {
            'Key': '480p',
            'ThumbnailPattern': '',
            'Rotate': 'auto',
            'PresetId': config['presets']['high']['480p'],
            'SegmentDuration': segment_duration,
        },
        {
            'Key': '720p',
            'ThumbnailPattern': '',
            'Rotate': 'auto',
            'PresetId': config['presets']['high']['720p'],
            'SegmentDuration': segment_duration,
        },
    ])

    if has_audio:
        outputs.append({
            'Key': 'audio',
            'PresetId': config['presets']['high']['audio'],
            'SegmentDuration': segment_duration,
        })

    et = boto3.client(
        'elastictranscoder',
        region_name=config['region_name'],
        aws_access_key_id=AWS_MANAGER_PUBLIC_KEY,
        aws_secret_access_key=AWS_MANAGER_PRIVATE_KEY,
    )
    response = et.create_job(
        PipelineId=config['pipeline_id'],
        Input={
            'Key': input_key,
            'FrameRate': 'auto',
            'Resolution': 'auto',
            'AspectRatio': 'auto',
            'Interlaced': 'auto',
            'Container': 'auto',
        },
        Outputs=outputs,
        OutputKeyPrefix=output_path,
        Playlists=[
            {
                'Name': 'main',
                'Format': 'MPEG-DASH',
                'OutputKeys': lpluck('Key', outputs)
            },
        ],
        UserMetadata={},
    )

    return response, f"{output_path}main.mpd"
Example 7
def extract_errors(transcoder_job: dict):
    from funcy import merge, where, lpluck
    job = transcoder_job['Job']
    outputs = merge(job['Outputs'], job['Playlists'])
    return lpluck('StatusDetail', where(outputs, Status='Error'))
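
funcy's where(mappings, **cond) keeps only the mappings whose items include all of the given key/value pairs, and lpluck then pulls out the matching StatusDetail fields. A minimal sketch with made-up transcoder output:

from funcy import lpluck, where

outputs = [{'Status': 'Error', 'StatusDetail': 'Input file not found'},
           {'Status': 'Complete', 'StatusDetail': ''}]
lpluck('StatusDetail', where(outputs, Status='Error'))  # -> ['Input file not found']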
Example 8
def scrape_comments(mongo, batch_size=250, max_workers=50):
    """ Parse operations and post-process for comment/post extraction. """
    indexer = Indexer(mongo)
    start_block = indexer.get_checkpoint('comments')

    query = {
        "type": "comment",
        "block_num": {
            "$gt": start_block,
            "$lte": start_block + batch_size,
        }
    }
    projection = {
        '_id': 0,
        'block_num': 1,
        'author': 1,
        'permlink': 1,
    }
    results = list(mongo.Operations.find(query, projection=projection))
    identifiers = set(f"{x['author']}/{x['permlink']}" for x in results)

    # handle an edge case when we are too close to the head,
    # and the batch contains no work to do
    if not results and is_recent(start_block, days=1):
        return

    # get Post.export() results in parallel
    raw_comments = thread_multi(fn=get_comment,
                                fn_args=[None],
                                dep_args=list(identifiers),
                                max_workers=max_workers,
                                yield_results=True)
    raw_comments = lkeep(raw_comments)

    # split into root posts and comments
    posts = lfilter(lambda x: x['depth'] == 0, raw_comments)
    comments = lfilter(lambda x: x['depth'] > 0, raw_comments)

    # Mongo upsert many
    log_output = ''
    if posts:
        r = mongo.Posts.bulk_write(
            [
                UpdateOne({'identifier': x['identifier']},
                          {'$set': {
                              **x, 'updatedAt': dt.datetime.utcnow()
                          }},
                          upsert=True) for x in posts
            ],
            ordered=False,
        )
        log_output += \
            f'(Posts: {r.upserted_count} upserted, {r.modified_count} modified) '
    if comments:
        r = mongo.Comments.bulk_write(
            [
                UpdateOne({'identifier': x['identifier']},
                          {'$set': {
                              **x, 'updatedAt': dt.datetime.utcnow()
                          }},
                          upsert=True) for x in comments
            ],
            ordered=False,
        )
        log_output += \
            f'(Comments: {r.upserted_count} upserted, {r.modified_count} modified) '

    # We are only querying {type: 'comment'} blocks and sometimes
    # the gaps are larger than the batch_size.
    index = silent(max)(lpluck('block_num',
                               results)) or (start_block + batch_size)
    indexer.set_checkpoint('comments', index)

    log.info(f'Checkpoint: {index} {log_output}')
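
The checkpoint fallback works because funcy's silent wraps a function so that any exception is swallowed and None is returned, letting the or expression supply a default when the batch yielded no results. A minimal sketch:

from funcy import lpluck, silent

silent(max)(lpluck('block_num', []))          # -> None, since max() raises on an empty list
silent(max)(lpluck('block_num', [])) or 1000  # -> 1000, the fallback value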
Example 9
def has_audio_stream(ffprobe_out: dict) -> bool:
    streams = ffprobe_out.get('streams', [])
    return 'audio' in lpluck('codec_type', streams)
Example 10
def fill_probes(platform_id):
    platform = Platform.objects.get(pk=platform_id)
    gpl_name = platform.gpl_name
    cprint('%s %s %s' % (platform.pk, platform.gpl_name, platform.specie),
           attrs=['bold'])
    assert platform.specie

    platform.verdict = ''
    platform.probes_total = None
    platform.probes_matched = None
    platform.stats = {}
    platform.last_filled = timezone.now()

    annot_file = '/pub/geo/DATA/annotation/platforms/%s.annot.gz' % gpl_name
    family_file = '/pub/geo/DATA/SOFT/by_platform/%s/%s_family.soft.gz' % (
        gpl_name, gpl_name)
    files = [annot_file, family_file]
    tables = list(map(peek_platform, files))
    # Skip empty
    files = list(compress(files, tables))
    tables = lkeep(tables)

    # TODO: check other supplementary file formats
    supplementary_dir = '/pub/geo/DATA/supplementary/platforms/%s/' % gpl_name
    _, supplementary_files = listdir(supplementary_dir)
    supplementary_files = [
        f for f in supplementary_files
        if f.endswith('.txt.gz') and not re_test(r'\.cdf\.', f, re.I)
    ]
    files.extend(supplementary_files)
    tables.extend(
        decompress(download('%s%s' % (supplementary_dir, f)))
        for f in supplementary_files)
    platform.stats['files'] = lkeep(files)

    if not any(tables):
        cprint('No data for %s' % gpl_name, 'red')
        platform.verdict = 'no data'
        platform.save()
        return

    # Read tables in
    df = pd.concat(
        read_table(table, file) for table, file in zip(tables, files))
    del tables  # free memory
    platform.probes_total = len(set(df.index))
    cprint('Found %d probes to match' % platform.probes_total, 'yellow')
    # import ipdb; ipdb.set_trace()  # noqa

    # Try to resolve probes starting from best scopes
    mygene_probes = []
    platform.stats['matches'] = []
    platform.verdict = 'no clue'
    for scopes, cols in SCOPE_COLUMNS:
        cols = list(set(cols) & set(df.columns))
        if not cols:
            continue
        cprint('> Looking into %s' % ', '.join(sorted(cols)), 'cyan')
        platform.verdict = 'nothing matched'

        probes = pd.concat(df[col].dropna() for col in cols)
        new_matches = mygene_fetch(platform, probes, scopes)
        mygene_probes.extend(new_matches)

        # Drop matched probes
        if new_matches:
            platform.stats['matches'].append({
                'scopes': scopes,
                'cols': cols,
                'found': len(new_matches),
            })

            df = df.drop(lpluck('probe', new_matches))
            if df.empty:
                break

    # Update stats and history
    platform.probes_matched = len(mygene_probes)
    platform.history.append({
        'time': timezone.now().strftime('%Y-%m-%d %T'),
        'probes_total': platform.probes_total,
        'probes_matched': platform.probes_matched,
    })

    # Insert found genes
    if mygene_probes:
        with transaction.atomic():
            platform.verdict = 'ok'
            platform.save()

            platform.probes.all().delete()
            PlatformProbe.objects.bulk_create([
                PlatformProbe(platform=platform, **probe_info)
                for probe_info in mygene_probes
            ])
        cprint('Inserted %d probes for %s' % (len(mygene_probes), gpl_name),
               'green')
    else:
        cprint('Nothing matched for %s' % gpl_name, 'red')
        platform.save()
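
In the matching loop above, lpluck('probe', new_matches) collects the probe identifiers returned by mygene_fetch so the matched rows can be dropped from the DataFrame by index label. A minimal sketch with made-up probe records:

import pandas as pd
from funcy import lpluck

df = pd.DataFrame({'symbol': ['TP53', 'BRCA1']}, index=['p1', 'p2'])
new_matches = [{'probe': 'p1', 'mygene_sym': 'TP53'}]  # hypothetical match records
df = df.drop(lpluck('probe', new_matches))             # drops the row labelled 'p1'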