Esempio n. 1
0
def user_summary_for(rtype, storages, output_path: Path):
    """
    Render an HTML table summarising, per user, how many items they
    contributed to each storage, and write it to *output_path*.

    rtype      : result type, used to pick the FormatTrait for user links
    storages   : iterable of storage objects (each exposing .path and .name)
    output_path: destination file for the rendered HTML document
    """
    ustats = {}

    def reg(user, query, stats):
        # ustats[user][query] -> item count for that user/query pair
        if user not in ustats:
            ustats[user] = {}
        ustats[user][query] = stats

    # digests are computed in parallel; map() preserves `storages` order
    with ProcessPoolExecutor() as pp:
        digests = pp.map(get_digest, [s.path for s in storages])

    for s, digest in zip(storages, digests):
        everything = flatten([ch for ch in digest.changes.values()])
        for user, items in group_by_key(everything,
                                        key=lambda x: x.user).items():
            reg(user, s.name, len(items))

    now = datetime.now()
    doc = dominate.document(
        title=
        f'axol tags summary for {[s.name for s in storages]}, rendered at {fdate(now)}'
    )
    with doc.head:
        T.style(STYLE)
        raw_script(JS)  # TODO necessary?

        # TODO FIXME can't inline due to some utf shit
        sortable_js = Path(__file__).absolute().parent / 'js' / 'sorttable.js'
        T.script(src=str(sortable_js))

    ft = FormatTrait.for_(rtype)
    with doc.body:
        with T.table(cls='sortable'):
            emitted_head = False
            # sort users by number of queries (desc), then by user name for a
            # deterministic tie-break; keying on x[0] rather than the whole
            # item avoids ever comparing the stats dicts, which would raise
            # TypeError on Python 3 (dicts are unorderable)
            for user, stats in sorted(ustats.items(),
                                      key=lambda x: (-len(x[1]), x[0])):
                if not emitted_head:
                    # NOTE(review): header columns come from the first user's
                    # stats only; if users have different query sets, row cells
                    # may not line up with the header — TODO confirm
                    with T.thead():
                        T.td('user')
                        for q, _ in stats.items():
                            T.td(q)
                    emitted_head = True

                with T.tr():
                    T.td(ft.user_link(user))
                    for q, st in stats.items():
                        with T.td(sorttable_customkey=str(st)):
                            # TODO I guess unclear which tag to choose though.
                            T.a(
                                q, href=f'summary/{q}.html'
                            )  # TODO link to source in index? or on pinboard maybe
                            # TODO also project onto user's tags straight away
                            T.sup(
                                str(st) if st < 5 else T.b(
                                    T.font(str(st), color='red')))  # TODO css

    output_path.write_text(str(doc))
    logger.info('Dumped user summary to %s', output_path)
Esempio n. 2
0
def sleeps_by_date() -> Dict[date, SleepEntry]:
    """Map each date to its unique sleep entry; days with several entries are logged as errors and skipped."""
    log = get_logger()

    candidates = [s for s in load_sleeps() if s.graph.exists()]  # TODO careful..
    result = {}
    for day, entries in group_by_key(candidates, key=lambda s: s.date_).items():
        if len(entries) != 1:
            # TODO short ones I can ignore I guess. but won't bother now
            log.error('multiple sleeps on %s: %s', day, entries)
            continue
        result[day] = entries[0]
    return result
Esempio n. 3
0
def get_tg_tasks():
    """
    Extract forwarded Telegram messages from the backup file and turn them
    into tasks.

    Returns a list of (id, title, texts) tuples, one per forwarding batch
    (messages are grouped by their date — apparently, date is appropriate as
    a 'unit of forwarding'), ordered by date.
    """
    forwarded = []
    # builtin open() with an explicit encoding supersedes legacy codecs.open;
    # iterating the file directly avoids materialising all lines via readlines()
    with open(BACKUP_PATH, 'r', encoding='utf-8') as bp:
        for line in bp:
            j = json_loads(line)
            if j['event'] == 'message' and 'fwd_from' in j:
                forwarded.append(j)

    grouped = group_by_key(forwarded, lambda f: f['date'])
    tasks = []
    for _, group in sorted(grouped.items(), key=lambda f: f[0]):
        id_, title, texts = format_group(group)
        tasks.append((id_, title, texts))
    return tasks
Esempio n. 4
0
def render_summary(repo: Path, digest: Changes[Any], rendered: Path) -> Path:
    """
    Render the cumulative summary HTML page for a single repo.

    repo    : repository path; its stem names the output file
    digest  : changes object with a .changes mapping of date -> items
    rendered: output directory (created if missing)

    Returns the path of the generated HTML file.
    """
    rtype = get_result_type(repo)  # TODO ??
    # TODO just get trait for type??

    Cumulative = CumulativeBase.for_(rtype)

    NOW = datetime.now()
    name = repo.stem

    everything = flatten(list(digest.changes.values()))

    before = len(everything)

    # collapse items sharing the same cumulative key
    grouped = group_by_key(everything, key=Cumulative.cumkey)
    print(f'before: {before}, after: {len(grouped)}')

    # sorted() already returns a list, no need to wrap it again
    cumulatives = sorted(map(Cumulative, grouped.values()),
                         key=Cumulative.sortkey)

    doc = dominate.document(
        title=f'axol results for {name}, rendered at {fdate(NOW)}')
    with doc.head:
        T.style(STYLE)
        raw_script(JS)
    with doc:
        T.h3("This is axol search summary")
        T.div(
            "You can use 'hide' function in JS (chrome debugger) to hide certain tags/subreddits/users"
        )
        T.h4("Sources summary")
        # TODO wrap in div?
        with T.div():
            Cumulative.sources_summary(everything)
        for cc in cumulatives:
            T.div(cc.format(), cls='item')

    rendered.mkdir(exist_ok=True, parents=True)
    sf = rendered.joinpath(name + '.html')
    # write_text keeps this consistent with the other render helpers
    sf.write_text(str(doc))
    return sf
Esempio n. 5
0
def _get_kml(items):
    """Build a KML document from *items* (one folder per list) and return it as an XML string."""
    from kython import group_by_key

    kml = KmlMaker()
    for list_name, group in group_by_key(items, key=lambda p: p.lst).items():
        # icon style derived from the first place's color, if any
        first_color = group[0].color
        style = kml.make_icon_style(color=first_color) if first_color is not None else None

        placemarks = []
        for place in group:
            mark = K.Placemark(
                id=place.name,
                name=place.name,
                description=place.description,
                styleUrl=style,
            )
            mark.geometry = Point(place.lng, place.lat)
            placemarks.append(mark)

        kml.add_folder(
            name=list_name,
            items=placemarks,
        )

    body = kml.to_string(prettyprint=True)
    return '<?xml version="1.0" encoding="UTF-8"?>\n' + body
Esempio n. 6
0
def process():
    """Load annotations from ./res.json, group them by user, and print each user's visited docs with in-context links."""
    import json
    from pathlib import Path
    from kython import group_by_key
    anns = json.loads(Path('./res.json').read_text())

    # NOTE(review): presumably filters out a noisy/irrelevant source — confirm
    anns = [a for a in anns if 'RhoChiPlanReviews' not in a['uri']]

    groups = group_by_key(anns, key=lambda a: a['user'])
    # users with the fewest annotations first
    for k, g in sorted(groups.items(), key=lambda i: len(i[1])):
        print(f'{k}: {len(g)}')
        docs = []
        for a in g:
            uri = a["uri"]
            if is_visited(uri):
                docs.append((uri, a['links']['incontext']))
                # TODO incontext??
                # could display the document title instead of the uri here

        for u, x in sorted(docs):
            print('  ' + u)
            print('    ' + x)
Esempio n. 7
0
def user_summary(storages, output_dir: Path):
    """Write one per-source user summary HTML page into *output_dir*."""
    grouped = group_by_key(storages, key=lambda s: s.source)
    for source, sts in grouped.items():
        # all storages of one source must share a single result type
        result_type = the(get_result_type(x) for x in sts)
        out_file = output_dir / (For(source).name + '_users.html')
        user_summary_for(rtype=result_type, storages=sts, output_path=out_file)
Esempio n. 8
0
def render_latest(repo: Path, digest, rendered: Path):
    """
    Render the 'latest changes' HTML page plus an Atom feed for one repo.

    repo    : repository path; its stem names the page/feed
    digest  : object with a .changes mapping of datetime -> items
    rendered: output directory; writes <name>.html there and atom/<name>.xml
              into its 'atom' subdirectory

    Returns the path of the generated HTML file.
    """
    logger.info('processing %s', repo)

    rtype = get_result_type(repo)
    Format = FormatTrait.for_(rtype)
    Ignore = IgnoreTrait.for_(rtype)

    import pytz
    NOW = datetime.now(tz=pytz.utc)

    name = repo.stem
    doc = dominate.document(
        title=f'axol results for {name}, rendered at {fdate(NOW)}')

    with doc.head:
        T.style(STYLE)
        raw_script(JS)

        # CodeMirror assets pulled from a CDN
        T.link(
            rel='stylesheet',
            href=
            "https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.48.2/codemirror.min.css"
        )
        T.script(
            src=
            'https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.48.2/codemirror.js'
        )  # TODO use min?

    # flatten digest.changes into (crawl date, item) pairs
    citems: Iterator[Tuple[datetime, Item]] = chain.from_iterable(
        ((d, x) for x in zz) for d, zz in digest.changes.items())
    # group according to link, so we can display already occuring items along with newer occurences
    items2: List[Sequence[Tuple[datetime, Item]]] = [
        grp for _, grp in group_by_key(citems,
                                       key=lambda p: f'{p[1].link}').items()
    ]

    # TODO sort within each group?

    def min_dt(group: Sequence[Tuple[datetime, Item]]) -> datetime:
        # earliest crawl date within a group of same-link items
        return min(g[0] for g in group)

    # TODO ok, this is def too many types here...
    items3: Mapping[datetime,
                    List[Sequence[Tuple[datetime,
                                        Item]]]] = group_by_key(items2,
                                                                key=min_dt)

    rss = True
    if rss:
        # pip3 install feedgen
        from feedgen.feed import FeedGenerator  # type: ignore
        fg = FeedGenerator()
        # TODO memorize items?
        fg.title(name)
        fg.id('axol/' + name)
        first = True
        # iterate batches oldest-first; the very first (usually huge) batch is
        # deliberately excluded from the feed
        for d, items in sorted(items3.items()):
            litems = list(items)
            logger.info('%s %s: atom, dumping %d items', name, d, len(litems))
            if first:
                logger.info("SKIPPING first batch to prevent RSS bloat")
                first = False
                continue
            for zz in litems:
                fe = fg.add_entry()
                # TODO not sure about css?
                # TODO not sure which date should use? I gues crawling date makes more sense..
                _d, z = zz[0]  # TODO meh!
                id_ = z.uid  # TODO FIXME!!

                fe.id(id_)
                title = Format.title(zz) or '<no title>'  # meh
                fe.title(title)
                fe.link(href=Format.link(zz))
                # TODO not sure if it's a reasonable date to use...
                fe.published(published=d)
                fe.author(author={'name': z.user})  # TODO maybe, concat users?

                # ignored groups still get an entry, but with the ignore
                # explanation as their content instead of the formatted item
                ignored = Ignore.ignore_group(zz)
                if ignored is not None:
                    # TODO not sure if it highlights with read or something?
                    content = ignored
                else:
                    content = Format.format(zz)

                # eh, XML was complaining at some non-utf characters
                content = str(content)

                # https://stackoverflow.com/a/25920392/706389 make lxml happy...
                # strips code points that are not legal in XML 1.0
                content = re.sub(
                    u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+',
                    '', content)
                fe.content(content=content, type='CDATA')
                # fe.updated(updated=NOW)

                # TODO assemble a summary similar to HTML?
                # fe.summary()
        atomfeed = fg.atom_str(pretty=True)

        # eh, my feed reader (miniflux) can't handle it if it's 'cdata'
        # not sure which one is right
        # ugh, that didn't work because escaping desicion is based on CDATA attribute...
        atomfeed = atomfeed.replace(b'type="CDATA"', b'type="html"')
        # fe._FeedEntry__atom_content['type'] = 'html'

        atomdir = rendered / 'atom'
        atomdir.mkdir(parents=True, exist_ok=True)
        (atomdir / (name + '.xml')).write_bytes(atomfeed)

    with doc:
        # sidebar with a client-side-editable blacklist
        with T.div(id='sidebar'):
            T.label('Blacklisted:', for_='blacklisted')
            T.div(id='blacklisted')
            T.textarea(id='blacklist-edit', rows=10)
            T.button('apply', id='blacklist-apply')

        odd = True
        # newest batches first; `odd` alternates per batch for zebra styling
        for d, items in sorted(items3.items(), reverse=True):
            litems = list(items)
            odd = not odd
            logger.info('%s %s: dumping %d items', name, d, len(litems))
            with T.div(cls='day-changes'):
                with T.div():
                    T.b(fdate(d))
                    T.span(f'{len(litems)} items')

                with T.div(
                        cls=f'day-changes-inner {"odd" if odd else "even"}'):
                    for i in items:
                        # TODO FIXME use getattr to specialise trait?
                        # TODO FIXME ignore should be at changes collecting stage?

                        ignored = Ignore.ignore_group(i)
                        if ignored is not None:
                            # TODO maybe let format result handle that... not sure
                            T.div(ignored, cls='item ignored')
                            # TODO log maybe?
                            # TODO eh. need to handle in cumulatives...
                        else:
                            fi = Format.format(i)
                            T.div(fi, cls='item')
        # NOTE: never managed to render content inside an iframe, hence the dead code below
        # with T.iframe(id='blacklist', src=''):
        #     pass

    # TODO perhaps needs to be iterative...
    rf = rendered / (name + '.html')
    with rf.open('w') as fo:
        fo.write(str(doc))
    return rf