コード例 #1
0
    def test_parse_orgmode_list(self):
        org = orgparse.loads('''#+STARTUP: showall

- Lorem ipsum foo. <2019-01-17 Thu>
- bar <2019-01-18 Fri 11:30>
- spam [2021-05-13 Thu]
''')
        subprovider = 'my_provider'
        result = list(parse_orgmode_list(org, subprovider))
        expected = [
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 17),
                text='Lorem ipsum foo.',
                provider='orgmodelist',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 18, 11, 30),
                text='bar',
                provider='orgmodelist',
                subprovider=subprovider,
                all_day=False,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2021, 5, 13),
                text='spam',
                provider='orgmodelist',
                subprovider=subprovider,
                all_day=True,
            ),
        ]
        self.assertListEqual(result, expected)
コード例 #2
0
ファイル: main.py プロジェクト: jakubvalenta/automatic-diary
def parse_orgmode(f: IO, subprovider: str) -> Iterator[Item]:
    """Parse an Org-mode journal into diary Items.

    Each heading line (matched by ``regex_heading``) carries a date in
    ``%Y-%m-%d %a`` format; every paragraph of non-empty lines below it
    becomes one all-day Item. Headings with a TODO keyword disable
    collection until the next dated heading.

    Args:
        f: Open text stream with Org-mode content.
        subprovider: Subprovider string stored on each yielded Item.

    Yields:
        One all-day Item per paragraph under a dated heading.
    """
    current_datetime: Optional[datetime.datetime] = None
    current_paragraph: List[str] = []
    lines = peekable(f)  # peekable so `not lines` detects the last line
    for line in lines:
        line_clean = line.strip()
        if line_clean:
            m = regex_heading.match(line_clean)
            # Title line
            if m:
                # Bug fix: a paragraph that runs directly into the next
                # heading (no blank line between them) was neither yielded
                # nor cleared, so its lines leaked into the new heading's
                # date. Flush it under its own date before switching.
                if current_datetime and current_paragraph:
                    yield Item.normalized(
                        datetime_=current_datetime,
                        text='\n'.join(current_paragraph),
                        provider=provider,
                        subprovider=subprovider,
                        all_day=True,
                    )
                current_paragraph.clear()
                if m.group('todo'):
                    current_datetime = None
                else:
                    # NOTE(review): %a is locale-dependent; the input is
                    # assumed to use English weekday abbreviations.
                    current_datetime = datetime.datetime.strptime(
                        m.group('date'), '%Y-%m-%d %a')
            # Paragraph line but not before first heading
            elif current_datetime:
                current_paragraph.append(line_clean)
        # Empty line after paragraph or last line of file
        if not line_clean or not lines:
            if current_datetime and current_paragraph:
                yield Item.normalized(
                    datetime_=current_datetime,
                    text='\n'.join(current_paragraph),
                    provider=provider,
                    subprovider=subprovider,
                    all_day=True,
                )
                current_paragraph.clear()
コード例 #3
0
ファイル: main.py プロジェクト: jakubvalenta/automatic-diary
def _parse_events(events_data: Iterable[str],
                  subprovider: str) -> Iterator[Item]:
    """Yield an Item for every event parsed from each raw calendar string."""
    for raw_event in events_data:
        for event in parse_calendar(io.StringIO(raw_event)):
            yield Item.normalized(
                datetime_=event.begin,
                text=event.name,
                provider=provider,
                subprovider=subprovider,
                all_day=event.all_day,
            )
コード例 #4
0
def main(config: dict, *args, **kwargs) -> Iterator[Item]:
    """Read a Facebook archive HTML page and yield one Item per status."""
    archive_path = config['path']
    username = config['username']
    logger.info('Reading Facebook archive %s', archive_path)
    parsed_page = _read_html(archive_path)
    statuses = _parse_timeline_page(parsed_page)
    for status in statuses:
        yield Item.normalized(
            datetime_=status.datetime_,
            text=status.text,
            provider=provider,
            subprovider=username,
        )
コード例 #5
0
ファイル: main.py プロジェクト: jakubvalenta/automatic-diary
def _parse_ratings_pages(
    soups: Iterable[BeautifulSoup], subprovider: str
) -> Iterator[Item]:
    """Yield an all-day Item for every film rating found on each page."""
    for page in soups:
        yield from (
            Item.normalized(
                datetime_=film.datetime_,
                text=film.title,
                provider=provider,
                subprovider=subprovider,
                all_day=True,
            )
            for film in _parse_ratings_page(page)
        )
コード例 #6
0
ファイル: main.py プロジェクト: jakubvalenta/automatic-diary
def _parse_tweets_file(path: Path) -> Iterator[Item]:
    """Yield one Item per tweet found in a Twitter archive data file.

    The file's first line is not JSON and is skipped before decoding.
    """
    with path.open() as f:
        f.readline()  # skip the first line, which is not JSON
        tweets = json.load(f)
    for tweet in tweets:
        created_at = datetime.datetime.strptime(
            tweet['created_at'], '%Y-%m-%d %H:%M:%S %z')
        yield Item.normalized(
            datetime_=created_at,
            text=tweet['text'],
            provider=provider,
            subprovider=tweet['user']['screen_name'],
        )
コード例 #7
0
def main(config: dict, *args, **kwargs) -> Iterator[Item]:
    """Read the configured calendar files and yield each event once.

    Deduplication relies on Event equality across all files; the seen
    list is scanned linearly per event.
    """
    seen: List[Event] = []
    for raw_path in config['paths']:
        calendar_path = Path(raw_path)
        for event in _read_calendar(calendar_path):
            if event in seen:
                continue
            yield Item.normalized(
                datetime_=event.begin,
                text=event.name,
                provider=provider,
                subprovider=calendar_path.name,
                all_day=event.all_day,
            )
            seen.append(event)
コード例 #8
0
def _read_git_logs(repo_paths: Iterable[str], author: str) -> Iterator[Item]:
    """Yield one Item per commit by *author* across the given repositories.

    Paths whose git log invocation fails are skipped.
    """
    for repo_path in repo_paths:
        logger.info('Reading repository %s', repo_path)
        repo_name = os.path.basename(repo_path)
        try:
            output = _call_git_log(repo_path, author)
        except subprocess.CalledProcessError:
            # git failed for this path -- move on to the next repository.
            continue
        for row in output.splitlines():
            # Each row is "<iso datetime>,<commit message>".
            timestamp_str, message = row.split(',', maxsplit=1)
            yield Item.normalized(
                datetime_=datetime.datetime.fromisoformat(timestamp_str),
                text=message,
                provider=provider,
                subprovider=repo_name,
            )
コード例 #9
0
ファイル: main.py プロジェクト: jakubvalenta/automatic-diary
def main(config: dict, *args, **kwargs) -> Iterator[Item]:
    """Parse a todo.txt file and yield an all-day Item per dated line."""
    todo_path = Path(config['path'])
    logger.info('Reading todo.txt file %s', todo_path)
    with todo_path.open() as f:
        for raw_line in f:
            match = regex_line.match(raw_line)
            if match is None:
                continue  # line doesn't carry a date -- skip it
            year = int(match.group('y'))
            month = int(match.group('m'))
            day = int(match.group('d'))
            yield Item.normalized(
                datetime_=datetime.datetime(year, month, day),
                text=_clean_text(match.group('text')),
                provider=provider,
                subprovider=todo_path.name,
                all_day=True,
            )
コード例 #10
0
def main(config: dict, *args, **kwargs) -> Iterator[Item]:
    """Read a CSV file and yield one all-day Item per row.

    Date and text are rendered from the configured mustache templates
    (`date_source`, `text_source`) against each row; the rendered date
    is parsed with `date_format`.
    """
    csv_path = Path(config['path'])
    logger.info('Reading CSV file %s', csv_path)
    renderer = pystache.Renderer(escape=lambda u: u)  # no HTML escaping
    date_tmpl = pystache.parse(config['date_source'])
    text_tmpl = pystache.parse(config['text_source'])
    date_format = config['date_format']
    with csv_path.open() as f:
        for row in csv.DictReader(f):
            rendered_date = renderer.render(date_tmpl, row)
            yield Item.normalized(
                datetime_=datetime.datetime.strptime(
                    rendered_date, date_format),
                text=renderer.render(text_tmpl, row),
                provider=provider,
                subprovider=csv_path.name,
                all_day=True,
            )
コード例 #11
0
ファイル: main.py プロジェクト: jakubvalenta/automatic-diary
def _read_messages(pathname: str, sent: bool) -> Iterator[Item]:
    """Yield one Item per email message whose path matches glob *pathname*.

    Messages without a Date header are skipped with a warning.

    NOTE(review): subprovider is the glob pattern itself, not the matched
    file path -- looks deliberate (one subprovider per mailbox pattern)
    but worth confirming against the caller.
    """
    for message_path in glob.glob(pathname):
        logger.info('Reading message %s', message_path)
        with open(message_path, 'rb') as f:
            message = email.message_from_binary_file(f)
        if not message['Date']:
            logger.warning('Skipping message without date: %s', message_path)
            continue
        text = _format_text(
            _parse_address(message['From']),
            _parse_address(message['To']),
            _decode_header(message['Subject']),
            sent,
        )
        yield Item.normalized(
            datetime_=_parse_date(message['Date']),
            text=text,
            provider=provider,
            subprovider=pathname,
        )
コード例 #12
0
def parse_orgmode_list(org: orgparse.OrgNode,
                       subprovider: str) -> Iterator[Item]:
    """Parse a flat Org-mode list into diary Items.

    Every non-empty, non-comment line of the root body must match
    ``regex_item`` (item text plus a date). Lines whose date fails to
    parse are skipped with a warning. An item is all-day when its parsed
    datetime carries no time-of-day component.

    Args:
        org: Parsed Org-mode document.
        subprovider: Subprovider string stored on each yielded Item.

    Raises:
        OrgModeError: When a line doesn't match the expected format.
    """
    for line in org.root.body.splitlines():
        # Skip blank lines and Org comments/settings such as #+STARTUP.
        if not line or line.startswith('#'):
            continue
        m = regex_item.search(line)
        if not m:
            # Fixed typo in the error message ("Unknow" -> "Unknown").
            raise OrgModeError(f'Unknown format of line "{line}"')
        text = m.group('text')
        date_str = m.group('date')
        datetime_ = dateparser.parse(date_str)
        if not datetime_:
            # Logger.warn() is deprecated; warning() is the supported name.
            logger.warning('Failed to parse date "%s"', date_str)
            continue
        all_day = not any((datetime_.hour, datetime_.minute, datetime_.second))
        yield Item.normalized(
            datetime_=datetime_,
            text=text,
            provider=provider,
            subprovider=subprovider,
            all_day=all_day,
        )
コード例 #13
0
    def test_parse_txt(self):
        f = io.StringIO('''2019-01-17 Čt
    foo
    bar
        baz baz
2019-01-18 Pá
2019-01-19
    one
        two
            foo
            three
                four
                bar
            baz
        spam
    lorem
''')
        subprovider = 'my_provider'
        result = list(parse_txt(f, subprovider))
        expected = [
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 17),
                text='foo',
                provider='txt',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 17),
                text='bar: baz baz',
                provider='txt',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 19),
                text='one: two: foo',
                provider='txt',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 19),
                text='one: two: three four bar',
                provider='txt',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 19),
                text='one: two: baz',
                provider='txt',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 19),
                text='one: spam',
                provider='txt',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 19),
                text='lorem',
                provider='txt',
                subprovider=subprovider,
                all_day=True,
            ),
        ]
        self.assertListEqual(result, expected)
コード例 #14
0
def _read_items(csv_path: str) -> Iterator[Item]:
    """Read Items back from a CSV file, one row per Item."""
    with open(csv_path) as f:
        for row in csv.reader(f):
            yield Item.from_tuple(row)
コード例 #15
0
    def test_parse_orgmode(self):
        f = io.StringIO('''#+STARTUP: showall

* <2019-01-17 Thu>

foo
bar


two empty lines are okay

* <2019-01-18 Fri>
missing empty line is okay

* <2019-01-19 Sat>

something

something else
- with
- a
- list

* TODO <2019-01-20 Sun>

ignore this

''')
        subprovider = 'my_provider'
        result = list(parse_orgmode(f, subprovider))
        expected = [
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 17),
                text='foo\nbar',
                provider='orgmode',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 17),
                text='two empty lines are okay',
                provider='orgmode',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 18),
                text='missing empty line is okay',
                provider='orgmode',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 19),
                text='something',
                provider='orgmode',
                subprovider=subprovider,
                all_day=True,
            ),
            Item.normalized(
                datetime_=datetime.datetime(2019, 1, 19),
                text='something else\n- with\n- a\n- list',
                provider='orgmode',
                subprovider=subprovider,
                all_day=True,
            ),
        ]
        self.assertListEqual(result, expected)
コード例 #16
0
def parse_txt(
    f: IO,
    subprovider: str,
    indent_spaces: int = 4,
    sep: str = ': ',
    max_indent: int = 3,
    sep_after_max_indent: str = ' ',
) -> Iterator[Item]:
    """Parse an indented plain-text journal into Items.

    The input alternates date headings (matched by ``regex_heading``,
    ``%Y-%m-%d`` at the start of the line) with content lines indented
    in multiples of *indent_spaces*. A chain of nested content lines is
    joined with *sep* into a single Item text. Lines nested deeper than
    *max_indent* do not open a new level; they are appended to the
    current deepest fragment with *sep_after_max_indent*.

    Raises:
        ValueError: On content before the first heading, or on
            indentation that is not a multiple of *indent_spaces*, or
            on a line that matches neither pattern.
    """
    current_datetime: Optional[datetime.datetime] = None
    # stack[i] holds the text fragment at nesting level i; joined with
    # `sep` whenever an Item is emitted.
    stack: List[str] = []
    # peekable so `not lines` below can detect the end of the stream.
    lines = peekable(f)
    for line in lines:
        line_clean = line.rstrip()
        if not line_clean:
            continue
        # Title line
        m = regex_heading.match(line_clean)
        if m:
            # Flush whatever was collected under the previous heading.
            if stack:
                if not current_datetime:
                    # Defensive: content is only stacked after a date is
                    # set, so this should be unreachable.
                    raise ValueError('No date found')
                text = sep.join(stack)
                yield Item.normalized(
                    datetime_=current_datetime,
                    text=text,
                    provider=provider,
                    subprovider=subprovider,
                    all_day=True,
                )
                stack.clear()
            date_str = m.group('date')
            current_datetime = datetime.datetime.strptime(date_str, '%Y-%m-%d')
        # Starts with a non-date line
        elif not current_datetime:
            raise ValueError('No date found')
        # Content line
        else:
            m = regex_content.match(line_clean)
            if not m:
                raise ValueError(f'Misformatted line "{line_clean}"')
            indent_len = len(m.group('indent'))
            if indent_len % indent_spaces != 0:
                raise ValueError(
                    f'Indent not a multiple of {indent_spaces} '
                    f'"{line_clean}"'
                )
            # Float division, but the modulo check above guarantees the
            # result is a whole number; comparisons with ints below work.
            indent_size = indent_len / indent_spaces
            raw_text = m.group('text')
            if indent_size > max_indent:
                # Too deep: merge into the current deepest fragment
                # instead of opening a new nesting level.
                indent_size = max_indent
                stack[-1] = sep_after_max_indent.join([stack[-1], raw_text])
                continue
            if indent_size <= len(stack):
                # Same level or a dedent: the branch collected so far is
                # complete, emit it before starting the new fragment.
                text = sep.join(stack)
                yield Item.normalized(
                    datetime_=current_datetime,
                    text=text,
                    provider=provider,
                    subprovider=subprovider,
                    all_day=True,
                )
                # NOTE(review): pops at most two levels, which appears to
                # assume dedents never skip more than one level at once --
                # confirm against real input.
                if indent_size < len(stack):
                    stack.pop()
                stack.pop()
            stack.append(raw_text)
        # Last line of the file: flush the pending branch.
        if not lines and stack:
            text = sep.join(stack)
            yield Item.normalized(
                datetime_=current_datetime,
                text=text,
                provider=provider,
                subprovider=subprovider,
                all_day=True,
            )