Python PoiIndex Examples

Programming Language: Python

Namespace/Package Name: poi.index

Class/Type: PoiIndex

Examples at hotexamples.com: 2

Python PoiIndex - 2 examples found. These are the top-rated real-world Python examples of poi.index.PoiIndex, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

build_index(1)

update_index(1)

Example #1

Show file

File: note_manager.py Project: kalleroska/poi

    def __init__(self, configpath=''):
        """
        Load the poi configuration and set up paths, state and indexes.

        Every top-level key of the JSON configuration file becomes an
        attribute of the instance. This method itself relies on ``root``,
        ``color`` and ``file_ext`` being among those keys.

        Parameters
        ----------
        configpath : str, optional
            Path of the JSON configuration file. When empty,
            ``~/.poiconfig.json`` is used.

        Raises
        ------
        SystemExit
            With exit status 1 when the configuration file does not exist.
        """
        if not configpath:
            configpath = os.path.join(os.path.expanduser('~'), '.poiconfig.json')
        if not os.path.exists(configpath):
            print('poi: configuration file does not exist. Please run poi-config first.')
            # A missing configuration is a failure, so report a non-zero exit
            # status to whoever launched the program.
            sys.exit(1)

        with open(configpath) as f:
            config = json.load(f)
        # Expose each configuration entry as an instance attribute.
        for key, value in config.items():
            setattr(self, key, value)

        self.color_style = style_from_dict({
            Token.Title: self.color['title'],
            Token.Link: self.color['link'],
            Token.HL1: self.color['highlight'][0],
            Token.HL2: self.color['highlight'][1],
            Token.HL3: self.color['highlight'][2],
            Token.Tag: self.color['tag'],
            Token.Text: '#ffffff roman',
            })

        # Directories below the configured root.
        self.notes = os.path.join(self.root, 'notes')
        self.refs = os.path.join(self.root, 'refs')
        self.backups = os.path.join(self.root, '.backups')

        # Session state.
        self.history = []
        self.link_listing = []
        self.last_notepath = ''

        # Both indexes are given an index file under the root and a glob
        # pattern selecting every note file.
        note_glob = os.path.join(self.notes, '*' + self.file_ext)
        self.index = PoiIndex(os.path.join(self.root, '.index.txt'), note_glob)
        self.tag_index = TagIndex(os.path.join(self.root, '.tag_index.txt'), note_glob)

Example #2

Show file

File: note_manager.py Project: kalleroska/poi

class NoteManager(object):
    """
    Backend for poi.

    Takes care of creating a new file for a note, updating the filename when
    editing and displaying the note; searching and listing notes.

    """

    def __init__(self, configpath=''):
        # load existing configuration files if exists:
        if not configpath:
            configpath = os.path.join(os.path.expanduser('~'), '.poiconfig.json')
        if not os.path.exists(configpath):
            print('poi: configuration file does not exist. Please run poi-config first.')
            sys.exit(0)

        with open(configpath) as f:
            config = json.load(f)
        for key, value in config.items():
            setattr(self, key, value)

        self.color_style = style_from_dict({
            Token.Title: self.color['title'],
            Token.Link: self.color['link'],
            Token.HL1: self.color['highlight'][0],
            Token.HL2: self.color['highlight'][1],
            Token.HL3: self.color['highlight'][2],
            Token.Tag: self.color['tag'],
            Token.Text: '#ffffff roman',
            })

        self.notes = os.path.join(self.root, 'notes')
        self.refs = os.path.join(self.root, 'refs')
        self.backups = os.path.join(self.root, '.backups')

        self.history = []
        self.link_listing = []
        self.last_notepath = ''

        self.index = PoiIndex(os.path.join(self.root, '.index.txt'), os.path.join(self.notes, '*' + self.file_ext))
        self.tag_index = TagIndex(os.path.join(self.root, '.tag_index.txt'), os.path.join(self.notes, '*' + self.file_ext))

    def identifier_to_notepath(self, note_id):
        pattern = os.path.join(self.notes, note_id + '*' + self.file_ext)
        notepaths = glob.glob(pattern)  # Will be empty or singleton
        if not notepaths:
            return None
        else:
            return notepaths[0]

    def open_link(self, link):
        # First check whether link is another note:
        m = re.match(r'(\d\d\d\d)-(\d\d)-(\d\d)-(\d+)', link)
        if m:
            # Y, m, d, num = m.groups()
            pattern = os.path.join(self.notes, link + '*' + self.file_ext)
            notepaths = glob.glob(pattern)  # Will be empty or singleton
            if notepaths:
                self.display_note(notepaths[0])
                self.last_notepath = notepaths[0]
                return None
            else:
                print('poi: invalid note identifier')
                return None

        # Otherwise check whether it is a reference:
        refpath = os.path.join(self.refs, link)
        if os.path.exists(refpath):
            return_code = subprocess.call('/usr/bin/open ' + ' ' + '"' + refpath + '"', shell=True)
            return None

        # If nothing else matches, assume link is a url
        return_code = subprocess.call('/usr/bin/open ' + ' ' + '"' + link + '"', shell=True)
        return None

    def generate_notepath(self, date=None):
        """Generate a unique filepath for a note.

        If date is given, use is as a basis; if not use today as the date.

        Parameter
        ---------
            date : str of form 'yyyymmdd', optional
        """

        # If date is not given, use today's date:
        if date is None:
            date = datetime.date.today().strftime('%Y-%m-%d')
        else:
            date = date.strftime('%Y-%m-%d')

        # Modification date, always today:
        m_date = datetime.date.today().strftime('%Y-%m-%d')

        # Loop until a unique filepath is found:
        count = 1
        while True:
            # filename is of form:
            # YYYY-MM-DD-<num>-YYYY-MM-DD<file_ext>
            filename = date + '-' + str(count) + '-' + m_date + self.file_ext
            filepath = os.path.join(self.root, 'notes', filename)
            if os.path.exists(filepath):
                count += 1
            else:
                break
        return filepath

    def list_notes(self):
        """
        Return a list of all filepaths of notes.
        """
        pattern = os.path.join(self.root, 'notes', '*' + self.file_ext)
        return glob.glob(pattern)

    def list_dates(self):
        """
        Collect the distinct creation and modification dates of all notes.

        Returns
        -------
        tuple of (list, list)
            Sorted creation dates and sorted modification dates, each
            formatted as ``YYYY-MM-DD <weekday abbreviation>``.
        """
        stamp = '%Y-%m-%d %a'
        created, modified = set(), set()
        infos = map(extract_filename_info, self.list_notes())
        for created_on, modified_on, _num, _identifier in infos:
            created.add(created_on.strftime(stamp))
            modified.add(modified_on.strftime(stamp))
        return sorted(created), sorted(modified)

    def count_tags(self):
        """
        Collect all tags from notes and return as a sorted list.

        TODO: keep a list of tags in a file with a date for its creation.
        Whenever this function is called, only run through those notes whose
        modification date is mote recent that the tags lists date. This can
        save a 1-2 seconds from the user having to wait for the tag list to be
        generated.
        """
        tags = Counter()
        for tag, paths in self.tag_index.index.items():
            tags[tag] = len(paths)
        return tags

    def filter_notes_by_date(self, notes, criteria):
        """
        Filter out notes whose date(s) do not meet criteria.

        Parameters
        ----------
        notes : list
            A list of filepaths of notes.
        criteria : dict
            A dictionary possibly containing several filtering criteria. In
            this function, the following criteria are used, each of which is
            a date object:
                - min_creation_date
                - max_creation_date
                - min_modification_date
                - max_modification_date
            A missing lower bound defaults to 1970-01-01 and a missing upper
            bound to 9999-12-31. The dictionary is not modified.

        Returns
        -------
        list
            The notes whose dates lie within all bounds (bounds inclusive),
            in their original order.
        """
        earliest = datetime.date(1970, 1, 1)
        latest = datetime.date(9999, 12, 31)

        # Read the bounds with defaults instead of writing the defaults back
        # into the caller's dictionary.
        min_created = criteria.get('min_creation_date', earliest)
        max_created = criteria.get('max_creation_date', latest)
        min_modified = criteria.get('min_modification_date', earliest)
        max_modified = criteria.get('max_modification_date', latest)

        result = []
        for notepath in notes:
            creation_date, modification_date, _, _ = extract_filename_info(notepath)
            if not min_created <= creation_date <= max_created:
                continue
            if not min_modified <= modification_date <= max_modified:
                continue
            result.append(notepath)
        return result

    def add_note(self, date=None, tags=[]):
        """
        Create a unique filepath for a new note and open it in an editor.

        Parameters
        ----------
        date : datetime.date, optional
            Creation date of the note.
        tags : list, optional
            A list of tags of the new note.

        Returns
        -------
        int
            Return code given by trying to open with ``subprocess.call``.
        """
        notepath = self.generate_notepath(date)
        if tags:
            with open(notepath, 'wt') as f:
                f.write(10 * '\n' + '#: ' + ', '.join(tags))
        pathlib.Path(notepath).touch()
        return_code = subprocess.call(self.editor_cmd + ' ' + notepath, shell=True)
        # The next line attached the newly created note to _
        self.last_notepath = notepath
        return return_code

    def update_notepath(self, old_notepath, modification_date, creation_date=None):
        """Rename a note so its filename carries a new modification date.

        A copy of the old file is first stored in the backups directory under
        the old filename stem with the current Unix time appended. Entries in
        the listing history that point at the old path are rewritten, and the
        new path becomes the last note path.

        Parameters
        ----------
        old_notepath : str
            Current path, ``<poiroot>/notes/YYYY-MM-DD-<num>-YYYY-MM-DD<file_ext>``.
        modification_date : str
            New modification date as it should appear in the filename.
        creation_date : str, optional
            Replacement creation date as it should appear in the filename.
            When omitted, the creation date of the old filename is kept.

        Returns
        -------
        str
            The new path of the note.
        """
        # <poiroot>/notes  and  YYYY-MM-DD-<num>-YYYY-MM-DD<file_ext>
        notes_dir, filename = os.path.split(old_notepath)
        # YYYY-MM-DD-<num>-YYYY-MM-DD  and  <file_ext>
        stem, suffix = os.path.splitext(filename)

        # Only the creation date and the running number of the old name are
        # carried over; the old modification date is replaced.
        old_created, _, number, _ = extract_filename_info(old_notepath)
        created = creation_date if creation_date else old_created.strftime('%Y-%m-%d')

        new_name = created + '-' + str(number) + '-' + modification_date + suffix
        new_notepath = os.path.join(notes_dir, new_name)

        # Keep a timestamped backup of the file before it is renamed.
        timestamp = str(int(time.time()))
        shutil.copy(old_notepath, os.path.join(self.backups, stem + '-' + timestamp + suffix))

        shutil.move(old_notepath, new_notepath)

        # Earlier listings must keep pointing at the note under its new name.
        for listing in self.history:
            for position, listed_path in enumerate(listing):
                if listed_path == old_notepath:
                    listing[position] = new_notepath

        self.last_notepath = new_notepath
        return new_notepath

    def edit_note(self, filepath):
        """
        Stamp a note with today's modification date and open it in the editor.

        The note is renamed through ``update_notepath`` before the editor is
        started, so the editor works on the new path.

        Parameters
        ----------
        filepath : str
            Current path of the note.

        Returns
        -------
        int
            Return code of the editor command.
        """
        import shlex

        modification_date = datetime.datetime.today().strftime('%Y-%m-%d')
        new_filepath = self.update_notepath(filepath, modification_date)
        # editor_cmd may carry its own arguments, so it is handed to the shell
        # as is; only the path is quoted so that spaces or shell
        # metacharacters in it do not break the command.
        return subprocess.call(self.editor_cmd + ' ' + shlex.quote(new_filepath), shell=True)

    def edit_date(self, filepath, date):
        """
        Copy a note to a new path whose creation date is ``date``.

        The copy is placed at a freshly generated path, renamed through
        ``update_notepath`` with today's modification date, and the index is
        rebuilt afterwards. The file at ``filepath`` is left in place.

        Parameters
        ----------
        filepath : str
            Path of the existing note.
        date : datetime.date
            Creation date for the copy.
        """
        today = datetime.datetime.today().strftime('%Y-%m-%d')
        copy_path = self.generate_notepath(date=date)
        shutil.copy(filepath, copy_path)
        self.update_notepath(old_notepath=copy_path, modification_date=today)
        self.index.build_index()


    def tokenize(self, notepath):
        """
        Turn a note into a list of ``(token type, text)`` pairs for printing.

        The token list consists of, in order: the note identifier, the title
        (if any), the body (if any), the links (if any) and the tags (if any),
        followed by a final newline.

        Inside the body, each of the first three entries of
        ``self.highlight_markers`` toggles its highlight (HL1, HL2, HL3) on
        or off; the markers themselves are not emitted. All other characters
        are emitted one token per character with the current token type.

        As a side effect ``self.link_listing`` is replaced by a dict mapping
        each link key of the note to its target.

        Parameters
        ----------
        notepath : str
            Path of the note file.

        Returns
        -------
        list of tuple
        """
        # The handle is not read; opening the file makes an unreadable path
        # fail here, before any state is touched.
        with open(notepath, 'rt'):
            self.link_listing = {}

            note = parse_note(notepath)

            tokens = [(Token.Tag, note.identifier)]

            if note.title:
                tokens.append((Token.Punct, '\n'))
                tokens.append((Token.Title, note.title))

            if note.body:
                tokens.append((Token.Punct, '\n\n'))

                body = note.body
                # Pair the first three markers with their highlight types;
                # earlier markers take precedence.
                highlights = list(zip(self.highlight_markers,
                                      (Token.HL1, Token.HL2, Token.HL3)))
                token_type = Token.Text
                i = 0

                while i < len(body):
                    for marker, highlight in highlights:
                        # startswith with an offset avoids copying the rest
                        # of the body for every character. An empty marker is
                        # skipped because it would match without advancing.
                        if marker and body.startswith(marker, i):
                            # toggle the highlight on or off
                            token_type = highlight if token_type != highlight else Token.Text
                            i += len(marker)
                            break
                    else:
                        tokens.append((token_type, body[i]))
                        i += 1

            if note.links:
                tokens.append((Token.Punct, '\n\n'))
                for k, v in note.links:
                    tokens.append((Token.LinkID, '[{}] '.format(k)))
                    tokens.append((Token.Link, '{}'.format(v)))
                    tokens.append((Token.Punct, '\n'))
                    self.link_listing[k] = v
                # Delete the last newline character:
                del tokens[-1]

            if note.tags:
                # Separate the tags from whatever precedes them.
                if note.links:
                    tokens.append((Token.Punct, '\n'))
                else:
                    tokens.append((Token.Punct, '\n\n'))
                tokens.append((Token.Tag, ', '.join(note.tags)))

            tokens.append((Token.Punct, '\n'))
            return tokens

    def display_note(self, notepath):
        """Clear the terminal and print the tokenized note in colour."""
        os.system('clear')
        print_tokens(self.tokenize(notepath), style=self.color_style)

    def describe_note(self, notepath):
        """
        Print a summary of a note's metadata, one attribute per line.

        The summary lists title, creation date, modification date, tags,
        number of links, filepath and identifier, with the attribute names
        right-aligned in a 20-character column.
        """
        note = parse_note(notepath)
        rows = [
            ('title: ', note.title),
            ('date: ', note.creation_date),
            ('last modified: ', note.modification_date),
            ('tags: ', ', '.join(note.tags)),
            ('number of links: ', str(len(note.links))),
            ('filepath: ', note.filepath),
            ('identifier: ', note.identifier),
        ]
        tokens = []
        for label, value in rows:
            tokens.append((Token.Attribute, '{:>20}'.format(label)))
            tokens.append((Token.Value, value + '\n'))
        print_tokens(tokens, style=self.color_style)

    def display_note2(self, notepath):
        """
        Print the content of a note on the terminal, line by line.

        The title is printed first, then the body, then the tags (if any).
        Body lines are handled as follows:

        - a line beginning with "@:" is printed as a numbered link to a
          reference, and its path in the refs directory is appended to
          ``self.link_listing``
        - a line beginning with "http://" or "https://" is printed as a
          numbered link, and the url is appended to ``self.link_listing``
        - a line equal to one of ``self.highlight_markers`` starts a block:
          the following lines, up to the next line equal to that marker, are
          printed with the marker's highlight token
        - in any other line, text between two occurrences of the first or the
          second highlight marker is highlighted; if the closing marker is
          missing, the rest of the line is highlighted

        Parameters
        ----------
        notepath : str
            Path of the note file.
        """
        # The handle is not read; opening the file makes an unreadable path
        # fail here, before any state is touched.
        with open(notepath, 'rt'):
            # Must be a list: links are appended below and numbered by their
            # position in it.
            self.link_listing = []

            note = parse_note(notepath)
            # A newline is added after the title explicitly.
            print_tokens([(Token.Title, note.title + '\n')], style=self.color_style)

            lines = note.body.split('\n')
            line_num = 0
            while line_num < len(lines):
                line = lines[line_num]

                # Is the line a link to a reference?
                if line.startswith('@:'):
                    ref = line[2:].strip()
                    index = len(self.link_listing) + 1
                    # The newline lost when splitting the body is added back.
                    print_tokens([(Token.Link, str(index) + '  ' + ref + '\n')], style=self.color_style)
                    self.link_listing.append(os.path.join(self.refs, ref))
                    line_num += 1
                    continue

                # Is the line a link to a webpage?
                if line.startswith(('http://', 'https://')):
                    url = line.strip()
                    index = len(self.link_listing) + 1
                    # The newline lost when splitting the body is added back.
                    print_tokens([(Token.Link, str(index) + '  ' + url + '\n')], style=self.color_style)
                    self.link_listing.append(url)
                    line_num += 1
                    continue

                # Does the line indicate a highlighted block?
                for i, marker in enumerate(self.highlight_markers):
                    if line == marker:
                        if i == 0:
                            token_type = Token.HL1
                        elif i == 1:
                            token_type = Token.HL2
                        else:  # i == 2
                            token_type = Token.HL3
                        line_num += 1
                        while line_num < len(lines) and lines[line_num] != marker:
                            print_tokens([(token_type, lines[line_num] + '\n')], style=self.color_style)
                            line_num += 1
                        # NOTE: this continues the marker loop, not the outer
                        # line loop. line_num now points at the closing
                        # marker (or past the end), while `line` still holds
                        # the opening marker; the code below processes that
                        # and then steps past the closing marker.
                        continue

                # Regular line: print it character by character, highlighting
                # spans enclosed by the first or the second marker.
                i = 0
                marker_0_width = len(self.highlight_markers[0])
                marker_1_width = len(self.highlight_markers[1])
                while i < len(line):
                    if line[i:].startswith(self.highlight_markers[0]):
                        j = line[i + marker_0_width:].find(self.highlight_markers[0])
                        if j > -1:
                            print_tokens([(Token.Blue, line[i + marker_0_width: i + marker_0_width + j])], style=self.color_style)
                            # Skip opening marker, span and closing marker.
                            i = i + marker_0_width + j + marker_0_width
                        else:
                            # No closing marker: highlight the rest of the line.
                            print_tokens([(Token.Blue, line[i + marker_0_width:])], style=self.color_style)
                            break
                    elif line[i:].startswith(self.highlight_markers[1]):
                        j = line[i + marker_1_width:].find(self.highlight_markers[1])
                        if j > -1:
                            print_tokens([(Token.Yellow, line[i + marker_1_width: i + marker_1_width + j])], style=self.color_style)
                            i = i + marker_1_width + j + marker_1_width
                        else:
                            print_tokens([(Token.Yellow, line[i + marker_1_width:])], style=self.color_style)
                            break
                    else:
                        print_tokens([(Token, line[i])], style=self.color_style)
                        i += 1
                # Add the newline that was lost when splitting the body on
                # \n.
                line_num += 1
                print()
            if note.tags:
                print_tokens([(Token.Tag, ', '.join(note.tags) + '\n')], style=self.color_style)

    @staticmethod
    def delete_note(notepath):
        """Delete a note"""
        if os.path.exists(notepath):
            os.remove(notepath)

    def filter_notes(self, notepaths=[], criteria={}):
        """
        Given a list of filepaths of notes and a dictionary of criteria, onlu
        keep those notes that meet the criteria.

        NOTE: Filter notes based on days with filter_notes_by_date() because
        that is a lot faster.

        Parameters
        ----------
        notepaths : list
            A list of filepaths representing notes.
        criteria : dict
            A dictionary representing various criteria.

        Returns
        -------
        result : list
            A list of note that meet the criteria.

        """
        result = []
        N = len(notepaths)

        # NOTE: by default, the case of letters is ignored in title, body, and
        # tags. But this can be switched off as option:
        if criteria.get('ignore-case', True):
            if 'words' in criteria:
                criteria['words'] = [word.lower() for word in criteria['words']]
            if 'no-words' in criteria:
                criteria['no-words'] = [word.lower() for word in criteria['no-words']]
            if 'tags' in criteria:
                criteria['tags'] = [tag.lower() for tag in criteria['tags']]

        for i, notepath in enumerate(notepaths):
            self._print_progress_bar(i, N)
            note = parse_note(notepath)

            # most pattern matching is done to the whole content of a note,
            # regardless of whether it is title, body, or tags. So let's define
            # a variable for that:
            content = note.title + '\n' + note.body + '\n' + '#: ' + ' '.join(note.tags)

            tags = note.tags
            if criteria.get('ignore-case', True):
                content = content.lower()
                tags = [t.lower() for t in tags]

            # tags: if tags are given, at least one tag must appear
            if criteria.get('tags', []):
                if not set.intersection(set(criteria['tags']), set(tags)):
                    continue

            # words: all words must appear as a SUBSTRING
            # NOTE: this includes cases like 'string' in 'substring'
            # NOTE: matching is done in lower case
            if criteria.get('words', []):
                for word in criteria['words']:
                    if word not in content:
                        not_all_words_appear = True
                        break
                else:
                    not_all_words_appear = False
                if not_all_words_appear:
                    continue

            # no_words: none of these words must appear
            # NOTE: here only full words are considered. For example, 'substring'
            # does not count as an occurrence of 'string'.
            # NOTE: matching is done preserving the case of letters
            if criteria.get('no-words', []):
                if set.intersection(set(criteria['no-words']), set(content.split())):
                    continue

            result.append(notepath)
        return result

    def _print_progress_bar(self, i, N):
        """Print progressbar.

        Useful when going through a long and slow loop, such as when iterating
        through notes that are parsed and searched.
        """
        full = N // 100
        if i % 100 == 0:
            remain = i // 100
            # http://stackoverflow.com/a/5419488
            print((full - remain) * '.' + remain * ' ' + '\r', end='')
            # print('|\|/'[(i // 100) % 4] + '\r', end='')
            sys.stdout.flush()

    def print_note_listing(self, notepaths):
        """
        Print a list of note in a nicely formatted way.

        Parameter
        ---------
        notepaths : list
            A list of filepaths of notes.
        """

        # fmt is of the form:
        # <index>    YYYY-MM-DD aaa    <title>
        fmt = '{:>4}    {:<14}    {}'
        N = len(notepaths)

        # If note_list is empty, do not print anything
        if N == 0:
            return None
        else:
            print()
        #     print(fmt.format('index', 'date', 'title'))

        for i, notepath in enumerate(notepaths):
            note = parse_note(notepath)
            print(fmt.format(str(N - 1 - i), note.creation_date, note.title))
            # Flush stdout after each print statement so that the listing
            # appears smooth to the user:
            sys.stdout.flush()

        self.history.append(notepaths)
        infobar = '\nlisting: {}\ttotal: {}'.format(len(self.history), N)
        print(infobar)

    def print_tag_listing(self, sort_by_count=False):
        tags = self.count_tags()
        if not tags:
            return None
        max_len = max(len(t) for t in tags)
        # print()
        if sort_by_count:
            items = sorted(tags.items(), key=lambda x: x[1])
        else:
            items = sorted(tags.items(), key=lambda x: x[0].lower())

        for tag, count in items:
            tokens = []
            # tokens.append((Token.Tag, ('  {:<}').format(tag)))
            tokens.append((Token.Tag, ('{:<' + str(max_len + 2) + '}').format(tag)))
            tokens.append((Token, '{:<3} '.format(count)))
            print_tokens(tokens, style=self.color_style)
            print()
        print('\ntotal:', len(tags))


    def search_by_keywords(self, query):
        self.index.update_index()
        res = defaultdict(set)
        for word in query:
            word = word.lower()
            for token in self.index.index.keys():
                if word in token.lower():
                    res[word].update(set(self.index[token]))
        if not res.values():
            return []
        else:
            note_ids = reduce(set.intersection, res.values())
            notepaths = [self.identifier_to_notepath(note_id) for note_id in note_ids]
            notepaths = list(sorted(notepaths))
            return notepaths

    def filter_notes_by_tags(self, notepaths, criteria):
        """
        Filter out notes that do not have a tag in criteria.
        """

        # If criteria does not have nay tags in, let every note pass through.
        if 'tags' not in criteria:
            return notepaths

        self.tag_index.update_index()

        # Take every note whose tags include all tags in the criteria.
        res = defaultdict(set)
        for t in criteria['tags']:
            # print(self.tag_index.index)
            if t in self.tag_index.index:
                res[t].update(set(self.tag_index[t]))
        if not res.values():
            return []
        else:
            note_ids = reduce(set.intersection, res.values())
            filtered_notepaths = []
            for np in notepaths:
                _, _, _, identifier = extract_filename_info(np)
                if identifier in note_ids:
                    filtered_notepaths.append(np)
            return filtered_notepaths