Ejemplo n.º 1
0
 def show_help(self):
     """Show help about choosing output columns with the check boxes.

     Builds a ``HelpMsg`` titled 'Help' from the text below, keeps a
     reference to it on ``self._popup`` and opens it.
     """
     message = (
         '- Use the check boxes on the [b]left[/b] and [b]right[/b] to '
         'select columns from each spreadsheet. [b]Checked columns will '
         'appear in the output[/b].\n\n'
         '- These columns will not be used for matching or ranking.\n\n'
         '- You cannot uncheck any column that was used for matching in a '
         'previous step.\n'
     )
     popup = HelpMsg(message, title='Help', size_hint=(0.75, 0.6))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 2
0
 def show_help(self):
     """Show help about picking the two spreadsheets to match.

     Builds a ``HelpMsg`` titled 'Help' from the text below, keeps a
     reference to it on ``self._popup`` and opens it.
     """
     message = (
         '- In SQL terminology, this software implements a "left join" on '
         'one or more inexact keys.\n\n'
         '- Use the [b]"Browse"[/b] buttons to select the two spreadsheets '
         'you want to match. When prompted, give each sheet a name.\n\n'
         '- The [b]"Populate to"[/b] sheet is the one you want to populate '
         'with matches. In SQL terminology it is the "left" table. The '
         'match output will contain one (or optionally more) match result '
         'for each row in this sheet. [b]This is typically the sheet with '
         'the fewest rows.[/b]\n\n'
         '- The [b]"Populate from"[/b] sheet should contain the potential '
         'matches for the rows in the first sheet. In SQL terminology this '
         'is the "right" table. [u]Note:[/u] not every row in this sheet '
         'will necessarily populate into the first sheet, and rows from '
         'this sheet can populate more than once into the first one.\n'
     )
     popup = HelpMsg(message, title='Help', size_hint=(0.92, 0.92))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 3
0
 def show_about(self):
     """Show the 'About' popup crediting the authors and libraries used.

     Builds a ``HelpMsg`` titled 'About' from the text below, keeps a
     reference to it on ``self._popup`` and opens it.
     """
     credits = (
         'This GUI and related code is written by Steve Suway. '
         'The GUI is built using Kivy. '
         'Matching computations are performed using string_grouper, which '
         'is written by Chris van den Berg. '
         'string_grouper\'s matching functionality builds upon '
         'sparse_dot_topn, which is an open-source project by ING Bank. '
         'ASCII transliteration is done using Unidecode, written by Tomaz '
         'Solc. '
         'Other packages this code relies on include NumPy, SciPy, and '
         'pandas.'
     )
     popup = HelpMsg(credits, title='About', size_hint=(0.75, 0.60))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 4
0
class NarrowByPanel(BoxLayout):
    """Panel for choosing one match column from each spreadsheet.

    Holds two ``ColumnSelectSection`` widgets (one per sheet), lets the
    user build a merged column through a ``ColumnMerger`` popup, and
    stores the chosen column names on ``app.backend.narrow_by`` when the
    user moves on.
    """

    # Widget references; presumably filled in from the kv rule for this
    # panel (not visible here) -- they are used as real widgets below.
    select_box = ObjectProperty(None)
    back_btn = ObjectProperty(None)
    next_btn = ObjectProperty(None)

    def __init__(self, **kwargs):
        """Create both column sections and hook up all button callbacks."""
        super().__init__(**kwargs)

        self.select_section1 = ColumnSelectSection(text='Column1:')
        self.select_section2 = ColumnSelectSection(text='Column2:')
        sections = (self.select_section1, self.select_section2)

        # The sheet number is bound as a default argument so each button
        # keeps its own value (1 or 2) rather than the loop's last one.
        for sheet_no, section in enumerate(sections, start=1):
            section.new_col_btn.bind(
                on_release=lambda *_, n=sheet_no: self.show_col_merger(n))

        for section in sections:
            self.select_box.add_widget(section)

        self.back_btn.bind(on_release=self.back_callback)
        self.next_btn.bind(on_release=self.next_callback)

    def show_col_merger(self, which):
        """Open the 'Create merged column' popup for sheet ``which``.

        ``which == 1`` offers the backend's ``columns1``; any other value
        offers ``columns2``. The popup is remembered on
        ``self.merge_popup`` so ``do_col_merge`` can reach its content.
        """
        merger = ColumnMerger()
        backend = App.get_running_app().backend
        merger.populate(
            backend.columns1 if which == 1 else backend.columns2)
        merger.which = which

        dialog = Popup(title='Create merged column')
        merger.cancel_btn.bind(on_release=dialog.dismiss)
        merger.merge_btn.bind(on_release=self.do_col_merge)
        dialog.content = merger
        dialog.open()
        self.merge_popup = dialog

    def do_col_merge(self, *args):
        """Create the merged column requested in the merge popup.

        Validates that at least two columns were picked and that a name
        was entered, then inserts the merged column at position 0 of the
        relevant dataframe, refreshes the dependent widgets and closes
        the popup. Any exception raised while merging closes the popup
        and is reported through an ``ErrorMsg``.
        """
        app = App.get_running_app()
        backend = app.backend
        merger = self.merge_popup.content

        if len(merger.right_buttons) < 2:
            ErrorMsg(
                error_text='Choose at least 2 columns to merge together.'
            ).open()
            return

        requested_name = merger.ids.new_col_text.text
        if requested_name == '':
            ErrorMsg(
                error_text=
                'Enter a name for the merged column that will be created.'
            ).open()
            return

        try:
            helper = backend.grouper_helper
            df = helper.df1 if merger.which == 1 else helper.df2
            source_names = [button.text for button in merger.right_buttons]
            merged_values = backend.merge_cols(df, source_names)

            # Append '.1', '.2', ... until the name no longer collides
            # with an existing column; give up after a bounded number of
            # attempts.
            new_col_name = requested_name
            attempt = 1
            while any(df.columns == new_col_name):
                new_col_name = requested_name + '.' + str(attempt)
                attempt += 1
                if attempt > 5000:
                    raise Exception(
                        'Maybe try a different name for the merged column?')

            df.insert(0, new_col_name, merged_values)

            if merger.which == 1:
                backend.columns1 = df.columns
                self.populate_dropdown1()
            else:
                backend.columns2 = df.columns
                self.populate_dropdown2()

            app.panels['alsocompare_screen'].reset_panel()
            app.panels['append_screen'].populate()
            self.merge_popup.dismiss()
        except Exception as error:
            self.merge_popup.dismiss()
            # str(type(e)) looks like "<class 'pkg.Name'>"; the piece
            # between the quotes is the qualified exception name.
            qualified_name = str(type(error)).split('\'')[1]
            ErrorMsg(
                error_text='Error creating new column: {}. {}'.format(
                    qualified_name, error)
            ).open()

    def show_help(self):
        """Show help about selecting the match columns on this panel."""
        message = (
            '- Use the drop-down menus to select one column from each '
            'spreadsheet to use for matching.\n\n'
            '- These columns will be used to [b]identify the potential '
            'matches[/b] between the two sheets.\n\n'
            '- If needed, you can first create a [b]merged column[/b] from '
            'two or more existing columns. This is useful (for example) '
            'for combining FirstName+LastName, or '
            'Street+City+State+Zip.\n'
        )
        popup = HelpMsg(message, title='Help', size_hint=(0.85, 0.65))
        self._popup = popup
        popup.open()

    def populate_dropdowns(self):
        """Refresh both column sections from the backend."""
        self.populate_dropdown1()
        self.populate_dropdown2()

    def populate_dropdown1(self):
        """Fill section 1 with sheet 1's columns, label and shape text."""
        backend = App.get_running_app().backend
        section = self.select_section1
        section.populate(backend.columns1)
        nrows, ncols = backend.grouper_helper.df1.shape
        section.lbl.text = backend.labels[0]
        section.shape_lbl.text = f'({nrows} rows, {ncols} cols)'

    def populate_dropdown2(self):
        """Fill section 2 with sheet 2's columns, label and shape text."""
        backend = App.get_running_app().backend
        section = self.select_section2
        section.populate(backend.columns2)
        nrows, ncols = backend.grouper_helper.df2.shape
        section.lbl.text = backend.labels[1]
        section.shape_lbl.text = f'({nrows} rows, {ncols} cols)'

    def back_callback(self, btn):
        """Clear both sections and the stored choice, then go back."""
        for section in (self.select_section1, self.select_section2):
            section.depopulate()
        app = App.get_running_app()
        app.backend.narrow_by = [None, None]
        app.nav_to('load_screen', 'right')

    def next_callback(self, btn):
        """Store the two selected column names and move to the next screen."""
        app = App.get_running_app()
        # The stored pair is overwritten on every call, so coming back to
        # this panel and pressing next again simply replaces it.
        selected = [
            section.dropdown.text
            for section in (self.select_section1, self.select_section2)
        ]
        app.backend.narrow_by = selected
        app.nav_to('alsocompare_screen', 'left')
Ejemplo n.º 5
0
 def show_help(self):
     """Show help about selecting one match column from each spreadsheet.

     Builds a ``HelpMsg`` titled 'Help' from the text below, keeps a
     reference to it on ``self._popup`` and opens it.
     """
     message = (
         '- Use the drop-down menus to select one column from each '
         'spreadsheet to use for matching.\n\n'
         '- These columns will be used to [b]identify the potential '
         'matches[/b] between the two sheets.\n\n'
         '- If needed, you can first create a [b]merged column[/b] from '
         'two or more existing columns. This is useful (for example) for '
         'combining FirstName+LastName, or Street+City+State+Zip.\n'
     )
     popup = HelpMsg(message, title='Help', size_hint=(0.85, 0.65))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 6
0
 def show_help(self):
     """Show help describing the match export options.

     Builds a ``HelpMsg`` titled 'Help' from the text below, keeps a
     reference to it on ``self._popup`` and opens it.
     """
     message = (
         '- [u]Keep the top [i]n[/i] matches[/u]: choose how many '
         'alternate matches you want to review. If you only want the '
         'top-scoring match for each row, select \'1\'.\n\n'
         '- [u]Add spacer between groups of matches[/u]: if you export '
         'multiple alternate matches, this option adds a blank spacer row '
         'between separate groups of matches, which helps guide the eye '
         'during manual review.\n\n'
         '- [u]Sort rows by similarity[/u]: if enabled, rows will be '
         'sorted by match score. If disabled, the original row order from '
         'your input spreadsheet will be preserved.\n\n'
         '- [u]Copy matches to clipboard[/u]: choose this option if you '
         'want to paste the matches into Excel or Google Sheets.\n\n'
         '- [u]Save matches to file[/u]: choose this option if you want '
         'to save the matches to a spreadsheet file (.xlsx, .csv, or '
         '.txt).\n'
     )
     popup = HelpMsg(message, title='Help', size_hint=(0.9, 0.9))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 7
0
 def show_help(self):
     """Show help about adding extra columns to compare.

     Builds a ``HelpMsg`` titled 'Help' from the text below, keeps a
     reference to it on ``self._popup`` and opens it.
     """
     message = (
         '- Optionally, select additional columns for matching. To add '
         'columns, press the [b]"Add..."[/b] button and use the drop-down '
         'menus to select a column from each sheet. You can add as many '
         'additional comparisons as you like.\n\n'
         '- If you use this feature, an [b]average similarity[/b] over '
         'all match columns will be used to rank matches. This average '
         'will be appended to the output.\n'
     )
     popup = HelpMsg(message, title='Help', size_hint=(0.75, 0.6))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 8
0
 def help_shortstr(self):
     """Show the 'Short string support' explanation popup.

     Builds a ``HelpMsg`` from the text below, keeps a reference to it on
     ``self._popup`` and opens it.
     """
     message = (
         'If disabled, words/strings shorter than the n-gram size '
         '(default=3) will not be matched. If enabled, short strings will '
         'be padded to the length of the n-gram size. This allows match '
         'scores to be computed for short strings. \n\n'
         '[b]Note1[/b]: Match scores may be less accurate for padded '
         'words/strings, though these scores are often still useful.\n\n'
         '[b]Note2[/b]: The padded entry will be used to compute match '
         'scores, but the original entry will always be preserved in the '
         'final output sheet.\n'
     )
     popup = HelpMsg(
         message, title='Short string support', size_hint=(0.75, 0.65))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 9
0
 def help_whitesp(self):
     """Show the 'Ignore whitespace' explanation popup.

     Builds a ``HelpMsg`` from the text below, keeps a reference to it on
     ``self._popup`` and opens it.
     """
     message = (
         'If enabled, whitespace will be stripped from each entry before '
         'matching. This includes spaces, tabs, line-breaks, etc.\n\n'
         '[b]Note[/b]: The modified entry will be used to compute match '
         'scores, but the original entry will always be preserved in the '
         'final output sheet.\n'
     )
     popup = HelpMsg(
         message, title='Ignore whitespace', size_hint=(0.65, 0.5))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 10
0
 def help_amperland(self):
     """Show the 'Convert ampersands' explanation popup.

     Builds a ``HelpMsg`` from the text below, keeps a reference to it on
     ``self._popup`` and opens it.
     """
     message = (
         'Replace ampersands (&) with "and". Example: if enabled, '
         '"Health & Safety" will be considered identical to "Health and '
         'Safety".\n\n'
         '[b]Note[/b]: The modified entry will be used to compute match '
         'scores, but the original entry will always be preserved in the '
         'final output sheet.\n'
     )
     popup = HelpMsg(
         message, title='Convert ampersands', size_hint=(0.65, 0.55))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 11
0
 def help_unidecode(self):
     """Show the 'ASCII transliteration' explanation popup.

     Builds a ``HelpMsg`` from the text below, keeps a reference to it on
     ``self._popup`` and opens it.
     """
     message = (
         "Enable to strip accents/diacritics and to transliterate "
         "non-Latin characters. Examples: if enabled, ö is equal to o; "
         "å is equal to a, ş is equal to s; δ is equal to d; "
         "щ is equal to shch; the Hangul character hieut is equal to "
         "h.\n\n"
         "[b]Note[/b]: The modified entry will be used to compute match "
         "scores, but the original entry will always be preserved in the "
         "final output sheet.\n"
     )
     popup = HelpMsg(
         message, title='ASCII transliteration', size_hint=(0.65, 0.55))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 12
0
 def help_case(self):
     """Show the 'Ignore case' explanation popup.

     Builds a ``HelpMsg`` from the text below, keeps a reference to it on
     ``self._popup`` and opens it.
     """
     message = (
         'Ignore whether strings are written in upper- or lower-case. If '
         'enabled, "hello" will be considered identical to "HELLO".\n\n'
         '[b]Note[/b]: The case-corrected entry will be used to compute '
         'match scores, but the original entry will always be preserved '
         'in the final output sheet.\n'
     )
     popup = HelpMsg(message, title='Ignore case', size_hint=(0.65, 0.55))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 13
0
 def help_excl(self):
     """Show the 'Ignore characters' explanation popup.

     Builds a ``HelpMsg`` from the text below, keeps a reference to it on
     ``self._popup`` and opens it.
     """
     message = (
         'Provide a list of characters that should be ignored when '
         'computing match scores. These characters will be removed from '
         'each entry before matching.\n\n'
         '[b]Note1[/b]: Do not include any delimiters between characters '
         'in this list. Just type each character.\n\n'
         '[b]Note2[/b]: You cannot include multiple-character sequences. '
         'For example, if you are trying to exclude the sequence "com", '
         'this will remove all c\'s, o\'s, and m\'s individually.\n\n'
         '[b]Note3[/b]: The modified entry will be used to compute match '
         'scores, but the original entry will always be preserved in the '
         'final output sheet. \n'
     )
     popup = HelpMsg(
         message, title='Ignore characters', size_hint=(0.8, 0.8))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 14
0
 def help_ngram(self):
     """Show the 'N-gram size' explanation popup.

     Builds a ``HelpMsg`` from the text below, keeps a reference to it on
     ``self._popup`` and opens it.
     """
     message = (
         'The match algorithm converts character strings into a set of '
         '"features" based on small groups of sequential characters '
         'called "n-grams". The n-gram size sets the number of sequential '
         'characters to use for feature extraction. An n-gram size of 3 '
         'generally works well, but if all your words/strings are very '
         'short a size of 2 could be more accurate.\n'
     )
     popup = HelpMsg(message, title='N-gram size', size_hint=(0.7, 0.6))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 15
0
 def help_nmatch(self):
     """Show the 'Maximum number of matches' explanation popup.

     Builds a ``HelpMsg`` from the text below, keeps a reference to it on
     ``self._popup`` and opens it.
     """
     message = (
         'Set the maximum number of matches to consider for each entry in '
         'the [b]Populate to[/b] sheet. Decreasing this can lead to more '
         'false negatives (dropped matches that are actually good). '
         'Increasing this can lead to more false positives (keeping '
         'matches that are not good). [b]For large datasets you may get '
         'better results if you increase this number.[/b]\n'
     )
     popup = HelpMsg(
         message, title='Maximum number of matches', size_hint=(0.7, 0.6))
     # Keep the reference on the instance before opening, as before.
     self._popup = popup
     popup.open()
Ejemplo n.º 16
0
 def help_sim(self):
     """Show the 'Minimum match score' explanation popup.

     Builds a ``HelpMsg`` from the text below, keeps a reference to it on
     ``self._popup`` and opens it.
     """
     # Text fix: the original read "It is generally recommend to keep",
     # which is ungrammatical in user-facing help; now "recommended".
     help_text = (
         'Set the minimum match score allowed for potential matches. The '
         'valid range is between 0 and 1. Scores below this number will '
         'not populate into the output. It is generally recommended to '
         'keep this set to 0 and manually review matches with low '
         'scores.\n\n'
         '[b]Note[/b]: If an entry in the [b]Populate to[/b] sheet has no '
         'match in the [b]Populate from[/b] sheet with a score greater '
         'than this number, the entry will [i]not[/i] be dropped from the '
         'output. Rather, the entry will not be paired with any entry '
         'from the [b]Populate from[/b] sheet, and the match score for '
         'the entry will be left blank.\n'
     )
     self._popup = HelpMsg(help_text,
                           title='Minimum match score',
                           size_hint=(0.75, 0.7))
     self._popup.open()