Ejemplo n.º 1
0
 def callback(true_name, form):
     """Stores the answer given for the false name and processes it unless it was ignored

     Args:
         true_name: True name given for the false name of the enclosing scope
         form: Form given for the item, 'ignore' leaves the item unprocessed

     Returns:
         None
     """
     ignored = form == 'ignore'
     # Ignored entries are indexed without a true name and are never processed
     item = NameItem(false_name, None if ignored else true_name, form)
     if not ignored:
         self.process(item, file_paths, target_dir=target_dir)
     self.update_name_index(item)
Ejemplo n.º 2
0
    def process_new(self, prompt=True):
        """Processes all new measurements

        Walks through every rig of every false name in ``self.urls``. Entries marked as ignored are
        skipped, known entries are processed if they don't exist yet and unknown entries are either
        prompted from the user or added to the name index as blank entries.

        Args:
            prompt: Prompt true name and form for the unknown entries? Unknown entries are only added to
                the name index as blanks when this is falsy.

        Returns:
            None
        """
        # Directory names of the rigs which measure on-ear headphones, everything else goes to in-ear
        onear_rig_dirs = {'gras': 'GRAS 43AG-7', 'legacy': 'Ears-711'}

        for false_name, paths_by_rig in self.urls.items():
            for rig, file_paths in paths_by_rig.items():
                try:
                    found = self.name_index.find(false_name=false_name)
                    item = found.items[0] if found else None

                    if item and item.form == 'ignore':
                        continue

                    # TODO: Infer form from the file path
                    file_paths = list(map(os.path.abspath, file_paths))
                    if rig in onear_rig_dirs:
                        form = 'onear'
                        target_dir = os.path.join(DIR_PATH, 'data', 'onear', onear_rig_dirs[rig])
                    else:
                        form = None
                        target_dir = os.path.join(DIR_PATH, 'data', 'inear')

                    if item and item.true_name:
                        # Name index contains the entry
                        if self.existing.find(true_name=item.true_name):
                            # Exists already, nothing to do
                            continue
                        if form is not None:
                            item.form = form
                        self.process(item, file_paths, target_dir=target_dir)
                        continue

                    # Unknown item
                    if not prompt:
                        print(f'"{false_name}" is not known. Add true name and form to name index and run again.')
                        self.name_index.update(NameItem(false_name, None, None), false_name=false_name)
                    else:
                        # Prompt true name and form
                        print(f'\n"{false_name}" is not known.')
                        item = self.prompt(false_name, form=form)
                        if item is None:
                            # Nothing was given, mark the entry as ignored
                            self.name_index.update(NameItem(false_name, None, 'ignore'), false_name=false_name)
                            continue
                        self.name_index.update(item, false_name=false_name)
                        self.process(item, file_paths, target_dir=target_dir)
                    self.write_name_index()
                except Exception:
                    print(f'Processing failed for "{false_name}"')
                    raise
Ejemplo n.º 3
0
 def callback(true_name, form):
     """Stores the answer given for the false name and processes the measurement unless ignored

     Args:
         true_name: True name given for the false name of the enclosing scope
         form: Form given for the item, 'ignore' leaves the item unprocessed

     Returns:
         None
     """
     if form == 'ignore':
         # Ignored entries are indexed without a true name and are never processed
         self.update_name_index(NameItem(false_name, None, form))
         return
     item = NameItem(false_name, true_name, form)
     try:
         # Process the very item that gets indexed instead of a throwaway duplicate of it
         self.process(item, url)
     except FileNotFoundError as err:
         # Best effort: report the missing file and leave the name index untouched
         print(err)
         return
     self.update_name_index(item)
Ejemplo n.º 4
0
    def process_new(self, prompt=True):
        """Processes all new measurements

        Updates name index with the new entries not found in the name index previously. Entries marked as
        ignored and entries which exist already are skipped.

        Args:
            prompt: Prompt true name and form for the unknown entries? Unknown entries are only added to
                the name index without true name and form when this is falsy.

        Returns:
            None

        Raises:
            Exception: Any error raised while handling an entry is reported and then re-raised
        """
        for false_name, link in self.links.items():
            try:
                item = self.name_index.find_by_false_name(false_name)
                if item and item.form == 'ignore':
                    continue
                if item and item.true_name:
                    # Name index contains the entry
                    if self.existing.find_by_true_name(item.true_name):
                        # Exists already, skip
                        continue
                    self.process(item, link)
                else:
                    if prompt:
                        print(f'\n"{false_name}" is not known.')
                        if self.names is not None:
                            # Offer the known names matching the false name, a score of 100 is flagged with a check mark
                            name_options = self.names.search_by_false_name(
                                false_name)
                            name_options = [
                                match[0].true_name +
                                (' ✓' if match[1] == 100 else '')
                                for match in name_options
                            ]
                        else:
                            name_options = [false_name]
                        true_name = prompt_name(name_options)
                        if true_name is None:
                            # No name was given, mark the entry as ignored
                            self.name_index.update_by_false_name(
                                NameItem(false_name, None, 'ignore'))
                            continue
                        # The check mark is only a decoration of the option, not a part of the name
                        true_name = true_name.replace(' ✓', '')
                        form = prompt_form()
                        item = NameItem(false_name, true_name, form)
                        self.name_index.update_by_false_name(item)
                        self.process(item, link)
                    else:
                        print(
                            f'"{false_name}" is not known. Add true name and form to name index and run this again.'
                        )
                        self.name_index.update_by_false_name(
                            NameItem(false_name, None, None))

            except Exception as err:
                # Tell which entry failed before propagating; this message used to sit unreachable after the raise
                print(f'Failed to process {false_name}: {str(err)}')
                raise
Ejemplo n.º 5
0
    def get_names(self):
        """Downloads and parses the phone books to get names

        The headphone book is read first and its names are added as 'onear' items, the IEM book is read
        after it and its names are added as 'inear' items.

        Returns:
            NameIndex
        """
        books = (
            ('https://crinacle.com/graphing/data_hp/phone_book.json', 'onear'),  # Headphone book
            ('https://crinacle.com/graphing/data/phone_book.json', 'inear'),  # IEM book
        )
        names = NameIndex()
        for book_url, form in books:
            response = requests.get(book_url)
            book = self.parse_book(response.json())
            for false_name, true_name in book.items():
                names.add(NameItem(false_name, true_name, form))
        return names
Ejemplo n.º 6
0
    def process_new(self, prompt=True):
        """Processes all new measurements

        Walks through every false name in ``self.urls``. Entries marked as ignored are skipped, known
        entries are processed if they don't exist yet and unknown entries are either prompted from the
        user or added to the name index as blank entries.

        Args:
            prompt: Prompt true name and form for the unknown entries? Unknown entries are only added to
                the name index as blanks when this is falsy.

        Returns:
            None
        """
        for false_name, url in self.urls.items():
            try:
                found = self.name_index.find(false_name=false_name)
                item = found.items[0] if found else None

                if item and item.form == 'ignore':
                    continue

                if item and item.true_name:
                    # Name index contains the entry
                    if self.existing.find(true_name=item.true_name):
                        # Exists already, nothing to do
                        continue
                    print(f'Didn\'t find {item.true_name} in existing')
                    self.process(item, url)
                    continue

                # Unknown item
                if not prompt:
                    print(
                        f'"{false_name}" is not known. Add true name and form to name index and run again.'
                    )
                    blank = NameItem(false_name, None, None)
                    self.name_index.update(blank, false_name=false_name)
                else:
                    # Prompt true name and form
                    print(f'\n"{false_name}" is not known.')
                    item = self.prompt(false_name)
                    if item is None:
                        # Nothing was given, mark the entry as ignored
                        ignored = NameItem(false_name, None, 'ignore')
                        self.name_index.update(ignored, false_name=false_name)
                        continue
                    self.name_index.update(item, false_name=false_name)
                    self.process(item, url)
                self.write_name_index()
            except Exception:
                print(f'Processing failed for "{false_name}"')
                raise
Ejemplo n.º 7
0
 def fn(true_name, form):
     """Adds the named item to the name index, downloads its image and hands it over to the callback

     Args:
         true_name: True name given for the false name of the enclosing scope
         form: Form given for the item

     Returns:
         None
     """
     new_item = NameItem(false_name, true_name, form)
     self.name_index.add(new_item)
     self.write_name_index()
     image_path, rig = self.download_image(
         report_url, image_dir, false_name, true_name, form)
     if not image_path:
         # No image was obtained, there is nothing to pass on
         return
     callback(image_path, rig, true_name, form, data_dir, inspection_dir)
    def download_images(self, url, item, image_dir):
        """Downloads the frequency response graph images of all the reports of an item

        Args:
            url: URL of the reports page
            item: NameItem of the headphone, its true name is the base for the names of the reports
            image_dir: Directory passed to ``self.download`` for the images

        Returns:
            List of dicts with keys "name", "image_path" and "rig", one for each report with a graph
        """
        reports_page = self.get_beautiful_soup(url)  # Reports page

        # True name of each report mapped to the URL of its report page
        links_by_name = {}
        for label in reports_page.find_all(name='span', text=self.pro_report_regex):
            container = label.parent.parent.parent
            anchor = container.find_all('a')[1]
            suffix = anchor.text.lower().strip()
            candidate = item.true_name
            if suffix not in (item.false_name.lower(), 'default'):
                candidate += f' ({suffix})'

            # The suffixes above are read automatically from the reports compilation page.
            # However these might not be the names that should exist in AutoEq.
            known = self.name_index.find(false_name=candidate)
            if known:
                # The name index has an item with the automatically suffixed name as its
                # false name, use the true name of that item instead.
                true_name = known.items[0].true_name
            else:
                # Not in the name index, prompt user
                print(f'Mod of "{candidate}" is not known.')
                true_name = self.prompt_true_name([candidate])
                self.name_index.add(NameItem(candidate, true_name, item.form))
                self.write_name_index()

            links_by_name[true_name] = f'https://reference-audio-analyzer.pro{anchor["href"]}'

        results = []
        for name, report_url in links_by_name.items():
            report_page = self.get_beautiful_soup(report_url)  # Sets the driver also
            stand = report_page.find(name='li', text=self.performed_on_stand_regex)
            try:
                rig = stand.parent.find(name='ul').find(name='a').text
            except AttributeError:
                # Some element of the chain is missing, fall back to a guess based on the form
                rig = 'HDM-X' if item.form == 'onear' else 'SIEC'
                print(
                    f'Measurement rig could not be read for "{item.false_name}", guessing {rig}'
                )
            try:
                container = self.driver.find_element_by_id('response9')
                graph = container.find_element_by_tag_name('div')  # FR Graph
            except Exception:
                print(f'No graph for {item.false_name}')
                continue
            # The graph is the background image of the element
            background = graph.value_of_css_property('background-image')
            image_url = background.replace('url("', '').replace('")', '')
            image_path = self.download(image_url, name, image_dir)
            results.append(dict(name=name, image_path=image_path, rig=rig))
        return results
Ejemplo n.º 9
0
def rename_manufacturers():
    """Renames manufacturers in the name indexes and in the measurement files of all databases

    For each database in ``DBS`` the true names in the name index (if the database has one) are
    replaced with the names given by ``ManufacturerIndex.replace`` and each measurement CSV file is
    moved into a directory and a file named after its replaced name.

    Returns:
        None
    """
    manufacturers = ManufacturerIndex()

    for db in DBS:
        name_index_path = os.path.join(DIR_PATH, db, 'name_index.tsv')
        if os.path.isfile(name_index_path):
            # Rename entries in name index if such exists
            name_index = NameIndex.read_tsv(name_index_path)

            for item in name_index.items:
                if item.form == 'ignore' or not item.true_name:
                    continue
                true_name = manufacturers.replace(item.true_name)
                if true_name is None:
                    # Report the name which was looked up, there is no `name` variable in this loop
                    print(f'"{item.true_name}" not found in manufacturers')
                    continue
                if true_name == item.true_name:
                    continue

                print(f'Renamed "{item.true_name}" with "{true_name}"')
                name_index.update(
                    NameItem(item.false_name, true_name, item.form),
                    item.false_name, item.true_name, item.form)

                # Written after every rename
                name_index.write_tsv(name_index_path)

        # Rename existing files
        existing_files = list(
            glob(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'),
                 recursive=True))
        for fp in existing_files:
            dir_path, name = os.path.split(fp)
            name = name.replace('.csv', '')
            true_name = manufacturers.replace(name)
            if true_name is None:
                print(f'"{name}" not found in manufacturers')
                continue
            new_dir_path = os.path.abspath(
                os.path.join(dir_path, os.pardir, true_name))
            new_file_path = os.path.join(new_dir_path, f'{true_name}.csv')
            os.makedirs(new_dir_path, exist_ok=True)
            # Compare normalized paths to avoid moving a file onto itself
            if os.path.normcase(
                    os.path.normpath(new_file_path)) != os.path.normcase(
                        os.path.normpath(fp)):
                print(
                    f'Moved "{os.path.relpath(fp, DIR_PATH)}" to "{os.path.relpath(new_file_path, DIR_PATH)}"'
                )
                shutil.move(fp, new_file_path)
                try:
                    # Best effort clean up of the old directory, fails harmlessly if it isn't empty
                    os.rmdir(dir_path)
                except OSError:
                    pass
Ejemplo n.º 10
0
def main():
    """Renames manufacturers in the name indexes and in the measurement files of all databases

    For each database the true names in the name index are replaced with the names given by
    ``ManufacturerIndex.replace`` and each measurement CSV file is moved into a directory and a file
    named after its replaced name.

    Returns:
        None
    """
    manufacturers = ManufacturerIndex()

    for db in ['crinacle', 'headphonecom', 'innerfidelity', 'oratory1990', 'rtings']:
        name_index_path = os.path.join(DIR_PATH, db, 'name_index.tsv')
        if os.path.isfile(name_index_path):
            name_index = NameIndex.read_tsv(name_index_path)
        else:
            name_index = NameIndex()

        for item in name_index.items:
            if item.form == 'ignore' or not item.true_name:
                continue
            true_name = manufacturers.replace(item.true_name)
            if true_name is None:
                # Report the name which was looked up, there is no `name` variable in this loop
                print(f'"{item.true_name}" not found in manufacturers')
                continue
            if true_name == item.true_name:
                continue

            print(f'Renamed "{item.true_name}" with "{true_name}"')
            name_index.update(
                NameItem(item.false_name, true_name, item.form),
                item.false_name, item.true_name, item.form
            )

        if name_index:
            name_index.write_tsv(name_index_path)

        existing = list(glob(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'), recursive=True))
        for fp in existing:
            dir_path, name = os.path.split(fp)
            name = name.replace('.csv', '')
            true_name = manufacturers.replace(name)
            if true_name is None:
                print(f'"{name}" not found in manufacturers')
                continue
            new_dir_path = os.path.abspath(os.path.join(dir_path, os.pardir, true_name))
            new_file_path = os.path.join(new_dir_path, f'{true_name}.csv')
            os.makedirs(new_dir_path, exist_ok=True)
            # Compare normalized paths to avoid moving a file onto itself
            if os.path.normcase(os.path.normpath(new_file_path)) != os.path.normcase(os.path.normpath(fp)):
                print(f'Moved "{os.path.relpath(fp, DIR_PATH)}" to "{os.path.relpath(new_file_path, DIR_PATH)}"')
                shutil.move(fp, new_file_path)
                try:
                    # Best effort clean up of the old directory, fails harmlessly if it isn't empty
                    os.rmdir(dir_path)
                except OSError:
                    pass
Ejemplo n.º 11
0
    def get_name_proposals(self,
                           false_name,
                           n=4,
                           normalize_digits=False,
                           normalize_extras=False,
                           threshold=60):
        """Finds name proposals which resemble the false name

        The manufacturer is looked up from the false name and what remains of the false name after
        removing the manufacturer is fuzzy matched against the models of that manufacturer in
        ``self.name_proposals``.

        Args:
            false_name: Name as it exists in the measurement source
            n: Number of proposals to return
            normalize_digits: Normalize all digits to zeros before calculating fuzzy string matching score
            normalize_extras: Remove extra details in the parentheses
            threshold: Score threshold, models with partial ratio below this are left out

        Returns:
            NameIndex built from the matches sorted by descending ratio and cut to the first n rows,
            an empty NameIndex if no manufacturer is found for the false name
        """
        def fuzzy(fn, a, b):
            # Score the two strings with fn after lower casing and the optional normalizations
            a = a.lower()
            b = b.lower()
            if normalize_digits:
                a = re.sub(r'\d', '0', a).strip()
                b = re.sub(r'\d', '0', b).strip()
            if normalize_extras:
                a = re.sub(r'\(.+\)$', '', a).strip()
                b = re.sub(r'\(.+\)$', '', b).strip()
            return fn(a, b)

        manufacturer, manufacturer_match = self.manufacturers.find(false_name)
        if not manufacturer:
            return NameIndex([])
        # What is left of the false name without the matched manufacturer is taken as the model
        false_model = re.sub(re.escape(manufacturer_match),
                             '',
                             false_name,
                             flags=re.IGNORECASE).strip()
        # Select only the items with the same manufacturer
        models = self.name_proposals[self.name_proposals.manufacturer ==
                                     manufacturer]

        # Calculate ratios
        model_names = models.model.tolist()
        partial_ratios = [
            fuzzy(fuzz.partial_ratio, model, false_model)
            for model in model_names
        ]
        ratios = [
            fuzzy(fuzz.ratio, model, false_model) for model in model_names
        ]

        models = models.assign(partial_ratio=partial_ratios, ratio=ratios)
        models = models[models.partial_ratio >= threshold]
        # Sort into a new frame, sorting the filtered frame in place may operate on a copy of a slice
        models = models.sort_values('ratio', ascending=False)
        proposals = [
            NameItem(None, f'{manufacturer} {row.model}', row.form)
            for _, row in models.iterrows()
        ]
        ni = NameIndex(items=proposals)
        ni.df = ni.df.head(n)
        return ni
Ejemplo n.º 12
0
def _form_from_path(path):
    """Returns the 'onear', 'inear' or 'earbud' component closest to the end of the path, None if there is none"""
    head = path
    while True:
        parent, tail = os.path.split(head)
        if tail in ('onear', 'inear', 'earbud'):
            return tail
        if parent == head:
            # Splitting makes no progress any more, the whole path has been searched
            return None
        head = parent


def rename_groups(databases=DBS):
    """Renames measurements and name index items according to the name groups in name_groups.tsv

    Each line of ``name_groups.tsv`` is a tab separated group of names where the first column is the
    true name and the other columns are the false names replaced with it. When the true name is
    "ignore", the matching name index items are marked as ignored and the directories of the matching
    measurement files are removed.

    Args:
        databases: Names of the measurement database directories under DIR_PATH

    Returns:
        None
    """
    with open(os.path.join(DIR_PATH, 'name_groups.tsv'), 'r',
              encoding='utf-8') as fh:
        lines = fh.read().strip().split('\n')

    # First column is always the true name
    # Create dict with each false name as key and it's true name as value
    name_map = dict()
    for line in lines:
        names = line.split('\t')
        if len(names) > 1:
            for i in range(1, len(names)):
                name_map[names[i]] = names[0]

    # Read name indexes and existing files for all supported measurement databases
    dbs = []
    for db in databases:
        if os.path.isfile(os.path.join(DIR_PATH, db, 'name_index.tsv')):
            # Read name index
            name_index = NameIndex.read_tsv(
                os.path.join(DIR_PATH, db, 'name_index.tsv'))
        else:
            # No name index, create one anew
            name_index = NameIndex()
        # Read all the existing files for the database
        files = list(
            glob(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'),
                 recursive=True))
        files = [{
            'name': os.path.split(file)[1].replace('.csv', ''),
            'path': file
        } for file in files]
        # Save both to dbs
        dbs.append({'name': db, 'name_index': name_index, 'files': files})

    for old_name, new_name in name_map.items():
        print(f'"{old_name}" -> "{new_name}"')
        for db in dbs:
            name_index = db['name_index']
            # Replace true names in name index with the new name
            updated_item = False
            matches = name_index.find(true_name=old_name)
            for item in matches.items:
                if new_name == 'ignore':
                    name_index.update(NameItem(false_name=item.false_name,
                                               true_name=item.true_name,
                                               form='ignore'),
                                      true_name=old_name)
                    print(
                        f'    Updated item: "{item.false_name}", "{new_name}", "ignore"'
                    )
                else:
                    name_index.update(NameItem(false_name=item.false_name,
                                               true_name=new_name,
                                               form=item.form),
                                      true_name=old_name)
                    print(
                        f'    Updated item: "{item.false_name}", "{new_name}", "{item.form}"'
                    )
                updated_item = True

            # Rename existing files
            for name, path in [(f['name'], f['path']) for f in db['files']
                               if f['name'].lower() == old_name.lower()]:
                if new_name == 'ignore':
                    print(f'    Removing "{os.path.split(path)[0]}"')
                    shutil.rmtree(os.path.split(path)[0])
                    if not updated_item:
                        name_index.add(
                            NameItem(false_name=old_name,
                                     true_name=None,
                                     form='ignore'))
                        print(f'    Added item: "{old_name}", "", "ignore"')
                    continue

                # Plain string replacement, a regex substitution would treat backslashes
                # in the new name as escape sequences
                new_path = path.replace(name, new_name)
                print(
                    f'    Moving "{os.path.relpath(path, DIR_PATH)}" to "{os.path.relpath(new_path, DIR_PATH)}"'
                )
                os.makedirs(os.path.split(new_path)[0], exist_ok=True)
                shutil.move(path, new_path)
                try:
                    # Remove the directory the file was moved out of. The path of the moved file can no
                    # longer be traversed with os.pardir, so the directory is derived from the path
                    # itself. Best effort: the directory may still hold other files.
                    os.rmdir(os.path.dirname(path))
                except OSError:
                    pass
                if not name_index.find(true_name=new_name):
                    # New name is not in the name index yet, infer the form from the directory names
                    form = _form_from_path(path)
                    if form is None:
                        print(
                            f'    Could not infer form from "{path}", item not added'
                        )
                        continue
                    name_index.add(
                        NameItem(false_name=old_name,
                                 true_name=new_name,
                                 form=form))
                    print(
                        f'    Added item: "{old_name}", "{new_name}", "{form}"'
                    )
        print()

    for db in dbs:
        db['name_index'].write_tsv(
            os.path.join(DIR_PATH, db['name'], 'name_index.tsv'))
Ejemplo n.º 13
0
    def prompt(self, false_name):
        """Prompts user for true name and form based on false name.

        When name proposals are available, the best matching proposals are offered as options for the
        true name, the manufacturer of the selected name is replaced (prompting it if it is not known)
        and the form is taken from the selected proposal. The form is prompted if it could not be
        taken from the proposals.

        Args:
            false_name: Name for which the true name and form are prompted

        Returns:
            NameItem with the false name, true name and form, None if no true name was given
        """
        form = None
        if self.name_proposals is not None:
            # Name proposals initialized, add matching entries to options in prompt
            matches = []
            matches += self.name_proposals.search_by_false_name(false_name)
            matches += self.name_proposals.search_by_true_name(false_name)
            # (true name, ratio, form) tuples, one per distinct true name
            names_and_ratios = []
            for match in matches:
                if not match[0].true_name:
                    # Skip items without true name
                    continue
                if match[1] == 100:
                    # Exact match
                    # NOTE(review): this modifies the matched item itself; if the searches return the
                    # items stored in the proposals, the check mark stays on them — confirm
                    match[0].true_name += ' ✓'
                if match[0].true_name not in [x[0] for x in names_and_ratios]:
                    # New match
                    names_and_ratios.append(
                        (match[0].true_name, match[1], match[0].form))
                else:
                    # Existing match, update ratio
                    for i in range(len(names_and_ratios)):
                        if match[0].true_name == names_and_ratios[i][
                                0] and match[1] > names_and_ratios[i][1]:
                            names_and_ratios[i] = (names_and_ratios[i][0],
                                                   match[1],
                                                   names_and_ratios[i][2])

            # The four names with the highest ratios are offered as options
            name_options = [
                x[0] for x in sorted(
                    names_and_ratios, key=lambda x: x[1], reverse=True)[:4]
            ]
            if false_name not in name_options:
                name_options.append(false_name)  # Add the false name

            # Prompt
            true_name = self.prompt_true_name(name_options)

            if true_name is None:
                return None

            # Find and replace true manufacturer name or prompt it
            if self.manufacturers.find(true_name)[0] is None:
                # Unknown manufacturer, find options with the two first words and prompt it
                # The whole name is never a candidate because the range stops before the word count
                manufacturer_options = []
                for i in range(1, min(3, len(true_name.split()))):
                    candidate = ' '.join(true_name.split()[:i])
                    # NOTE(review): looks like a leftover debug print — confirm before removing
                    print(candidate)
                    manufacturer_options += self.manufacturers.search(
                        candidate)
                    if candidate not in [x[0] for x in manufacturer_options]:
                        # Offer the candidate itself too, with the lowest score
                        manufacturer_options.append((candidate, 0))
                manufacturer_options = sorted(manufacturer_options,
                                              key=lambda x: x[1],
                                              reverse=True)
                manufacturer_options = [x[0] for x in manufacturer_options]
                manufacturer, replace = self.prompt_manufacturer(
                    manufacturer_options)
                _, match = self.manufacturers.find(manufacturer)
                if match:
                    # Add as a new variant in existing manufacturer
                    for m in self.manufacturers.manufacturers:
                        if m[0] == match:
                            m.append(replace)
                else:
                    # Add new manufacturer
                    self.manufacturers.manufacturers.append([manufacturer])
                self.manufacturers.write()
            # Replace
            # NOTE(review): if replace() can return None, the .replace() call on true_name below
            # raises AttributeError — confirm
            true_name = self.manufacturers.replace(true_name)

            # Find the answer and select form
            # The comparison happens before the check mark is removed because the proposal names may carry it
            for name, ratio, f in names_and_ratios:
                if true_name == name:
                    form = f
                    break
            true_name = true_name.replace(' ✓', '')

        else:
            # No name proposals, the false name is the only option
            true_name = self.prompt_true_name([false_name])
            form = None

        if true_name is None:
            # User skipped
            return None

        if form is None:
            # Form not found in name proposals, prompt it
            form = self.prompt_form()

        return NameItem(false_name, true_name, form)