def callback(true_name, form):
    """Store the answer to a name prompt and process the measurement.

    ``self``, ``false_name``, ``file_paths`` and ``target_dir`` are free
    variables resolved from the surrounding scope.

    Args:
        true_name: Name selected for the measurement
        form: Form selected for the measurement, 'ignore' skips processing

    Returns:
        None
    """
    if form != 'ignore':
        item = NameItem(false_name, true_name, form)
        self.process(item, file_paths, target_dir=target_dir)
        self.update_name_index(item)
        return
    # Ignored entries are only recorded in the name index, never processed
    self.update_name_index(NameItem(false_name, None, form))
def process_new(self, prompt=True):
    """Processes all new measurements

    Walks every false name and rig in ``self.urls`` and processes the
    measurements which are not yet among the existing ones. Entries missing
    from the name index are added to it.

    Args:
        prompt: Ask the user for the true name and form of unknown items?
            When falsy, unknown items are added to the name index as empty
            placeholders.

    Returns:
        None
    """
    # Rig name -> (form, target directory parts relative to DIR_PATH)
    rig_targets = {
        'gras': ('onear', ('data', 'onear', 'GRAS 43AG-7')),
        'legacy': ('onear', ('data', 'onear', 'Ears-711')),
    }
    # Any other rig: form is unknown and files go to the in-ear directory
    fallback_target = (None, ('data', 'inear'))

    for false_name, rigs_and_file_paths in self.urls.items():
        for rig, file_paths in rigs_and_file_paths.items():
            try:
                hits = self.name_index.find(false_name=false_name)
                item = hits.items[0] if hits else None
                if item and item.form == 'ignore':
                    continue

                # TODO: Infer form from the file path
                file_paths = [os.path.abspath(p) for p in file_paths]
                form, dir_parts = rig_targets.get(rig, fallback_target)
                target_dir = os.path.join(DIR_PATH, *dir_parts)

                if item and item.true_name:
                    # Name index contains the entry
                    already_exists = self.existing.find(true_name=item.true_name)
                    if not already_exists:
                        # Doesn't exist yet
                        if form is not None:
                            item.form = form
                        self.process(item, file_paths, target_dir=target_dir)
                elif not prompt:
                    # Unknown item, leave a placeholder for manual editing
                    print(f'"{false_name}" is not known. Add true name and form to name index and run again.')
                    self.name_index.update(NameItem(false_name, None, None), false_name=false_name)
                else:
                    # Unknown item, prompt true name and form
                    print(f'\n"{false_name}" is not known.')
                    item = self.prompt(false_name, form=form)
                    if item is None:
                        self.name_index.update(NameItem(false_name, None, 'ignore'), false_name=false_name)
                        continue
                    self.name_index.update(item, false_name=false_name)
                    self.process(item, file_paths, target_dir=target_dir)

                self.write_name_index()
            except Exception as err:
                print(f'Processing failed for "{false_name}"')
                raise err
def callback(true_name, form):
    """Store the answer to a name prompt and process the measurement.

    ``self``, ``false_name`` and ``url`` are free variables resolved from the
    surrounding scope.

    Args:
        true_name: Name selected for the measurement
        form: Form selected for the measurement, 'ignore' skips processing

    Returns:
        None
    """
    if form == 'ignore':
        # Ignored entries are recorded without a true name
        entry = NameItem(false_name, None, form)
    else:
        entry = NameItem(false_name, true_name, form)
        try:
            self.process(NameItem(false_name, true_name, form), url)
        except FileNotFoundError as missing:
            # Report the missing file and leave the name index untouched
            print(missing)
            return
    self.update_name_index(entry)
def process_new(self, prompt=True):
    """Processes all new measurements

    Walks every false name in ``self.links`` and processes the measurements
    which are not yet among the existing ones. Entries missing from the name
    index are added to it.

    Args:
        prompt: Ask the user for the true name and form of unknown items?
            When falsy, unknown items are added to the name index as empty
            placeholders.

    Returns:
        None

    Raises:
        Exception: Any error raised while handling an item is reported to
            stdout and then re-raised unchanged.
    """
    for false_name, link in self.links.items():
        try:
            item = self.name_index.find_by_false_name(false_name)
            if item and item.form == 'ignore':
                continue

            if item and item.true_name:
                # Name index contains the entry
                if self.existing.find_by_true_name(item.true_name):
                    # Exists already, skip
                    continue
                self.process(item, link)
                continue

            if not prompt:
                print(
                    f'"{false_name}" is not known. Add true name and form to name index and run this again.'
                )
                self.name_index.update_by_false_name(NameItem(false_name, None, None))
                continue

            print(f'\n"{false_name}" is not known.')
            if self.names is not None:
                # Offer the matching names, exact matches (score 100) get a check mark
                name_options = [
                    match[0].true_name + (' ✓' if match[1] == 100 else '')
                    for match in self.names.search_by_false_name(false_name)
                ]
            else:
                name_options = [false_name]
            true_name = prompt_name(name_options)
            if true_name is None:
                # User skipped, remember to ignore this one from now on
                self.name_index.update_by_false_name(NameItem(false_name, None, 'ignore'))
                continue
            true_name = true_name.replace(' ✓', '')
            form = prompt_form()
            item = NameItem(false_name, true_name, form)
            self.name_index.update_by_false_name(item)
            self.process(item, link)
        except Exception as err:
            # Report which item failed before propagating; the message used to
            # sit after the raise and was never printed
            print(f'Failed to process {false_name}: {str(err)}')
            raise
def get_names(self):
    """Downloads and parses the phone books to get names

    The headphone book is read first and its entries are added with form
    'onear', then the IEM book whose entries are added with form 'inear'.

    Returns:
        NameIndex
    """
    books = (
        ('https://crinacle.com/graphing/data_hp/phone_book.json', 'onear'),  # Headphone book
        ('https://crinacle.com/graphing/data/phone_book.json', 'inear'),  # IEM book
    )
    names = NameIndex()
    for book_url, form in books:
        res = requests.get(book_url)
        book = self.parse_book(res.json())
        for false_name, true_name in book.items():
            names.add(NameItem(false_name, true_name, form))
    return names
def process_new(self, prompt=True):
    """Processes all new measurements

    Walks every false name in ``self.urls`` and processes the measurements
    which are not yet among the existing ones. Entries missing from the name
    index are added to it.

    Args:
        prompt: Ask the user for the true name and form of unknown items?
            When falsy, unknown items are added to the name index as empty
            placeholders.

    Returns:
        None
    """
    for false_name, url in self.urls.items():
        try:
            hits = self.name_index.find(false_name=false_name)
            item = hits.items[0] if hits else None
            if item and item.form == 'ignore':
                continue

            if item and item.true_name:
                # Name index contains the entry
                already_exists = self.existing.find(true_name=item.true_name)
                if not already_exists:
                    # Doesn't exist already
                    print(f"Didn't find {item.true_name} in existing")
                    self.process(item, url)
            elif not prompt:
                # Unknown item, leave a placeholder for manual editing
                print(
                    f'"{false_name}" is not known. Add true name and form to name index and run again.'
                )
                placeholder = NameItem(false_name, None, None)
                self.name_index.update(placeholder, false_name=false_name)
            else:
                # Unknown item, prompt true name and form
                print(f'\n"{false_name}" is not known.')
                item = self.prompt(false_name)
                if item is None:
                    skipped = NameItem(false_name, None, 'ignore')
                    self.name_index.update(skipped, false_name=false_name)
                    continue
                self.name_index.update(item, false_name=false_name)
                self.process(item, url)

            self.write_name_index()
        except Exception as err:
            print(f'Processing failed for "{false_name}"')
            raise err
def fn(true_name, form):
    """Save the answered name, download the image and hand it to ``callback``.

    ``self``, ``false_name``, ``report_url``, ``image_dir``, ``callback``,
    ``data_dir`` and ``inspection_dir`` are free variables resolved from the
    surrounding scope.

    Args:
        true_name: Name selected for the measurement
        form: Form selected for the measurement

    Returns:
        None
    """
    new_item = NameItem(false_name, true_name, form)
    self.name_index.add(new_item)
    self.write_name_index()

    downloaded = self.download_image(report_url, image_dir, false_name, true_name, form)
    image_path, rig = downloaded
    if not image_path:
        # Nothing was downloaded, so there is nothing to pass on
        return
    callback(image_path, rig, true_name, form, data_dir, inspection_dir)
def download_images(self, url, item, image_dir):
    """Downloads the frequency response graph images of all reports of an item

    Args:
        url: URL of the reports page
        item: NameItem of the headphone whose reports are listed on the page
        image_dir: Directory passed on to ``self.download`` for the images

    Returns:
        List of dicts with keys 'name', 'image_path' and 'rig', one for each
        report which had a graph
    """
    reports_page = self.get_beautiful_soup(url)  # Reports page
    report_urls = dict()
    for label in reports_page.find_all(name='span', text=self.pro_report_regex):
        container = label.parent.parent.parent
        anchor = container.find_all('a')[1]
        suffix = anchor.text.lower().strip()
        name = item.true_name
        is_plain = suffix == item.false_name.lower() or suffix == 'default'
        if not is_plain:
            name += f' ({suffix})'
        # The suffixes above are read automatically from the reports compilation page.
        # However these might not be the names that should exist in AutoEq.
        mods = self.name_index.find(false_name=name)
        if not mods:
            # Not in the name index, prompt user
            print(f'Mod of "{name}" is not known.')
            false_name = name
            true_name = self.prompt_true_name([false_name])
            self.name_index.add(NameItem(false_name, true_name, item.form))
            self.write_name_index()
        else:
            # Name index has an item with the automatically suffixed name as its
            # false name, use the true name of that item instead.
            true_name = mods.items[0].true_name
        report_urls[true_name] = f'https://reference-audio-analyzer.pro{anchor["href"]}'

    results = []
    for name, report_url in report_urls.items():
        report_page = self.get_beautiful_soup(report_url)  # Sets the driver also
        stand = report_page.find(name='li', text=self.performed_on_stand_regex)
        try:
            rig = stand.parent.find(name='ul').find(name='a').text
        except AttributeError:
            # Some element on the way was missing, guess the rig from the form
            if item.form == 'onear':
                rig = 'HDM-X'
            else:
                rig = 'SIEC'
            print(
                f'Measurement rig could not be read for "{item.false_name}", guessing {rig}'
            )

        try:
            # FR Graph
            graph = self.driver.find_element_by_id('response9').find_element_by_tag_name('div')
        except Exception:
            print(f'No graph for {item.false_name}')
            continue

        # The graph is the background image of the element
        background = graph.value_of_css_property('background-image')
        image_url = background.replace('url("', '').replace('")', '')
        image_path = self.download(image_url, name, image_dir)
        results.append({'name': name, 'image_path': image_path, 'rig': rig})
    return results
def rename_manufacturers():
    """Renames manufacturers in the name indexes and data files of all databases

    For each database in ``DBS`` the true names in its ``name_index.tsv`` (if
    the file exists) are passed through ``ManufacturerIndex.replace`` and
    changed entries are written back. After that every CSV file under the
    ``data`` directory of the database is moved to a directory and file named
    after the replaced name.

    Returns:
        None
    """
    manufacturers = ManufacturerIndex()
    for db in DBS:
        index_path = os.path.join(DIR_PATH, db, 'name_index.tsv')
        if os.path.isfile(index_path):
            # Rename entries in name index if such exists
            name_index = NameIndex.read_tsv(index_path)
            for item in name_index.items:
                if item.form == 'ignore' or not item.true_name:
                    continue
                true_name = manufacturers.replace(item.true_name)
                if true_name is None:
                    # Report the name of this item; no file name exists in this loop
                    print(f'"{item.true_name}" not found in manufacturers')
                    continue
                if true_name == item.true_name:
                    continue
                print(f'Renamed "{item.true_name}" with "{true_name}"')
                name_index.update(
                    NameItem(item.false_name, true_name, item.form),
                    item.false_name, item.true_name, item.form)
            name_index.write_tsv(index_path)

        # Rename existing files
        existing_files = list(
            glob(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'), recursive=True))
        for fp in existing_files:
            dir_path, name = os.path.split(fp)
            name = name.replace('.csv', '')
            true_name = manufacturers.replace(name)
            if true_name is None:
                print(f'"{name}" not found in manufacturers')
                continue
            new_dir_path = os.path.abspath(os.path.join(dir_path, os.pardir, true_name))
            new_file_path = os.path.join(new_dir_path, f'{true_name}.csv')
            os.makedirs(new_dir_path, exist_ok=True)
            if os.path.normcase(os.path.normpath(new_file_path)) != os.path.normcase(os.path.normpath(fp)):
                print(
                    f'Moved "{os.path.relpath(fp, DIR_PATH)}" to "{os.path.relpath(new_file_path, DIR_PATH)}"'
                )
                shutil.move(fp, new_file_path)
                try:
                    # Best effort clean-up, the old directory may still hold other files
                    os.rmdir(dir_path)
                except OSError:
                    pass
def main():
    """Renames manufacturers in the name indexes and data files of the databases

    For each of the listed databases the true names in its ``name_index.tsv``
    (an empty index is used when the file is missing) are passed through
    ``ManufacturerIndex.replace`` and a non-empty index is written back. After
    that every CSV file under the ``data`` directory of the database is moved
    to a directory and file named after the replaced name.

    Returns:
        None
    """
    manufacturers = ManufacturerIndex()
    for db in ['crinacle', 'headphonecom', 'innerfidelity', 'oratory1990', 'rtings']:
        index_path = os.path.join(DIR_PATH, db, 'name_index.tsv')
        if os.path.isfile(index_path):
            name_index = NameIndex.read_tsv(index_path)
        else:
            name_index = NameIndex()

        for item in name_index.items:
            if item.form == 'ignore' or not item.true_name:
                continue
            true_name = manufacturers.replace(item.true_name)
            if true_name is None:
                # Report the name of this item; no file name exists in this loop
                print(f'"{item.true_name}" not found in manufacturers')
                continue
            if true_name == item.true_name:
                continue
            print(f'Renamed "{item.true_name}" with "{true_name}"')
            name_index.update(
                NameItem(item.false_name, true_name, item.form),
                item.false_name, item.true_name, item.form
            )
        if name_index:
            name_index.write_tsv(index_path)

        existing = list(glob(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'), recursive=True))
        for fp in existing:
            dir_path, name = os.path.split(fp)
            name = name.replace('.csv', '')
            true_name = manufacturers.replace(name)
            if true_name is None:
                print(f'"{name}" not found in manufacturers')
                continue
            new_dir_path = os.path.abspath(os.path.join(dir_path, os.pardir, true_name))
            new_file_path = os.path.join(new_dir_path, f'{true_name}.csv')
            os.makedirs(new_dir_path, exist_ok=True)
            if os.path.normcase(os.path.normpath(new_file_path)) != os.path.normcase(os.path.normpath(fp)):
                print(f'Moved "{os.path.relpath(fp, DIR_PATH)}" to "{os.path.relpath(new_file_path, DIR_PATH)}"')
                shutil.move(fp, new_file_path)
                try:
                    # Best effort clean-up, the old directory may still hold other files
                    os.rmdir(dir_path)
                except OSError:
                    pass
def get_name_proposals(self, false_name, n=4, normalize_digits=False, normalize_extras=False, threshold=60):
    """Finds name proposals which resemble the false name

    The manufacturer is looked up from the false name first. The rest of the
    false name is then compared with fuzzy string matching against the models
    of that manufacturer in ``self.name_proposals``.

    Args:
        false_name: Name as it exists in the measurement source
        n: Number of proposals to return
        normalize_digits: Normalize all digits to zeros before calculating fuzzy string matching score
        normalize_extras: Remove extra details in the parentheses
        threshold: Minimum partial ratio score a model needs to be proposed

    Returns:
        NameIndex with at most n proposals ordered by descending ratio score,
        empty if no manufacturer was found for the false name
    """
    def fuzzy(fn, a, b):
        """Applies the scoring function to the lower cased and normalized strings."""
        a = a.lower()
        b = b.lower()
        if normalize_digits:
            a = re.sub(r'\d', '0', a).strip()
            b = re.sub(r'\d', '0', b).strip()
        if normalize_extras:
            a = re.sub(r'\(.+\)$', '', a).strip()
            b = re.sub(r'\(.+\)$', '', b).strip()
        return fn(a, b)

    manufacturer, manufacturer_match = self.manufacturers.find(false_name)
    if not manufacturer:
        return NameIndex([])
    # What is left of the false name when the matched manufacturer is removed
    false_model = re.sub(re.escape(manufacturer_match), '', false_name, flags=re.IGNORECASE).strip()

    # Select only the items with the same manufacturer
    models = self.name_proposals[self.name_proposals.manufacturer == manufacturer]

    # Calculate ratios
    model_names = models.model.tolist()
    models = models.assign(
        partial_ratio=[fuzzy(fuzz.partial_ratio, model, false_model) for model in model_names],
        ratio=[fuzzy(fuzz.ratio, model, false_model) for model in model_names],
    )

    # Partial ratio decides which models qualify, ratio decides their order.
    # Sorting returns a new frame instead of mutating the filtered slice in place.
    models = models[models.partial_ratio >= threshold]
    models = models.sort_values('ratio', ascending=False)

    proposals = [
        NameItem(None, f'{manufacturer} {row.model}', row.form)
        for _, row in models.iterrows()
    ]
    ni = NameIndex(items=proposals)
    ni.df = ni.df.head(n)
    return ni
def _infer_form_from_path(path):
    """Returns the closest path component which is one of 'onear', 'inear' or 'earbud'."""
    current = path
    while True:
        parent, leaf = os.path.split(current)
        if leaf in ('onear', 'inear', 'earbud'):
            return leaf
        if parent == current:
            # Reached the top without finding a form directory, splitting
            # further would repeat forever
            raise ValueError(f'Could not infer form from path "{path}"')
        current = parent


def rename_groups(databases=DBS):
    """Renames headphones in name indexes and data files according to name_groups.tsv

    Each line of ``name_groups.tsv`` holds tab separated names: the first one
    is the true name and the rest are names which should be renamed to it.
    When the true name is 'ignore', the matching name index items are marked
    ignored and the matching data directories are removed.

    Args:
        databases: Names of the measurement database directories to process

    Returns:
        None

    Raises:
        ValueError: If a moved file needs a new name index item but its path
            has no 'onear', 'inear' or 'earbud' directory to read the form from
    """
    with open(os.path.join(DIR_PATH, 'name_groups.tsv'), 'r', encoding='utf-8') as fh:
        lines = fh.read().strip().split('\n')

    # First column is always the true name
    # Create dict with each false name as key and it's true name as value
    name_map = dict()
    for line in lines:
        names = line.split('\t')
        for false_name in names[1:]:
            name_map[false_name] = names[0]

    # Read name indexes and existing files for all supported measurement databases
    dbs = []
    for db in databases:
        index_path = os.path.join(DIR_PATH, db, 'name_index.tsv')
        if os.path.isfile(index_path):
            # Read name index
            name_index = NameIndex.read_tsv(index_path)
        else:
            # No name index, create one anew
            name_index = NameIndex()
        # Read all the existing files for the database
        files = [
            {'name': os.path.split(file)[1].replace('.csv', ''), 'path': file}
            for file in glob(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'), recursive=True)
        ]
        # Save both to dbs
        dbs.append({'name': db, 'name_index': name_index, 'files': files})

    for old_name, new_name in name_map.items():
        print(f'"{old_name}" -> "{new_name}"')
        for db in dbs:
            name_index = db['name_index']

            # Replace true names in name index with the new name
            updated_item = False
            matches = name_index.find(true_name=old_name)
            for item in matches.items:
                if new_name == 'ignore':
                    name_index.update(
                        NameItem(false_name=item.false_name, true_name=item.true_name, form='ignore'),
                        true_name=old_name)
                    print(f' Updated item: "{item.false_name}", "{new_name}", "ignore"')
                else:
                    name_index.update(
                        NameItem(false_name=item.false_name, true_name=new_name, form=item.form),
                        true_name=old_name)
                    print(f' Updated item: "{item.false_name}", "{new_name}", "{item.form}"')
                updated_item = True

            # Rename existing files
            same_name_files = [
                (f['name'], f['path']) for f in db['files']
                if f['name'].lower() == old_name.lower()
            ]
            for name, path in same_name_files:
                old_dir = os.path.split(path)[0]
                if new_name == 'ignore':
                    print(f' Removing "{old_dir}"')
                    shutil.rmtree(old_dir)
                    if not updated_item:
                        name_index.add(NameItem(false_name=old_name, true_name=None, form='ignore'))
                        print(f' Added item: "{old_name}", "", "ignore"')
                    continue

                # Plain string replacement, so that backslashes in the new name
                # are not read as regex replacement escapes
                new_path = path.replace(name, new_name)
                print(
                    f' Moving "{os.path.relpath(path, DIR_PATH)}" to "{os.path.relpath(new_path, DIR_PATH)}"'
                )
                os.makedirs(os.path.split(new_path)[0], exist_ok=True)
                shutil.move(path, new_path)
                # Remove the old directory by its own path; going through the moved
                # file with "<file>/.." cannot be resolved on POSIX systems
                os.rmdir(old_dir)

                matches = name_index.find(true_name=new_name)
                if not matches:
                    form = _infer_form_from_path(path)
                    name_index.add(NameItem(false_name=old_name, true_name=new_name, form=form))
                    print(f' Added item: "{old_name}", "{new_name}", "{form}"')
        print()

    for db in dbs:
        db['name_index'].write_tsv(os.path.join(DIR_PATH, db['name'], 'name_index.tsv'))
def prompt(self, false_name):
    """Prompts user for true name and form based on false name.

    Args:
        false_name: Name as it exists in the measurement source

    Returns:
        NameItem with the false name and the selected true name and form,
        None if the user gave no true name
    """
    if self.name_proposals is None:
        # No name proposals, the false name is the only option to offer
        true_name = self.prompt_true_name([false_name])
        if true_name is None:
            # User skipped
            return None
        return NameItem(false_name, true_name, self.prompt_form())

    # Name proposals initialized, add matching entries to options in prompt
    candidates = []
    candidates.extend(self.name_proposals.search_by_false_name(false_name))
    candidates.extend(self.name_proposals.search_by_true_name(false_name))

    names_and_ratios = []  # (true name, ratio, form) for each distinct true name
    for match in candidates:
        proposal, ratio = match[0], match[1]
        if not proposal.true_name:
            # Skip items without true name
            continue
        if ratio == 100:
            # Exact match
            proposal.true_name += ' ✓'
        label = proposal.true_name
        position = next(
            (i for i, entry in enumerate(names_and_ratios) if entry[0] == label), None)
        if position is None:
            # New match
            names_and_ratios.append((label, ratio, proposal.form))
        elif ratio > names_and_ratios[position][1]:
            # Existing match, keep the name and form but raise the ratio
            previous = names_and_ratios[position]
            names_and_ratios[position] = (previous[0], ratio, previous[2])

    # Offer the four best names and the false name itself
    ranked = sorted(names_and_ratios, key=lambda entry: entry[1], reverse=True)
    name_options = [entry[0] for entry in ranked[:4]]
    if false_name not in name_options:
        name_options.append(false_name)

    # Prompt
    true_name = self.prompt_true_name(name_options)
    if true_name is None:
        return None

    # Find and replace true manufacturer name or prompt it
    if self.manufacturers.find(true_name)[0] is None:
        # Unknown manufacturer, find options with the two first words and prompt it
        words = true_name.split()
        manufacturer_options = []
        for word_count in range(1, min(3, len(words))):
            candidate = ' '.join(words[:word_count])
            print(candidate)
            manufacturer_options += self.manufacturers.search(candidate)
            if all(option[0] != candidate for option in manufacturer_options):
                manufacturer_options.append((candidate, 0))
        manufacturer_options.sort(key=lambda option: option[1], reverse=True)
        option_names = [option[0] for option in manufacturer_options]
        manufacturer, replace = self.prompt_manufacturer(option_names)
        _, match = self.manufacturers.find(manufacturer)
        if not match:
            # Add new manufacturer
            self.manufacturers.manufacturers.append([manufacturer])
        else:
            # Add as a new variant in existing manufacturer
            for variants in self.manufacturers.manufacturers:
                if variants[0] == match:
                    variants.append(replace)
        self.manufacturers.write()

    # Replace
    true_name = self.manufacturers.replace(true_name)

    # Find the answer and select form
    form = next(
        (entry_form for name, _, entry_form in names_and_ratios if name == true_name), None)
    true_name = true_name.replace(' ✓', '')
    if form is None:
        # Form not found in name proposals, prompt it
        form = self.prompt_form()
    return NameItem(false_name, true_name, form)