def generate_cards(self, *words: str):
    """Fetch cards for ``words`` concurrently and append them to the cards file.

    Every word is looked up with ``self.get_card`` on the event loop's
    default executor, with at most ``DEFAULT_CONCURRENCY`` lookups running
    at a time. A word whose lookup raises is logged and reported as
    skipped. A word whose resolved headword was already recorded by a
    lookup that finished earlier contributes no row. The remaining rows
    are appended as CSV to the path derived from ``self.cards_file``, in
    the same order as ``words``.

    :param words: the words to query.
    """
    Log.i(TAG, 'generating {} cards'.format(len(words)))
    file = valid_path(self.cards_file)
    # region Access with lock in coroutines
    visited = set()
    skipped = []
    bar = ProgressBar(len(words))
    lock = asyncio.Lock()
    # endregion

    async def do_generate():
        # Bounds how many blocking lookups are handed to the executor at once.
        sem = asyncio.Semaphore(DEFAULT_CONCURRENCY)

        async def do_get(word: str) -> List[str]:
            # Returns the card fields, or implicitly None when the word is
            # skipped or its headword has been seen already.
            async with sem:
                try:
                    # get_card blocks on network I/O, so run it off-loop.
                    loop = asyncio.get_running_loop()
                    actual, fields = await loop.run_in_executor(
                        None, self.get_card, word)
                except Exception as e:
                    Log.e(TAG, 'can\'t get card: "{}", {}'.format(word, e))
                    async with lock:
                        skipped.append(word)
                        Log.e(TAG, 'skipped: "{}"'.format(word))
                else:
                    async with lock:
                        bar.extra = actual
                        bar.increment()
                        if actual not in visited:
                            visited.add(word)
                            visited.add(actual)
                            return fields

        # gather all tasks to keep results stable
        return await asyncio.gather(*[do_get(w) for w in words])

    bar.update()
    cards = asyncio.run(do_generate())
    # Drop the None placeholders left by skipped and duplicate words.
    cards = [card for card in cards if card]
    bar.done()
    # newline='' lets the csv module control line endings itself; without it
    # rows end in '\r\r\n' on Windows and embedded newlines are mistranslated.
    with open(file, 'a', encoding='utf8', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerows(cards)
    Log.i(TAG, 'generated {} cards to: {}'.format(len(cards), file))
    if skipped:
        Log.e(TAG, 'skipped {} words:\n{}'.format(len(skipped),
                                                  '\n'.join(skipped)))
def get_card(self, word: str) -> Tuple[str, List[str]]:
    """Query ``word`` and return the resolved headword with its card fields.

    The headword is taken from the last path segment of the URL the
    response finally came from, with hyphens turned into spaces. When it
    differs from the normalised input, the redirect is logged.

    :param word: the word to look up; ``/`` is treated as a space.
    :return: ``(actual, fields)`` where ``fields`` is whatever
        ``self._extract_fields`` produces from the page content.
    :raises WordNotFoundError: if the final URL has an empty last segment.
    """
    Log.d(TAG, 'querying "{}"'.format(word))
    quoted = urllib.parse.quote(word.replace('/', ' '))
    response = urlopen_with_retry(URL_QUERY.format(quoted), fake_headers())

    # The last segment of the final URL names the entry actually served.
    final_path = urllib.parse.urlsplit(response.geturl()).path
    actual = final_path.rsplit('/', 1)[-1].replace('-', ' ')
    if not actual:
        raise WordNotFoundError('can\'t find: "{}"'.format(word))

    # Normalise the input the same way so only real redirects are reported:
    # separators become spaces, case is folded, whitespace runs collapse.
    expected = word
    for separator in ('/', '-', '\''):
        expected = expected.replace(separator, ' ')
    expected = ' '.join(expected.lower().split())
    if actual != expected:
        Log.i(TAG, 'redirected "{}" to: "{}"'.format(word, actual))

    page = url_get_content(response, fake_headers())
    card_fields = self._extract_fields(page)
    Log.d(TAG, 'parsed: "{}"'.format(actual))
    return actual, card_fields
def generate_cards(self, *words: str):
    """Fetch cards for ``words`` one by one and append them to the cards file.

    Each card is written as one line of tab-separated fields to the path
    derived from ``self.cards_file``. A word is skipped as a duplicate when
    it, or the headword it resolves to, has already been written during
    this call. A word whose lookup raises is logged and reported as
    skipped; the remaining words are still processed.

    :param words: the words to query.
    """
    Log.i(TAG, 'trying to generate {} cards'.format(len(words)))
    visited = set()
    skipped = []
    # Count rows actually written; duplicates are neither skipped nor written.
    generated = 0
    cf = valid_path(self.cards_file)
    with open(cf, 'a', encoding='utf8') as fp:
        for word in words:
            if word in visited:
                Log.i(TAG, 'skipping duplicate: "{}"'.format(word))
                continue
            try:
                actual, fields = self.get_card(word)
            except Exception as e:
                # Best effort: one failing word must not abort the batch.
                Log.e(TAG, e)
                skipped.append(word)
                Log.w(TAG, 'skipped: "{}"'.format(word))
                continue
            # A different spelling may resolve to a headword already written.
            already_written = actual in visited
            visited.add(word)
            visited.add(actual)
            if already_written:
                Log.i(TAG, 'skipping duplicate: "{}"'.format(word))
                continue
            # Separate rows with a newline unless the file is still empty.
            if fp.tell():
                fp.write('\n')
            fp.write('\t'.join(fields))
            generated += 1
    if skipped:
        Log.w(
            TAG,
            'skipped {} words:\n'.format(len(skipped)) + '\n'.join(skipped))
    Log.i(TAG, 'generated {} cards to: {}'.format(generated, cf))
def _retrieve_styling(self) -> str:
    """Download the stylesheet, font and scripts and return them as HTML.

    The font from ``URL_FONT`` is saved under ``self.media_path`` with a
    leading underscore in its file name, and the stylesheet's ``url(...)``
    reference to the font is rewritten to that bare file name.

    :return: a ``<style>`` element holding the stylesheet, followed by two
        ``<script>`` elements holding the content of ``URL_AMP`` and
        ``URL_AMP_ACCORDION``.
    """
    Log.i(TAG, 'retrieving styling')
    style = url_get_content(URL_STYLE, fake_headers())
    # NOTE(review): presumably element 0 is the guessed font file name --
    # confirm against url_save_guess_file.
    font = url_save_guess_file(URL_FONT, fake_headers())[0]
    # add '_' to tell Anki that the file is used by template
    _font = url_save(URL_FONT, headers=fake_headers(),
                     filename=valid_path(
                         os.path.join(self.media_path, '_' + font)))[0]
    Log.i(TAG, 'saved font file to: {}'.format(_font))
    _font = os.path.basename(_font)
    # Escape the file name so '.' and other metacharacters match literally,
    # and use a callable replacement so backslashes in the new name are not
    # interpreted as escape sequences.
    style = re.sub(r'url\([\S]*?/{}'.format(re.escape(font)),
                   lambda _match: 'url({}'.format(_font), style)
    style = '<style>{}</style>'.format(style)
    style += '<script type="text/javascript">{}</script>'.format(
        url_get_content(URL_AMP, fake_headers()))
    style += '<script type="text/javascript">{}</script>'.format(
        url_get_content(URL_AMP_ACCORDION, fake_headers()))
    Log.i(TAG, 'retrieved styling')
    return style
def generate_styling(self):
    """Write ``self._styling`` to the styling file, replacing its content.

    The target path is ``self.styling_file`` passed through ``valid_path``.
    """
    Log.i(TAG, 'generating styling')
    target = valid_path(self.styling_file)
    with open(target, mode='w', encoding='utf8') as out:
        # Read the attribute only once the file is open, as before.
        out.write(self._styling)
    done_message = 'generated styling to: {}'.format(target)
    Log.i(TAG, done_message)
def generate_back_template(self):
    """Write ``self._back_template`` to the back template file.

    The target path is ``self.back_template_file`` passed through
    ``valid_path``; existing content is replaced.
    """
    Log.i(TAG, 'generating back template')
    target = valid_path(self.back_template_file)
    with open(target, mode='w', encoding='utf8') as out:
        # Read the attribute only once the file is open, as before.
        out.write(self._back_template)
    done_message = 'generated back template to: {}'.format(target)
    Log.i(TAG, done_message)
def generate_front_template(self):
    """Write ``self._front_template`` to the front template file.

    The target path is ``self.front_template_file`` passed through
    ``valid_path``; existing content is replaced.
    """
    Log.i(TAG, 'generating front template')
    target = valid_path(self.front_template_file)
    with open(target, mode='w', encoding='utf8') as out:
        # Read the attribute only once the file is open, as before.
        out.write(self._front_template)
    done_message = 'generated front template to: {}'.format(target)
    Log.i(TAG, done_message)