def subdir_recurse(listing, path, depth=1):
    # XXX add a knob for this
    if depth > 256:
        _out.die('reached max recursion depth')
    for idx in range(len(listing)):
        d_or_f, fname = (listing[idx]['type'], listing[idx]['name'])
        this_path = ''.join([path, '/', fname])
        if d_or_f == 'directory':
            listing[idx] = subdir_recurse(listing[idx]['contents'],
                                          this_path, depth+1)
        elif d_or_f == 'file':
            title = Struct()
            title.basename = path
            title.name = fname
            title.path = this_path
            listing[idx] = title
        else:
            # sanity check
            _out.die('BUG: unsupported file type `{}`'.format(d_or_f))
    if depth != 1:
        return listing
    return _util.flatten_sublists(listing)
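
# Illustrative sketch (comment only, not executed): given a parsed slice
# of madokami's files.json shaped like
#
#   [{'type': 'directory', 'name': 'Foo', 'contents':
#        [{'type': 'file', 'name': 'Foo v01.zip'}]}]
#
# subdir_recurse(listing, '/Manga/F/FO/FOO') returns the flattened list
# of file Structs; the single entry here would have
#
#   .basename == '/Manga/F/FO/FOO/Foo'
#   .name     == 'Foo v01.zip'
#   .path     == '/Manga/F/FO/FOO/Foo/Foo v01.zip'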
def main():
    try:
        _g.conf = Struct()
        args = init_args()
        local_import()
        init_config()
        if args.outdir:
            _g.conf._outdir = args.outdir
        else:
            _g.conf._outdir = _g.conf._default_outdir
        if args.auth:
            up = args.auth.split(':', 1)
            if len(up) == 1 or '' in up:
                _out.die('argument -a: bad auth format')
            _g.conf._user, _g.conf._pass = up
        if args.silent or _g.conf._no_output:
            # go ahead and set this so it is globally known.
            # there is no need for distinction at this point.
            _g.conf._no_output = True
            _g.log.addFilter(nullfilter)
        ret = main_loop(args.manga)
    except (KeyboardInterrupt, EOFError) as e:
        print()
        _out._('caught {} signal, exiting...'.format(type(e).__name__))
        return 0
    return ret
def create_nwo_path(name):
    '''Create the exact path that the manga `name` should be in.

    This path is constructed in the `New World Order` format described
    here: manga.madokami.com/Info/NewWorldOrder.txt

    Parameters:
        name - the name of the manga to convert to NWO format.
    '''
    if not name:
        _out.die('need a name with at least one character!')
        return None
    name = re.sub(r'^(the|an?) ', '', name, flags=re.I)
    name = name.upper()
    return re.sub(r'^(.)(.|)?(.|)?(.|)?.*', r'\1/\1\2/\1\2\3\4', name)
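
# Illustrative behavior (doctest-style sketch, derived from the two
# re.sub() calls above):
#
#   >>> create_nwo_path('Naruto')
#   'N/NA/NARU'
#   >>> create_nwo_path('The Breaker')   # leading article is stripped
#   'B/BR/BREA'
#   >>> create_nwo_path('Ai')            # short names repeat what exists
#   'A/AI/AI'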
def check_preftags(vc, vcq, fo, allf, npref, v_or_c):
    # v_or_c: True -> vol, False -> chp
    if v_or_c:
        ftupidx = 1
        what = 'vol'
        whatls = fo._vols
    else:
        ftupidx = 2
        what = 'chp'
        whatls = fo._chps
    if fo._preftag:
        for ftup in allf:
            if vc in ftup[ftupidx]:
                _g.log.info('replacing {} with preferred'
                            ' tag {}'.format(ftup[0], fo._f))
                # safe to mutate during iteration: we return right after.
                allf.remove(ftup)
                vcq.extend(whatls)
                return 'break'
        else:
            _out.die("BUG: couldn't find any dup {} in {} "
                     "when replacing with pref tag".format(what, whatls),
                     lvl='critical')
    elif not fo._npreftag and npref:
        for t in npref:
            if vc in t[ftupidx]:
                tup = t
                break
        else:
            _g.log.warning('dup vol and chps seen')
            return 'break'
        _g.log.info('replacing nonpreferred {} '
                    'with {}'.format(tup[0], fo._f))
        allf.remove(tup)
        npref.remove(tup)
        return 'continue'
    return None
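
# Calling convention sketch (assumption: the caller is a loop that
# dispatches on the string returned above; that loop is not shown in
# this section):
#
#   ret = check_preftags(vc, vcq, fo, allf, npref, v_or_c)
#   if ret == 'break':
#       break       # duplicate resolved in favor of the preferred tag
#   elif ret == 'continue':
#       continue    # nonpreferred duplicate removed; reevaluate
#   # ret is None: no preference rule applied; fall through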
def rem_subdir_recurse(listing, path, depth=1):
    # XXX add a knob for this
    if depth > 256:
        _out.die('reached max recursion depth')
    for idx in range(len(listing)):
        # madokami's FTP LIST format is long ls ({} and | below are
        # meta tokens):
        #
        #   {d,-}rwxrwxrwx 1 u g sz mon day y/time fname
        #    |                                     |
        #    |=> directory or regular file         |=> filename
        #
        # XXX: while it is highly unlikely that whitespace gives any
        # significant distinction beyond one space, str.split() splits
        # on any amount of whitespace; thus, when re-join()ed, any
        # extra whitespace is truncated to one space.
        fields = listing[idx].split()
        d_or_f, fname = (fields[0][:1], ' '.join(fields[8:]))
        this_path = ''.join([path, '/', fname])
        if d_or_f == 'd':
            listing[idx] = rem_subdir_recurse(search_exact(this_path, True)
                                              .getvalue()
                                              .decode()
                                              .splitlines(),
                                              this_path, depth+1)
        elif d_or_f == '-':
            # is a regular file
            title = Struct()
            title.basename = path
            title.name = fname
            title.path = this_path
            listing[idx] = title
        else:
            # sanity check
            _out.die('BUG: unsupported file type `{}`'.format(d_or_f))
    if depth != 1:
        return listing
    return _util.flatten_sublists(listing)
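
# Illustrative split of a single LIST line (sketch; column widths vary):
#
#   line = 'drwxr-xr-x 1 ftp ftp 4096 Jan 01 2016 Some Title'
#   fields = line.split()
#   fields[0][:1]        -> 'd'            (a directory)
#   ' '.join(fields[8:]) -> 'Some Title'   (filename; inner runs of
#                                           whitespace collapse to one
#                                           space, per the XXX above)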
def __init__(self, f, title):
    if not f:
        _out.die('File parameter is empty!')
    ParseCommon.__init__(self)
    self._f = f
    self._tag = []
    self._title = ''
    # Token abbreviations:
    # EXT -> Extension
    # GRB -> Group Beginning
    # GRE -> Group End
    # RNG -> Range
    # DLM -> Delimiter
    # VOL -> Volume
    # CHP -> Chapter
    # ALL -> Complete Archive
    # ART -> Artbook
    # PLT -> Pilot
    # PRL -> Prologue
    # PRE -> Prelude
    # PRO -> Prototype
    # OMK -> Omake
    # NUM -> Number
    # COM -> Comma Separator
    # DAT -> Data
    #
    # Multi-character alpha regexes have to be checked in a certain
    # order because they are grouped together with logical `OR`s. In
    # case of mismatches, the logic following the matching tries to
    # sort the tokens out in a somewhat sane manner.
    #
    # NOTE: anything starting with `v` needs to be put _before_ VOL;
    #       anything starting with `c` needs to be put _before_ CHP.
    tok_spec = [
        ('EXT', r'\.[^\.]+$'),
        ('GRB', r'(\(|\[|<|\{)'),
        ('GRE', r'(\)|\]|>|\})'),
        # the look-ahead assertion checks for the EXT `.`
        ('RNG', r'(-|\.\.(?=[^.]*[.]))'),
        ('DLM', r'(-|_|\.|\s+)'),
        # VOL and CHP both use a look-ahead assertion requiring a
        # number (after optional delimiters) to follow.
        ('VOL', r'v(ol(ume)?)?(?=(-|_|\.|\s+)*[0-9])'),
        ('CHP', r'(c(h(a?p(ter)?)?)?|e(p(isode)?)?)(?=(-|_|\.|\s+)*[0-9])'),
        ('ALL', r'complete'),
        ('ART', r'artbook'),
        ('PLT', r'pilot'),
        ('PRL', r'prologu?e'),
        ('PRE', r'prelude'),
        ('PRO', r'prototype'),
        ('OMK', r'\+?(?=(-|_|\.|\s+)*)(omake|extra|bonus|special)'),
        ('NUM', r'\d+(\.\d+)?'),
        ('COM', r','),
        ('DAT', r'.'),
    ]
    tok_regex = '|'.join('(?P<%s>%s)' % p for p in tok_spec)
    for t in re.finditer(tok_regex, f, flags=re.I):
        typ = t.lastgroup
        val = t.group(typ)
        self._alltoks.append({'typ': typ, 'val': val})
    if self._alltoks[-1]['typ'] != 'EXT':
        _out.die('Encountered a file without an extension, which is '
                 'not currently supported. Bailing.', lvl='FATAL')
    for t in self._alltoks:
        if t['typ'] == 'NUM':
            t['raw'] = t['val']
            t['val'] = float(t['val'])
    # stores whether a vol or chp was seen last:
    # True -> vol, False -> chp, None -> neither yet.
    self.last = None
    self.seenchp = False
    self.seenvol = False
    self.other = None
    wildnums = []
    while self._idx < len(self._alltoks):
        t = self.cur_tok_typ()
        _g.log.debug('{} {}'.format(self._idx, t))
        if t == 'VOL':
            self.last = True
            if not self.seenvol:
                self.seenvol = True
            vidx = self._idx
            self.eat_delim()
            if self.cur_tok_typ() != 'NUM':
                self.regex_mismatch('DAT', 'VOL', vidx)
                self._idx += 1
                continue
            vval = self.cur_tok_val()
            self._vols.append(vval)
            # we need this in case of a range with a fractional start,
            # e.g. vol1.5-3, in which case we assume the successive
            # volumes are whole volumes.
            vval = int(vval) + 1
            self.eat_delim(True)
            if self.cur_tok_typ() == 'RNG':
                self.eat_delim()
                if self._idx == len(self._alltoks):
                    # open-ended range
                    self._vols.append(vidx)
                    self._vols.append(self.ALL)
                    continue
                elif self.cur_tok_typ() == 'NUM':
                    for n in range(vval, int(self.cur_tok_val()+1)):
                        self._vols.append(float(n))
                    if self.cur_tok_val() % 1:
                        self._vols.append(self.cur_tok_val())
                    self._idx += 1
            continue  # XXX
        elif t == 'CHP':
            self.last = False
            if not self.seenchp:
                self.seenchp = True
            cidx = self._idx
            self.eat_delim()
            if self.cur_tok_typ() != 'NUM':
                self.regex_mismatch('DAT', 'CHP', cidx)
                self._idx += 1
                continue
            cval = self.cur_tok_val()
            self._chps.append(cval)
            # we need this in case of a range with a fractional start,
            # e.g. chp1.5-3, in which case we assume the successive
            # chapters are whole chapters.
            cval = int(cval) + 1
            self.eat_delim(True)
            if self.cur_tok_typ() == 'RNG':
                self.eat_delim()
                if self._idx == len(self._alltoks):
                    # open-ended range
                    self._chps.append(cidx)
                    self._chps.append(self.ALL)
                    continue
                elif self.cur_tok_typ() == 'NUM':
                    for n in range(cval, int(self.cur_tok_val()+1)):
                        self._chps.append(float(n))
                    if self.cur_tok_val() % 1:
                        self._chps.append(self.cur_tok_val())
                    self._idx += 1
            continue  # XXX
        elif t == 'COM':
            if self.last is None:
                self.regex_mismatch('DAT', 'COM')
                continue
            comidx = self._idx
            self.eat_delim()
            if self.cur_tok_typ() != 'NUM':
                self.regex_mismatch('DAT', 'COM', comidx)
                continue
            comval = self.cur_tok_val()
            self.push_to_last(comval)
            self.eat_delim(True)
            if self.cur_tok_typ() == 'RNG':
                comval = int(comval) + 1
                self.eat_delim()
                if self.cur_tok_typ() == 'NUM':
                    for n in range(comval, int(self.cur_tok_val())+1):
                        self.push_to_last(float(n))
        elif t == 'RNG':
            self.regex_mismatch('DLM', 'RNG')
        elif t == 'NUM':
            # spotted a number without a vol/chp prefix
            nidx = self._idx
            self.eat_delim(True)
            if self.cur_tok_typ() == 'COM':
                self.eat_delim()
                if self.cur_tok_typ() != 'NUM':
                    self.regex_mismatch('DAT', 'NUM', nidx)
                    self.regex_mismatch('DAT', 'COM')
                    self._idx += 1
                    continue
                wildnums.append(self._alltoks[nidx])
            elif self.cur_tok_typ() == 'RNG':
                self.eat_delim()
                if self.cur_tok_typ() != 'NUM':
                    self.regex_mismatch('DAT', 'NUM', nidx)
                    self.regex_mismatch('DAT', 'RNG')
                    self._idx += 1
                    continue
                st = self.get_tok_val(nidx)
                self._alltoks[nidx]['val'] = tmprng = []
                tmprng.append(st)
                rngb = int(st) + 1
                for n in range(rngb, int(self.cur_tok_val())+1):
                    tmprng.append(float(n))
                if self.cur_tok_val() % 1:
                    tmprng.append(float(self.cur_tok_val()))
                wildnums.append(self._alltoks[nidx])
            elif self.cur_tok_typ() == 'DAT':
                self.regex_mismatch('DAT', 'NUM')
            else:
                wildnums.append(self._alltoks[nidx])
        elif t in {'PLT', 'PRE', 'PRL', 'ART'}:
            # shouldn't have vol/chp
            if self._vols or self._chps:
                self.regex_mismatch('DAT', t)
                self._idx += 1
                continue
            self.other = t
        elif t == 'OMK':
            # probably should have vol/chp
            if not self._vols and not self._chps:
                _g.log.warning('regex picked up a bonus type without '
                               'a vol/chp identifier, which may be '
                               'incorrect. Adding anyway...')
            self.other = t
        elif t == 'ALL':
            self._all = True
        elif t == 'GRB':
            if self.get_tok_typ(self._idx+1) not in {'VOL', 'CHP', 'ALL',
                                                     'OMK', 'PLT', 'PRE',
                                                     'PRL', 'ART'}:
                self._idx += 1
                if (self.cur_tok_typ() == 'NUM' and
                        self.get_tok_typ(self._idx+1) != 'DAT'):
                    continue
                tmptag = ''
                while self.cur_tok_typ() not in {'GRE', None}:
                    if self.cur_tok_typ() == 'NUM':
                        self.regex_mismatch('DAT', 'NUM')
                    tmptag += str(self.cur_tok_val())
                    self._idx += 1
                if self.cur_tok_val() is None:
                    _out.die("BUG: tag matching couldn't find GRE")
                if tmptag[:len(title)].lower().strip() == title.lower():
                    if (self.get_tok_typ(self._idx-1) in
                            {'PLT', 'PRE', 'PRO', 'PRL', 'ART', 'OMK'}):
                        continue
                # non-group tag with title in text
                self._tag.append(tmptag)
        elif t == 'DAT':
            self._title = ''.join([self._title, self.cur_tok_val()])
            if self.get_tok_val(self._idx+1) == ' ':
                self._title += ' '
        self._idx += 1
    if wildnums:
        # these are numbers that did not have a vol/chp prefix, so we
        # do our best to guess what they are.
        wnls = [n['val'] for n in wildnums]
        wnsubls = []
        for n in wnls:
            if isinstance(n, list):
                # ranges are stored as sublists; splice them in.
                wnls.extend(n)
                wnsubls.append(n)
        if wnsubls:
            for l in wnsubls:
                wnls.remove(l)
        del wnsubls
        if len(wildnums[0]['raw']) >= 3:
            # a long raw number such as `120` is assumed to be a
            # chapter, unless a decimal point appears within the first
            # two characters.
            dot = wildnums[0]['raw'].find('.')
            if not -1 < dot < 2:
                self._chps.extend(sorted(wnls))
        elif not self._vols and not self._chps:
            if not max(wnls) % 100:
                # assuming chp
                self._chps.extend(sorted(wnls))
            else:
                # assuming vol
                self._vols.extend(sorted(wnls))
        elif not self._vols:
            # assuming vol
            self._vols.extend(sorted(wnls))
        elif not self._chps:
            # assuming chp
            self._chps.extend(sorted(wnls))
    self._title = self._title.strip()
    self._vols = sorted(set(self._vols))
    self._chps = sorted(set(self._chps))
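
# Worked example (sketch; assumes this parser is constructed with the
# bare filename and series title, and that eat_delim() advances past the
# current token plus any delimiters): for f='Title v01-03.zip',
# title='Title', the tokenizer emits roughly
#
#   DAT x5 ('T','i','t','l','e'), DLM (' '), VOL ('v'), NUM (01),
#   RNG ('-'), NUM (03), EXT ('.zip')
#
# and the parse loop above leaves
#
#   self._title == 'Title'
#   self._vols  == [1.0, 2.0, 3.0]
#   self._chps  == []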
def get_listing(manga):
    badret = ('', '')
    if _g.conf._usecache:
        # XXX move this
        def match_dir(diriter, ldict):
            global mlow
            try:
                cdir = next(diriter)
            except StopIteration:
                for cdict in ldict:
                    if cdict['name'].lower() == mlow:
                        return (cdict['contents'], cdict['name'])
                return None
            for cdict in ldict:
                if cdict['name'] == cdir:
                    return match_dir(diriter, cdict['contents'])
            return None
        jsonloc = os.path.join(_g.conf._home, '.cache', 'madodl',
                               'files.json') \
            if not _g.conf._cachefile else _g.conf._cachefile
        jsondirloc = os.path.dirname(jsonloc)
        if not os.path.exists(jsonloc):
            os.makedirs(jsondirloc, 0o770, True)
            _curl.curl_json_list(jsonloc, True)
        assert os.path.exists(jsonloc)
        path = _util.create_nwo_path(manga)
        d1, d2, d3 = path.split('/')
        mdir = None
        with breaks(open(jsonloc, errors='surrogateescape')) as f:
            jobj = json.load(f)
            for o in jobj[0].get('contents'):
                if o['name'] == 'Manga':
                    jobj = o['contents']
                    break
            global mlow
            mlow = manga.lower()
            mdir, title = match_dir(iter((d1, d2, d3)), jobj) or badret
            if not mdir:
                _g.log.warning("couldn't find title in JSON file. Trying "
                               "online query.")
                _g.conf._found_in_cache = False
                raise breaks.Break
            _g.conf._found_in_cache = True
            _g.conf._cururl = 'https://{}{}{}/{}/{}/{}'.format(
                loc['DOMAIN'], loc['MLOC'], d1, d2, d3, title)
            _g.log.info('\n-----\n{}-----'.format(mdir))
            path = '/'.join((path, title))
            return (mdir, title, path)
    qout = search_query(manga).getvalue().decode()
    qp = _parsers.ParseQuery()
    qp.feed(qout)
    # FIXME: this is a temporary workaround to filter out non-manga
    # results until madokami allows for this granularity itself.
    qp.mresultnum = 0
    qp.mresults = []
    for url, r in qp.results:
        if r.startswith('/Manga') and r.count('/') == 5:
            qp.mresults.append([url, r])
            qp.mresultnum += 1
    if qp.mresultnum == 0:
        _out.die('manga not found')
    if qp.mresultnum > 1:
        print('Multiple matches found. Please choose from the '
              'selection below:\n')
        i = 1
        for url, f in qp.mresults:
            print('{}: {}'.format(i, os.path.basename(f)))
            i += 1
        print()
        while True:
            try:
                ch = int(input('choice > '))
                if ch in range(1, i):
                    break
                print('Pick a number between 1 and {}'.format(i-1))
            except ValueError:
                print('Invalid input.')
        m = qp.mresults[ch-1][0]
        title = os.path.basename(qp.mresults[ch-1][1])
    else:
        m = qp.mresults[0][0]
        title = os.path.basename(qp.mresults[0][1])
        _out._('one match found: {}'.format(title))
    dirls = search_exact(m, True).getvalue().decode()
    _g.log.info('\n-----\n{}-----'.format(dirls))
    return (dirls, title, m)
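
# Cache-hit flow sketch (names hypothetical):
#
#   get_listing('Naruto')
#     -> _util.create_nwo_path('Naruto') == 'N/NA/NARU'
#     -> match_dir(iter(('N', 'NA', 'NARU')), jobj) descends the JSON
#        tree one level per path component, then matches the title
#        itself case-insensitively
#     -> returns (mdir, 'Naruto', 'N/NA/NARU/Naruto')
#
# On a cache miss the function falls through to search_query() and, for
# multiple hits, the interactive chooser above.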