# Example 1
def subdir_recurse(listing, path, depth=1):
    """Recursively flatten a nested cache listing into title Structs.

    Each entry is a dict with 'type' ('directory' or 'file'), 'name',
    and, for directories, 'contents'. Directories are replaced in place
    by their recursively-processed contents; files become Structs with
    basename/name/path attributes. At the top level (depth == 1) the
    nested result is flattened into a single list.
    """
    # XXX add a knob for this
    if depth > 256:
        _out.die('reached max recursion depth')

    for i, entry in enumerate(listing):
        kind = entry['type']
        name = entry['name']
        full = ''.join([path, '/', name])

        if kind == 'directory':
            listing[i] = subdir_recurse(entry['contents'], full, depth + 1)
        elif kind == 'file':
            t          = Struct()
            t.basename = path
            t.name     = name
            t.path     = full
            listing[i] = t
        else: # sanity check
            _out.die('BUG: unsupported file type `{}`'.format(kind))

    if depth == 1:
        return _util.flatten_sublists(listing)

    return listing
# Example 2
def main():
    """Program entry point: parse arguments, set up global config, and
    run the main download loop. Returns the loop's exit status, or 0 on
    a keyboard interrupt / EOF.
    """
    try:
        _g.conf = Struct()
        args    = init_args()

        local_import()
        init_config()

        # CLI-supplied output directory wins over the configured default.
        _g.conf._outdir = args.outdir if args.outdir else \
                          _g.conf._default_outdir

        if args.auth:
            credentials = args.auth.split(':', 1)

            # Require exactly "user:pass" with both halves non-empty.
            if len(credentials) == 1 or '' in credentials:
                _out.die('argument -a: bad auth format')

            _g.conf._user, _g.conf._pass = credentials

        if args.silent or _g.conf._no_output:
            # go ahead and set this so it is globally known.
            # there is no need for distinction at this point.
            _g.conf._no_output = True
            _g.log.addFilter(nullfilter)

        return main_loop(args.manga)
    except (KeyboardInterrupt, EOFError) as exc:
        print()
        _out._('caught {} signal, exiting...'.format(type(exc).__name__))
        return 0
# Example 3
def create_nwo_path(name):
    '''Create the exact path that the manga `name` should be in.

       This path is constructed in the `New World Order` format
       described here: manga.madokami.com/Info/NewWorldOrder.txt

       Parameters:
       name - the name of the manga to convert to NWO format.
    '''
    if not name:
        _out.die('need a name with at least one character!')
        return None

    # Strip a leading English article ("the", "a", "an"), case-insensitively,
    # then normalize to upper case.
    normalized = re.sub(r'^(the|an?) ', '', name, flags=re.I).upper()

    # Build <c1>/<c1c2>/<c1c2c3c4> from up to the first four characters;
    # the optional empty groups make short names collapse gracefully.
    return re.sub(r'^(.)(.|)?(.|)?(.|)?.*', r'\1/\1\2/\1\2\3\4', normalized)
# Example 4
def check_preftags(vc, vcq, fo, allf, npref, v_or_c):
    """Resolve a duplicate volume/chapter against tag preferences.

    vc     - the vol/chp number that collided
    vcq    - queue extended with fo's vols/chps on a preferred replacement
    fo     - candidate file object carrying _preftag/_npreftag/_vols/_chps/_f
    allf   - list of (file, vols, chps) tuples already accepted
    npref  - subset of allf whose tags are non-preferred
    v_or_c - True -> volume, False -> chapter

    Returns 'break', 'continue', or None to drive the caller's loop.
    """
    if v_or_c:
        tup_idx = 1
        label   = 'vol'
        fo_ls   = fo._vols
    else:
        tup_idx = 2
        label   = 'chp'
        fo_ls   = fo._chps

    if fo._preftag:
        # The new file carries a preferred tag: evict the duplicate.
        for entry in allf:
            if vc not in entry[tup_idx]:
                continue
            _g.log.info('replacing {} with preferred'
                     ' tag {}'.format(entry[0], fo._f))
            allf.remove(entry)
            vcq.extend(fo_ls)
            return 'break'

        # No duplicate found — that contradicts why we were called.
        _out.die("BUG: couldn't find any dup {} in {} "
            "when replacing with pref tag".format(label, fo_ls),
            lvl='critical')
    elif not fo._npreftag and npref:
        # The existing duplicate has a non-preferred tag: replace it.
        match = None
        for entry in npref:
            if vc in entry[tup_idx]:
                match = entry
                break

        if match is None:
            _g.log.warning('dup vol and chps seen')
            return 'break'

        _g.log.info('replacing nonpreferred {} '
                 'with {}'.format(match[0], fo._f))
        allf.remove(match)
        npref.remove(match)
        return 'continue'

    return None
# Example 5
def rem_subdir_recurse(listing, path, depth=1):
    """Recursively expand a remote FTP LIST output into title Structs.

    Directory entries are re-listed via search_exact() and recursed
    into; regular files become Structs with basename/name/path. At the
    top level (depth == 1) the nested result is flattened.
    """
    # XXX add a knob for this
    if depth > 256:
        _out.die('reached max recursion depth')

    for i, line in enumerate(listing):
        # madokami's FTP LIST format is long ls, [{}/ are meta tokens]:
        # {d,-}rwxrwxrwx 1 u g sz mon day y/time fname
        #  |                                     |
        #  |=> directory or regular file         |=> filename
        #
        # XXX: while highly unlikely that whitespace gives any significant
        # distinction beyond one space, split() splits by any amount of
        # wspace; thus, when re-join()ed, any extra wspace is truncated to
        # one space.
        fields = line.split()
        kind   = fields[0][:1]
        name   = ' '.join(fields[8:])
        full   = ''.join([path, '/', name])

        if kind == 'd':
            sub = (search_exact(full, True)
                       .getvalue()
                       .decode()
                       .splitlines())
            listing[i] = rem_subdir_recurse(sub, full, depth + 1)
        elif kind == '-': # is reg file
            t          = Struct()
            t.basename = path
            t.name     = name
            t.path     = full
            listing[i] = t
        else: # sanity check
            _out.die('BUG: unsupported file type `{}`'.format(kind))

    if depth == 1:
        return _util.flatten_sublists(listing)

    return listing
# Example 6
    def __init__(self, f, title):
        """Tokenize the archive filename `f` and extract metadata.

        Populates self._vols, self._chps, self._tag, self._title, and
        friends by running a regex tokenizer over the filename and then
        interpreting the token stream.

        Parameters:
        f     - the archive filename to parse (must be non-empty and
                have an extension).
        title - the known manga title, used to reject group tags that
                merely repeat the title.
        """
        if not f:
            _out.die('File parameter is empty!')

        ParseCommon.__init__(self)
        self._f     = f
        self._tag   = []
        self._title = ''
        # Token abbreviations:
        # EXT -> Extension
        # GRB -> Group Beginning
        # GRE -> Group End
        # RNG -> Range
        # DLM -> Delimiter
        # VOL -> Volume
        # CHP -> Chapter
        # ALL -> Complete Archive
        # ART -> Artbook
        # PLT -> Pilot
        # PRL -> Prolog
        # PRE -> Prelude
        # PRO -> Prototype
        # OMK -> Omake
        # NUM -> Number
        # COM -> Comma Separator
        # DAT -> Data
        #
        # Multi-character alpha regex have to
        # be checked in a certain order because
        # they are then grouped with logical `ORs`.
        # In case of mismatches, The logic following
        # the matching trys to sort out the tokens in
        # a somewhat sane matter.
        #
        # NOTE: anything starting with `v` needs to be put _before_ VOL
        #       anything starting with `c` needs to be put _before_ CHP
        tok_spec = [
            ('EXT', r'\.[^\.]+$')            ,
            ('GRB', r'(\(|\[|<|\{)')         ,
            ('GRE', r'(\)|\]|>|\})')         ,
            ('RNG', r'(-|\.\.(?=[^.]*[.]))') , # assertion checks for EXT `.`
            ('DLM', r'(-|_|\.|\s+)')         ,
            ('VOL', r'''(?x)
                        v(ol(ume)?)?
                        (?=(-|_|\.|\s+)*[0-9]) # look-ahead assertion
                     ''') ,
            ('CHP', r'''(?x)
                        (c(h(a?p(ter)?)?)?|e(p(isode)?)?)
                        (?=(-|_|\.|\s+)*[0-9])
                     ''') ,
            ('ALL', r'complete')  ,
            ('ART', r'artbook')   ,
            ('PLT', r'pilot')     ,
            ('PRL', r'prologu?e') ,
            ('PRE', r'prelude')   ,
            ('PRO', r'prototype') ,
            ('OMK', r'''(?x)
                        \+?(?=(-|_|\.|\s+)*)
                        (omake|extra|bonus|special)
                     ''') ,
            ('NUM', r'\d+(\.\d+)?') ,
            ('COM', r',')           ,
            ('DAT', r'.')           ,
        ]
        tok_regex = '|'.join('(?P<%s>%s)' % p for p in tok_spec)

        for t in re.finditer(tok_regex, f, flags=re.I):
            typ = t.lastgroup
            val = t.group(typ)
            self._alltoks.append({'typ' : typ, 'val' : val})

        if self._alltoks[-1]['typ'] != 'EXT':
            _out.die('Encountered a file without an extension, which is '
                     'not currently supported. Bailing.', lvl='FATAL')

        # NUM tokens keep their raw text and gain a float value.
        for t in self._alltoks:
            if t['typ'] == 'NUM':
                t['raw'] = t['val']
                t['val'] = float(t['val'])

        # variable stores whether vol or chp
        # was seen last. True = vol, False = chp
        self.last    = None
        self.seenchp = False
        self.seenvol = False
        self.other   = None
        wildnums     = []

        while self._idx < len(self._alltoks):
            t = self.cur_tok_typ()

            _g.log.debug('{} {}'.format(self._idx, t))

            if t == 'VOL':
                self.last = True

                if not self.seenvol:
                    self.seenvol = True

                vidx = self._idx
                self.eat_delim()

                if self.cur_tok_typ() != 'NUM':
                    self.regex_mismatch('DAT', 'VOL', vidx)
                    self._idx += 1
                    continue

                vval = self.cur_tok_val()
                self._vols.append(vval)

                # we need this line in case of a range
                # with a fractional e.g. vol1.5-3 in which
                # case we assume the successive volumes are
                # whole volumes.
                vval = int(vval) + 1
                self.eat_delim(True)

                if self.cur_tok_typ() == 'RNG':
                    self.eat_delim()

                    if self._idx == len(self._alltoks):
                        # open-ended range
                        # NOTE(review): appending the token *index* here
                        # looks suspicious — verify it is intentional.
                        self._vols.append(vidx)
                        self._vols.append(self.ALL)
                        continue
                    elif self.cur_tok_typ() == 'NUM':
                        for n in range(vval, int(self.cur_tok_val()+1)):
                            self._vols.append(float(n))

                        if self.cur_tok_val() % 1:
                            self._vols.append(self.cur_tok_val())

                        self._idx += 1

                continue # XXX
            elif t == 'CHP':
                self.last = False
                if not self.seenchp:
                    self.seenchp = True

                cidx = self._idx
                self.eat_delim()

                if self.cur_tok_typ() != 'NUM':
                    # BUG FIX: previously reported ('DAT', 'VOL', vidx);
                    # `vidx` is unbound when a chapter token appears
                    # before any volume token (NameError).
                    self.regex_mismatch('DAT', 'CHP', cidx)
                    self._idx += 1
                    continue

                cval = self.cur_tok_val()
                self._chps.append(cval)

                # we need this line in case of a range
                # with a fractional e.g. chp1.5-3 in which
                # case we assume the successive chapters are
                # whole chapters.
                cval = int(cval) + 1
                self.eat_delim(True)

                if self.cur_tok_typ() == 'RNG':
                    self.eat_delim()

                    if self._idx == len(self._alltoks):
                        # open-ended range
                        # BUG FIX: previously appended vidx to self._vols
                        # (copy-paste from the VOL branch).
                        self._chps.append(cidx)
                        self._chps.append(self.ALL)
                        continue
                    elif self.cur_tok_typ() == 'NUM':
                        for n in range(cval, int(self.cur_tok_val()+1)):
                            self._chps.append(float(n))

                        if self.cur_tok_val() % 1:
                            self._chps.append(self.cur_tok_val())

                        self._idx += 1

                continue # XXX
            elif t == 'COM':
                # Comma continues the most recent vol/chp list.
                if self.last is None:
                    self.regex_mismatch('DAT', 'COM')
                    continue

                comidx = self._idx
                self.eat_delim()

                if self.cur_tok_typ() != 'NUM':
                    self.regex_mismatch('DAT', 'COM', comidx)
                    continue

                comval = self.cur_tok_val()
                self.push_to_last(comval)
                self.eat_delim(True)

                if self.cur_tok_typ() == 'RNG':
                    comval = int(comval) + 1
                    self.eat_delim()

                    if self.cur_tok_typ() == 'NUM':
                        for n in range(comval, int(self.cur_tok_val())+1):
                            self.push_to_last(float(n))
            elif t == 'RNG':
                self.regex_mismatch('DLM', 'RNG')
            elif t == 'NUM':
                # spotted a number without a vol/chp prefix
                nidx = self._idx
                self.eat_delim(True)

                if self.cur_tok_typ() == 'COM':
                    self.eat_delim()

                    if self.cur_tok_typ() != 'NUM':
                        self.regex_mismatch('DAT', 'NUM', nidx)
                        self.regex_mismatch('DAT', 'COM')
                        self._idx += 1
                        continue

                    wildnums.append(self._alltoks[nidx])
                elif self.cur_tok_typ() == 'RNG':
                    self.eat_delim()

                    if self.cur_tok_typ() != 'NUM':
                        self.regex_mismatch('DAT', 'NUM', nidx)
                        self.regex_mismatch('DAT', 'RNG')
                        self._idx += 1
                        continue

                    # Expand the bare range into a list on the token.
                    st = self.get_tok_val(nidx)
                    self._alltoks[nidx]['val'] = tmprng = []
                    tmprng.append(st)
                    rngb = int(st) + 1

                    for n in range(rngb, int(self.cur_tok_val())+1):
                        tmprng.append(float(n))

                    if self.cur_tok_val() % 1:
                        tmprng.append(float(self.cur_tok_val()))

                    wildnums.append(self._alltoks[nidx])
                elif self.cur_tok_typ() == 'DAT':
                    self.regex_mismatch('DAT', 'NUM')
                else:
                    wildnums.append(self._alltoks[nidx])
            elif t in {'PLT', 'PRE', 'PRL', 'ART'}:
                # shouldn't have vol/chp
                if self._vols or self._chps:
                    self.regex_mismatch('DAT', t)
                    self._idx += 1
                    continue

                self.other = t
            elif t == 'OMK':
                # probably should have vol/chp
                if not self._vols and not self._chps:
                    _g.log.warning('regex picked up a bonus type without '
                                'a vol/chp identifier, which may be '
                                'incorrect. Adding anyway...')

                self.other = t
            elif t == 'ALL':
                self._all = True
            elif t == 'GRB':
                # A bracketed group that doesn't open with a recognized
                # keyword is treated as a (scanlator) tag.
                if self.get_tok_typ(self._idx+1) not in {'VOL', 'CHP', 'ALL',
                                              'OMK', 'PLT', 'PRE',
                                              'PRL', 'ART'}:
                    self._idx += 1

                    if (self.cur_tok_typ() == 'NUM' and
                        self.get_tok_typ(self._idx+1) != 'DAT'):
                        continue

                    tmptag = ''

                    while self.cur_tok_typ() not in {'GRE', None}:
                        if self.cur_tok_typ() == 'NUM':
                            self.regex_mismatch('DAT', 'NUM')

                        tmptag += str(self.cur_tok_val())
                        self._idx += 1

                    if self.cur_tok_val() is None:
                        _out.die('BUG: tag matching couldn`t find GRE')

                    if tmptag[:len(title)].lower().strip() == title.lower():
                        if (self.get_tok_typ(self._idx-1) in
                            {'PLT', 'PRE', 'PRO', 'PRL', 'ART', 'OMK'}):
                            continue # non-group tag with title in text

                    self._tag.append(tmptag)
            elif t == 'DAT':
                # BUG FIX: the joined string was previously discarded
                # (bare expression), so DAT tokens never reached the
                # title. Assign the result.
                self._title = ''.join([self._title, self.cur_tok_val()])

                if self.get_tok_val(self._idx+1) == ' ':
                    self._title += ' '

            self._idx += 1

        if wildnums:
            # These are numbers that did not have
            # a prefix, so we do our best to guess.
            wnls    = [n['val'] for n in wildnums]
            wnsubls = []

            # NOTE(review): wnls is extended while being iterated; this
            # terminates because the appended elements are floats, not
            # lists, but verify before touching.
            for n in wnls:
                if isinstance(n, list):
                    wnls.extend(n)
                    wnsubls.append(n)

            if wnsubls:
                for l in wnsubls: wnls.remove(l)

            del wnsubls

            if len(wildnums[0]['raw']) >= 3:
                dot = wildnums[0]['raw'].find('.')

                # A dot in position 0 or 1 suggests a fractional vol/chp
                # number, not a chapter count — skip in that case.
                if not (-1 < dot < 2):
                    self._chps.extend(sorted(wnls))
            elif not self._vols and not self._chps:
                if not max(wnls) % 100:
                    # assuming chp
                    self._chps.extend(sorted(wnls))
                else:
                    # assuming vol
                    self._vols.extend(sorted(wnls))
            elif not self._vols:
                # assuming vol
                self._vols.extend(sorted(wnls))
            elif not self._chps:
                # assuming chp
                self._chps.extend(sorted(wnls))

        self._title = self._title.strip()

        self._vols = sorted(set(self._vols))
        self._chps = sorted(set(self._chps))
# Example 7
def get_listing(manga):
    """Return (listing, title, path-or-url) for `manga`.

    When the local JSON cache is enabled, try to resolve the title
    there first; on a cache miss, or when the cache is disabled, fall
    back to an online search query (prompting the user to choose among
    multiple matches).
    """
    badret = ('', '')

    if _g.conf._usecache:
        # Lower-cased title for the case-insensitive leaf comparison.
        # (Previously passed via a `global mlow` hack; a closure
        # variable keeps the state local to this call.)
        mlow = manga.lower()

        # XXX move this
        def match_dir(diriter, ldict):
            """Walk the NWO path components down the cached JSON tree."""
            try:
                cdir = next(diriter)
            except StopIteration:
                # Reached the title level: match case-insensitively.
                for cdict in ldict:
                    if cdict['name'].lower() == mlow:
                        return (cdict['contents'], cdict['name'])
                return None

            for cdict in ldict:
                if cdict['name'] == cdir:
                    return match_dir(diriter, cdict['contents'])
            return None

        jsonloc = os.path.join(_g.conf._home, '.cache', 'madodl',
                               'files.json') \
            if not _g.conf._cachefile else _g.conf._cachefile

        jsondirloc = os.path.dirname(jsonloc)

        # Fetch the listing JSON on first use.
        if not os.path.exists(jsonloc):
            os.makedirs(jsondirloc, 0o770, True)
            _curl.curl_json_list(jsonloc, True)

        assert os.path.exists(jsonloc)

        path = _util.create_nwo_path(manga)
        d1, d2, d3 = path.split('/')
        mdir = None

        with breaks(open(jsonloc, errors='surrogateescape')) as f:
            jobj = json.load(f)

            # Descend into the top-level 'Manga' directory.
            for o in jobj[0].get('contents'):
                if o['name'] == 'Manga':
                    jobj = o['contents']
                    break

            mdir, title = match_dir(iter((d1, d2, d3)), jobj) or badret

            if not mdir:
                _g.log.warning("couldn't find title in JSON file. Trying "
                               "online query.")
                _g.conf._found_in_cache = False
                raise breaks.Break

            _g.conf._found_in_cache = True
            _g.conf._cururl = 'https://{}{}{}/{}/{}/{}'.format(loc['DOMAIN'],
                                            loc['MLOC'], d1, d2, d3, title)

            _g.log.info('\n-----\n{}-----'.format(mdir))

            path = '/'.join((path, title))

            return (mdir, title, path)

    qout = search_query(manga).getvalue().decode()
    qp   = _parsers.ParseQuery()
    qp.feed(qout)

    # FIXME:
    # this is a temporary workaround to
    # filter out non-manga results until
    # madokami allows for this granularity itself.
    qp.mresultnum = 0
    qp.mresults   = []
    for url, r in qp.results:
        if r.startswith('/Manga') and r.count('/') == 5:
            qp.mresults.append([url, r])
            qp.mresultnum += 1

    if qp.mresultnum == 0:
        _out.die('manga not found')

    if qp.mresultnum > 1:
        print('Multiple matches found. Please choose from the '
              'selection below:\n')
        i = 1
        for url, f in qp.mresults:
            print('{}: {}'.format(i, os.path.basename(f)))
            i += 1

        print()

        # Loop until the user picks a valid result index.
        while True:
            try:
                ch = int(input('choice > '))
                if ch in range(1, i):
                    break
                print('Pick a number between 1 and {}'.format(i-1))
            except ValueError:
                print('Invalid input.')

        m = qp.mresults[ch-1][0]
        title = os.path.basename(qp.mresults[ch-1][1])
    else:
        m = qp.mresults[0][0]
        title = os.path.basename(qp.mresults[0][1])
        _out._('one match found: {}'.format(title))

    dirls = search_exact(m, True).getvalue().decode()

    _g.log.info('\n-----\n{}-----'.format(dirls))

    return (dirls, title, m)