def test_deplumpen_failures(self):
        # Strings that deplumpen is expected to hand back unchanged.
        for unchanged in ('asdf', '012345'):
            self.assertEqual(utils.deplumpen(unchanged), unchanged)

        # Non-string input is expected to be rejected with a ValueError.
        for rejected in (None, [], {}, (), BaseCase):
            with self.assertRaises(ValueError):
                utils.deplumpen(rejected)
Esempio n. 2
0
    def test_deplumpen_failures(self):
        # Strings that deplumpen is expected to hand back unchanged.
        for unchanged in ('asdf', '012345'):
            self.assertEqual(utils.deplumpen(unchanged), unchanged)

        # Non-string input is expected to be rejected with a ValueError.
        for rejected in (None, [], {}, (), BaseCase):
            with self.assertRaises(ValueError):
                utils.deplumpen(rejected)
Esempio n. 3
0
def path_count(pair):
    """figures out the type of the given path using the suffix (if one available)

    `pair` is indexed as `pair[0]` (the url path, a string) and `pair[1]`
    (a value handed to `int()`).

    Returns a tuple of `(article, TYPE_MAP[suffix], int(pair[1]))`.
    If the suffix is not a key of `TYPE_MAP` (or anything else inside the
    `try` block raises an `AssertionError`) a warning is logged and `None`
    is returned instead.
    """
    try:
        path = pair[0]
        # prefix matching is case insensitive, the split is done on the original path
        lowered = path.lower()

        if lowered.startswith('/content/early/'):
            # handles POA article variation 1 "/content/early/yyyy/mm/dd/doi/" type urls
            bits = path.split('/', 6)
            bits[-1] = utils.deplumpen(bits[-1])

        elif lowered.startswith('/content/elife/early/'):
            # handles POA article variation 2 "/content/elife/early/yyyy/mm/dd/doi/" type urls
            bits = path.split('/', 7)
            bits[-1] = utils.deplumpen(bits[-1])

        elif lowered.startswith('/content/elife/'):
            # handles valid but unsupported /content/elife/volume/id paths
            # these paths appear in PDF files I've been told
            bits = path.split('/', 4)

        else:
            # handles standard /content/volume/id/ paths
            bits = path.split('/', 3)

        art = bits[-1]
        art = art.lower()  # website isn't case sensitive, we are
        more_bits = re.split(SPLITTER, art, maxsplit=1)

        # no split means no suffix; `None` is looked up in TYPE_MAP like any other suffix
        suffix = None
        if len(more_bits) > 1:
            art, suffix = more_bits

        # raised explicitly rather than via `assert` so the check still runs
        # when Python is started with -O (which strips assert statements)
        if suffix not in TYPE_MAP:
            raise AssertionError("unknown suffix %r! received: %r split to %r" % (
                suffix, pair, more_bits))
        return art, TYPE_MAP[suffix], int(pair[1])

    except AssertionError:
        # we have an unhandled path
        # `warning` rather than the deprecated `warn` alias
        LOG.warning("skpping unhandled path %s", pair)
        return None
Esempio n. 4
0
def path_count(pair):
    """figures out the type of the given path using the suffix (if one available)

    `pair` is indexed as `pair[0]` (the url path, a string) and `pair[1]`
    (a value handed to `int()`).

    Returns a tuple of `(article, TYPE_MAP[suffix], int(pair[1]))`.
    If the suffix is not a key of `TYPE_MAP` (or anything else inside the
    `try` block raises an `AssertionError`) a warning is logged and `None`
    is returned instead.
    """
    try:
        path = pair[0]
        # prefix matching is case insensitive, the split is done on the original path
        lowered = path.lower()

        if lowered.startswith('/content/early/'):
            # handles POA article variation 1 "/content/early/yyyy/mm/dd/doi/" type urls
            bits = path.split('/', 6)
            bits[-1] = utils.deplumpen(bits[-1])

        elif lowered.startswith('/content/elife/early/'):
            # handles POA article variation 2 "/content/elife/early/yyyy/mm/dd/doi/" type urls
            bits = path.split('/', 7)
            bits[-1] = utils.deplumpen(bits[-1])

        elif lowered.startswith('/content/elife/'):
            # handles valid but unsupported /content/elife/volume/id paths
            # these paths appear in PDF files I've been told
            bits = path.split('/', 4)

        else:
            # handles standard /content/volume/id/ paths
            bits = path.split('/', 3)

        art = bits[-1]
        art = art.lower()  # website isn't case sensitive, we are
        more_bits = re.split(SPLITTER, art, maxsplit=1)

        # no split means no suffix; `None` is looked up in TYPE_MAP like any other suffix
        suffix = None
        if len(more_bits) > 1:
            art, suffix = more_bits

        # raised explicitly rather than via `assert` so the check still runs
        # when Python is started with -O (which strips assert statements)
        if suffix not in TYPE_MAP:
            raise AssertionError("unknown suffix %r! received: %r split to %r" % (
                suffix, pair, more_bits))
        return art, TYPE_MAP[suffix], int(pair[1])

    except AssertionError:
        # we have an unhandled path
        # `warning` rather than the deprecated `warn` alias
        LOG.warning("skpping unhandled path %s", pair)
        return None
 def test_deplumpen(self):
     # "eLife.01234" is expected to be shortened to "e01234"
     expected = "e01234"
     self.assertEqual(expected, utils.deplumpen("eLife.01234"))
Esempio n. 6
0
 def test_deplumpen(self):
     # "eLife.01234" is expected to be shortened to "e01234"
     expected = "e01234"
     self.assertEqual(expected, utils.deplumpen("eLife.01234"))