Beispiel #1
0
def check_tweet(tweet, parent=False):
    """Collect ISBN-like strings from a tweet and from Amazon links it contains.

    tweet: object exposing ``full_text`` and ``in_reply_to_status_id``.
    parent: when true, inspect the status this tweet replies to instead of the
        tweet itself; it is fetched through the module-level ``api`` object.

    Returns the list produced by ``isbnlib.get_isbnlike`` for the tweet text,
    extended with valid ISBN-10/ISBN-13 values found in the redirect target of
    any link that points at an Amazon ``/dp/`` page. Returns ``[]`` when
    ``parent`` is true and the tweet is not a reply.

    Progress is printed to stdout at each step.
    """
    if parent:
        print("In parent")
        print(tweet.full_text)
        print(tweet.in_reply_to_status_id)
        if not tweet.in_reply_to_status_id:
            return []
        tweet = api.get_status(tweet.in_reply_to_status_id, tweet_mode="extended")
        print(tweet.full_text)

    text = tweet.full_text
    words = text.split()
    isbnlike = isbnlib.get_isbnlike(text, level='normal')

    print(isbnlike)
    print(words)

    for word in words:
        # "https..." also starts with "http", so one prefix test is enough.
        if not word.startswith("http"):
            continue
        print(word)
        resp = requests.head(word)
        # A HEAD response that is not a redirect carries no Location header;
        # use .get() so such links are skipped instead of raising KeyError.
        location = resp.headers.get("Location")
        print(location)
        if location and "amazon" in location and "/dp/" in location:
            candidates = isbnlib.get_isbnlike(location, level='normal')
            # dict.fromkeys() drops duplicates while keeping first-seen order.
            for item in dict.fromkeys(candidates):
                if isbnlib.is_isbn10(item) or isbnlib.is_isbn13(item):
                    isbnlike.append(item)

    print(isbnlike)

    return isbnlike
Beispiel #2
0
def parse_csv(input_file):
    """Sort the rows of a CSV file into the ``books`` and ``problems`` lists.

    input_file: path of a comma-separated, double-quote-quoted, UTF-8 file.
        Each row must have at least four columns; column 2 is searched for
        ISBN-like text.

    For every row whose column 2 contains ISBN-like text, the entry
    ``[row_index, row[0], row[1], EAN13(first match), row[3]]`` is appended to
    ``books``; every other row is appended to ``problems`` as
    ``[row[0], row[1], row[2], row[3]]``. Both lists are defined outside this
    function and are mutated in place.

    Returns the ``books`` list.
    """
    # newline='' is what the csv module asks for: it lets the reader itself
    # decide where records end, including newlines inside quoted fields.
    with open(os.path.abspath(input_file), encoding="utf-8",
              newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for index, row in enumerate(reader):
            # Scan the column once and reuse the result for test and value.
            candidates = isbnlib.get_isbnlike(row[2])
            if candidates:
                isbn_final = isbnlib.EAN13(candidates[0])
                books.append([index, row[0], row[1], isbn_final, row[3]])
            else:
                problems.append([row[0], row[1], row[2], row[3]])
    return books
def _isbn(details_url):
    """Get the card isbn

    - details_url: valid url leading to the card's product page

    return: a tuple (isbn, soup) where isbn is the canonical form of the first
    ISBN-like string starting with '978' found in the element with class
    "col49 floatRight" (None when there is none), and soup is the parsed page.

    NOTE(review): when anything fails, the error is logged and only the isbn
    value (None) is returned, not a tuple. That shape is kept as-is because
    callers are not visible here; confirm whether they expect a tuple.
    """
    import isbnlib
    isbn = None
    try:
        log.info("Looking for isbn of {}...".format(details_url))
        req = requests.get(details_url)
        soup = BeautifulSoup(req.content, "lxml")
        node = soup.find(class_="col49 floatRight")
        # A list comprehension instead of filter(): on Python 3 filter()
        # returns an iterator, which is always truthy and cannot be indexed.
        candidates = [it for it in isbnlib.get_isbnlike(node.text)
                      if it.startswith('978')]
        if candidates:
            isbn = isbnlib.canonical(candidates[0])
            log.info("Found isbn of url {}: {}".format(details_url, isbn))

    except Exception as e:
        log.error("Error while getting the isbn from url '{}': {}".format(details_url, e))
        return isbn

    return isbn, soup
Beispiel #4
0
def get_isbn_from_file(file_name, max_pdf_pages=0):
    """Extract the text of a PDF and return the default ISBN found in it.

    file_name: path of the PDF file to read.
    max_pdf_pages: forwarded unchanged as ``maxpages`` to
        ``PDFPage.get_pages``.

    The extracted text is scanned with ``isbnlib.get_isbnlike`` and the
    matches are handed to ``get_default_isbn`` (defined elsewhere), whose
    result is returned.

    Prints a start message and the number of pages processed.
    """
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 function.
    print("-> Getting ISBN from PDF files...")

    # PDFMiner boilerplate
    rsrcmgr = PDFResourceManager()
    sio = StringIO()
    codec = 'utf-8'
    laparams = LAParams()
    device = TextConverter(rsrcmgr, sio, codec=codec, laparams=laparams)
    try:
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # Extract text. Count from 0 so the report matches the pages seen.
        num_pages = 0
        with open(file_name, 'rb') as fp:
            for page in PDFPage.get_pages(fp, maxpages=max_pdf_pages):
                interpreter.process_page(page)
                num_pages += 1

        print("Pages processed = " + str(num_pages))
        # Get text from StringIO
        text = sio.getvalue()
    finally:
        # Cleanup, also when page processing raises
        device.close()
        sio.close()
    default_isbn = get_default_isbn(isbnlib.get_isbnlike(text))
    return default_isbn
Beispiel #5
0
    def clean(self):
        """Validate the ``isbn`` entry of ``self.cleaned_data``.

        Raises ValidationError when the value is missing, contains nothing
        ISBN-like, or is neither a valid ISBN-10 nor a valid ISBN-13.

        Returns the cleaned data mapping.
        NOTE(review): this looks like a Django form ``clean()`` override, whose
        return value replaces ``cleaned_data``; the earlier ``return True``
        would have replaced the mapping with a boolean. Confirm against the
        enclosing class.
        """
        data = self.cleaned_data

        isbn = data.get('isbn')
        # A missing value (None) must be reported as a validation error rather
        # than passed into the ISBN-like search.
        if not isbn or not i.get_isbnlike(isbn):
            raise ValidationError('ISBN does not seem valid')
        if not (i.is_isbn10(isbn) or i.is_isbn13(isbn)):
            raise ValidationError(
                'ISBN does not seem to be a ISBN13 or ISBN10')
        return data
Beispiel #6
0
    def get_isbns_from_text(self):
        """Return the preprocessed ISBN candidates found in this object's text.

        The pages returned by ``self.get_text()`` are joined with newlines,
        scanned with ``isbnlib.get_isbnlike`` at the 'normal' level, and the
        matches are passed through ``preprocess_isbns`` (defined elsewhere).
        """
        full_text = '\n'.join(self.get_text())
        candidates = isbnlib.get_isbnlike(full_text, level='normal')
        return preprocess_isbns(candidates)
Beispiel #7
0
def extract_from_text(text):
    """
    Find every ISBN in a text and return them in canonical form.

    :param text: Some text.
    :returns: A list with the canonical form of each ISBN-like match, in
        order of appearance; matches without a canonical form are dropped.

    >>> extract_from_text("978-3-16-148410-0 9783161484100 9783161484100aa abcd 0136091814 0136091812 9780136091817 123456789X")
    ['9783161484100', '9783161484100', '9783161484100', '0136091814', '123456789X']
    """
    canonical_forms = map(isbnlib.get_canonical_isbn,
                          isbnlib.get_isbnlike(text))
    return [isbn for isbn in canonical_forms if isbn is not None]
Beispiel #8
0
def extract_from_text(text):
    """
    Collect the canonical ISBNs that occur in a text.

    :param text: Some text.
    :returns: A list of canonical ISBNs, one per ISBN-like match that could
        be canonicalised, in order of appearance.

    >>> extract_from_text("978-3-16-148410-0 9783161484100 9783161484100aa abcd 0136091814 0136091812 9780136091817 123456789X")
    ['9783161484100', '9783161484100', '9783161484100', '0136091814', '123456789X']
    """
    found = []
    for candidate in isbnlib.get_isbnlike(text):
        canonical = isbnlib.get_canonical_isbn(candidate)
        # get_canonical_isbn yields None for candidates it rejects.
        if canonical is not None:
            found.append(canonical)
    return found
Beispiel #9
0
 def find_isbns(cls, text):
     """Return the canonical form of every valid ISBN found in ``text``.

     The text is split on whitespace. A token starting with "http" is
     resolved with a HEAD request (its ``Location`` header, or the token
     itself when there is none) and handed to each extractor named in
     ``cls.SERVICES``; any other token is scanned with
     ``isbnlib.get_isbnlike``. Only candidates that are a valid ISBN-10 or
     ISBN-13 are kept.
     """
     candidates = []
     for word in text.split():
         if not word.startswith("http"):
             candidates.extend(isbnlib.get_isbnlike(word, level="normal"))
             continue
         redirect = requests.head(word).headers.get("Location")
         target = redirect or word
         for name in cls.SERVICES:
             extractor = getattr(cls, name)
             candidates.extend(extractor(target))

     valid = []
     for candidate in candidates:
         if isbnlib.is_isbn10(candidate) or isbnlib.is_isbn13(candidate):
             valid.append(isbnlib.canonical(candidate))
     return valid
Beispiel #10
0
 def extract_isbn(value):
     """Return the masked (hyphenated) form of the single ISBN in ``value``.

     Raises ValidationError when:
     - ``value`` cannot be searched or contains nothing ISBN-like
       ("Bad format ..."),
     - more than one ISBN-like string is found ("Too much ISBN numbers"),
     - the single match is neither a valid ISBN-10 nor a valid ISBN-13
       ("It is not ISBN number").
     """
     try:
         isbns = isbnlib.get_isbnlike(value)
     except TypeError as err:
         # Non-text input: the underlying regex search rejects it.
         raise ValidationError(f"Bad format {value}") from err
     if not isbns:
         raise ValidationError(f"Bad format {value}")
     if len(isbns) > 1:
         raise ValidationError("Too much ISBN numbers")

     isbn = isbns[0]
     if not (isbnlib.is_isbn10(isbn) or isbnlib.is_isbn13(isbn)):
         raise ValidationError("It is not ISBN number")
     return isbnlib.mask(isbn)
Beispiel #11
0
 def get_canonical_isbn2(self, line):
     """Return the canonical ISBNs found in ``line``.

     Every ISBN-like match is skipped when it is listed in
     ``self.SPECIAL_ISBN`` or is a substring of an ISBN already collected;
     otherwise its canonical form is appended when ``isbnlib`` returns one.
     A failure inside ``isbnlib`` is logged and the match is skipped.
     """
     isbns = []
     matches = isbnlib.get_isbnlike(line)
     if matches:
         logger.debug('Unchecked [' + ' '.join(matches) + ']')
     for match in matches:
         if match in self.SPECIAL_ISBN or any(match in s for s in isbns):
             continue
         try:
             isbn = isbnlib.get_canonical_isbn(match)
         except Exception:
             # Exception rather than a bare except, so KeyboardInterrupt and
             # SystemExit still propagate.
             logger.error('Error in isbnlib while calling get_canonical_isbn')
             continue
         if isbn:
             isbns.append(isbn)
     return isbns
Beispiel #12
0
def abgerufen(text, pfad: Path):
    """Look up book metadata for the first usable ISBN related to a file.

    text: content to scan for ISBN-like strings; may be empty or None.
    pfad: path of the file; its stem is passed to ``isbn_from_words`` as a
        fallback when ``text`` yields no candidate.

    Returns the first truthy result of ``meta(isbn)`` over the candidates.
    Candidates for which ``meta`` raises are skipped. Returns None when there
    is no candidate at all or every lookup raised; if lookups only produced
    falsy results, the last such result is returned.
    """
    # Start from an empty list so a falsy `text` falls through to the
    # file-name fallback instead of leaving `isbns` unbound.
    isbns = get_isbnlike(str(text), level='normal') if text else []
    if not isbns:
        guessed = isbn_from_words(str(pfad.stem))
        # Keep the fallback only when it actually produced something.
        isbns = [guessed] if guessed else []
    if not isbns:
        return None

    m = None
    for isbn in isbns:
        try:
            m = meta(isbn)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate.
            continue
        if m:
            break
    return m
Beispiel #13
0
 def get_canonical_isbn2(self, line):
     """Collect canonical ISBNs from one line of text.

     ISBN-like matches that appear in ``self.SPECIAL_ISBN``, or that are
     contained in an ISBN collected earlier, are ignored. For the rest, the
     canonical form reported by ``isbnlib`` is kept when it is non-empty.
     Errors raised by ``isbnlib`` are logged and do not stop the scan.
     """
     isbns = []
     matches = isbnlib.get_isbnlike(line)
     if matches:
         logger.debug('Unchecked [' + ' '.join(matches) + ']')
     for match in matches:
         already_seen = any(match in s for s in isbns)
         if match in self.SPECIAL_ISBN or already_seen:
             continue
         try:
             isbn = isbnlib.get_canonical_isbn(match)
         except Exception:
             # Narrowed from a bare except: interpreter-exit signals such as
             # KeyboardInterrupt must not be swallowed here.
             logger.error('Error in isbnlib while calling get_canonical_isbn')
         else:
             if isbn:
                 isbns.append(isbn)
     return isbns
Beispiel #14
0
#!/usr/bin/env python

import sys
import os
import yaml
import isbnlib

# Usage: <script> METAFILE KEY [mask]
# Prints the ISBN-13 stored under KEY in METAFILE's "identifier" list, or a
# built-in fallback ISBN when KEY has no valid ISBN-13. With "mask" as third
# argument the ISBN is printed in hyphenated form.
metafile = sys.argv[1]
# Read through a context manager so the file handle is closed promptly.
with open(metafile, 'r') as handle:
    metadata = handle.read()
# safe_load instead of yaml.load: load() without an explicit Loader can build
# arbitrary Python objects on older PyYAML and is rejected by PyYAML 6.
# An empty document parses to None; treat it as an empty mapping.
yamldata = yaml.safe_load(metadata) or {}

identifier = {}

if "identifier" in yamldata:
    for entry in yamldata["identifier"]:
        if "key" in entry:
            # str(): an unquoted all-digit ISBN is parsed by YAML as an int.
            candidates = isbnlib.get_isbnlike(str(entry["text"]))
            # Skip entries without ISBN-like text instead of failing on [0].
            if candidates and isbnlib.is_isbn13(candidates[0]):
                identifier[entry["key"]] = isbnlib.EAN13(candidates[0])

isbn = identifier.get(sys.argv[2], "9786056644504")

if len(sys.argv) >= 4 and sys.argv[3] == "mask":
    print(isbnlib.mask(isbn))
else:
    print(isbn)
Beispiel #15
0
def get_ISBN_from_barcode_csv(barcode):
    """Return the first ISBN-like string recorded for ``barcode`` in ISBNs.csv.

    The file is read as comma-separated data with '|' as quote character.
    The first row whose column 0 equals ``barcode`` decides the result: the
    first ISBN-like match in its column 2, as a string. Returns None when no
    row matches.

    NOTE(review): the file is opened in 'rb' mode, which is the Python 2
    convention for the csv module; confirm the target Python version.
    """
    with open('ISBNs.csv', 'rb') as handle:
        for record in csv.reader(handle, delimiter=',', quotechar='|'):
            if record[0] != barcode:
                continue
            matches = isbnlib.get_isbnlike(str(record[2]))
            return str(matches[0])
Beispiel #16
0
#!/usr/bin/env python

import sys
import os
import ruamel.yaml as yaml
import isbnlib

# Usage: <script> METAFILE KEY [mask]
# Prints the ISBN-13 stored under KEY in METAFILE's "identifier" list, or a
# built-in fallback ISBN when KEY has no valid ISBN-13. With "mask" as third
# argument the ISBN is printed in hyphenated form.
metafile = sys.argv[1]
# Read through a context manager so the file handle is closed promptly.
with open(metafile, 'r') as handle:
    metadata = handle.read()
# An empty document parses to None; treat it as an empty mapping.
yamldata = yaml.safe_load(metadata) or {}

identifier = {}

if "identifier" in yamldata:
    for entry in yamldata["identifier"]:
        if "key" in entry:
            candidates = isbnlib.get_isbnlike(str(entry["text"]))
            # Skip entries without ISBN-like text instead of failing on [0].
            if candidates and isbnlib.is_isbn13(candidates[0]):
                identifier[entry["key"]] = isbnlib.EAN13(candidates[0])

isbn = identifier.get(sys.argv[2], "9786056644504")

if len(sys.argv) >= 4 and sys.argv[3] == "mask":
    print(isbnlib.mask(isbn))
else:
    print(isbn)