Python BeautifulSoup.json Examples

Programming Language: Python

Namespace/Package Name: BeautifulSoup

Class/Type: BeautifulSoup

Method/Function: json

Examples at hotexamples.com: 4

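Python BeautifulSoup.json - 4 examples found. These are the top-rated real-world Python examples of BeautifulSoup.BeautifulSoup.json extracted from open source projects. Note that `json` is not a method BeautifulSoup itself provides: Examples #1 and #2 attach a `json` callable to the soup object, and in Examples #3 and #4 the `.json()` call is made on a `requests` response. You can rate examples to help us improve their quality.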

Frequently Used Methods

Show Hide

BeautifulSoup(30)

decompose(30)

first(30)

find_all(30)

findAll(30)

find(30)

fetch(30)

feed(30)

getText(29)

insert(20)

findChildren(19)

body(12)

close(11)

__str__(11)

encode(8)

new_tag(6)

findChild(5)

append(4)

prettify(4)

findSelect(4)

decode(4)

get(4)

__unicode__(3)

goahead(3)

lower(3)

div(3)

findall(3)

pretify(3)

__init__(3)

firstText(2)

pop(2)

data(2)

findNext(2)

read(2)

index(1)

html(1)

query(1)

json(1)

load(1)

re_left(1)

noscript(1)

orig_url(1)

partition(1)

popTag(1)

pretiffy(1)

head(1)

findNextSiblings(1)

group(1)

encodeContents(1)

attrs(1)

Example #1

Show file

File: gd.py Project: exshin/glassdoor

def get(company):
    """Perform an HTTP GET for a Glassdoor search page and return the soup.

    The query is handed to ``requests`` through its ``params`` argument so
    that keywords containing reserved characters (``&``, ``#``, ``=``, ...)
    are percent-encoded instead of corrupting the query string.  Pass the
    raw, un-encoded company name.

    Args:
        company: Search keyword, sent as the ``sc.keyword`` query parameter.

    Returns:
        The parsed BeautifulSoup document with two callables attached:
        ``soup.json()`` invokes ``parse(soup, raw=True)`` and
        ``soup.data()`` decodes that result with ``json.loads``.
    """
    # A list of pairs (not a dict) keeps the parameters in the same order
    # as the original hand-built query string.
    params = [
        ('clickSource', 'searchBtn'),
        ('typedKeyword', ''),
        ('sc.keyword', company),
    ]
    r = requests.get(GLASSDOOR_API, params=params)
    soup = BeautifulSoup(r.content)
    soup.json = partial(parse, soup, raw=True)
    soup.data = lambda: json.loads(soup.json())
    return soup

Example #2

Show file

File: gd.py Project: ksjeyabarani/Java

def get(company):
    """Perform an HTTP GET for a Glassdoor page and return the soup.

    The search query is handed to ``requests`` through its ``params``
    argument so that keywords containing reserved characters (``&``, ``#``,
    ``=``, ...) are percent-encoded instead of corrupting the query string.
    Pass the raw, un-encoded company name.

    When the first response contains a ``div`` with class ``sortBar``, the
    link returned by ``parse_exactMatch`` is fetched (relative to
    ``GLASSDOOR_ROOT``) and that second page is used instead.

    Args:
        company: Search keyword, sent as the ``sc.keyword`` query parameter.

    Returns:
        The parsed BeautifulSoup document with two callables attached:
        ``soup.json()`` invokes ``parse(soup, raw=True)`` and
        ``soup.data()`` decodes that result with ``json.loads``.
    """
    # A list of pairs (not a dict) keeps the parameters in the same order
    # as the original hand-built query string.
    params = [
        ('clickSource', 'searchBtn'),
        ('typedKeyword', ''),
        ('sc.keyword', company),
    ]
    r = requests.get(GLASSDOOR_API, params=params)
    soup = BeautifulSoup(r.content)
    # NOTE(review): a 'sortBar' div presumably marks a multi-result listing
    # rather than a company page -- confirm against the live markup.
    if soup.findAll('div', {'class': 'sortBar'}):
        link = parse_exactMatch(soup)
        r = requests.get('%s%s' % (GLASSDOOR_ROOT, link))
        soup = BeautifulSoup(r.content)
    soup.json = partial(parse, soup, raw=True)
    soup.data = lambda: json.loads(soup.json())
    return soup

Example #3

Show file

File: utils.py Project: chembl/chembl_business_model

def metaFromDoi(doi):
    """Collect bibliographic metadata for a DOI or PubMed id.

    Steps, in order:

    1. Search PubMed (``Bio.Entrez.esearch``) for ``doi``, take the text of
       the first ``id`` element of the answer and fetch that record.
    2. Scrape journal, title, abstract and author fields from the record.
    3. Look the document up in ``Docs`` (by PubMed id when ``doi`` is
       numeric, otherwise by DOI, then by the ids found on PubMed) and use
       the stored row to fill whatever PubMed did not provide.
    4. Ask the ``CHEMBLLIKE`` endpoint for a prediction (best effort).

    Args:
        doi: A DOI string, or a PubMed id (anything ``int()`` accepts).

    Returns:
        dict with the keys ``journal``, ``authors``, ``pubmed``, ``doi``,
        ``title``, ``abstract``, ``doc_id`` and ``chembl_like`` ("Yes" or
        "No"); ``first_page``, ``last_page`` and ``journal['pagination']``
        are added only when a matching ``Docs`` row exists.

    Raises:
        AttributeError: if the PubMed search answer has no ``id`` element.
    """
    from Bio import Entrez
    from BeautifulSoup import BeautifulSoup
    from chembl_business_model.models import JournalArticles, Docs

    def _text(tag):
        # Text of a tag, or '' when the lookup found nothing (falsy).
        return tag.getText() if tag else ''

    def _parse(handle):
        # Parse an Entrez response and always release the handle.
        try:
            return BeautifulSoup(handle.read())
        finally:
            handle.close()

    doc_id = None

    meta = {'journal': {'pubDate': {}}, 'authors': []}
    journal_meta = meta['journal']
    pub_date = journal_meta['pubDate']

    Entrez.email = settings.ADMINS[0][1]
    record = _parse(Entrez.esearch(db="pubmed", term=str(doi)))
    # NOTE(review): a search without hits leaves ``record.id`` empty and the
    # next line raises AttributeError -- callers may want a friendlier error.
    pubmed_id = str(record.id.getText())
    result = _parse(Entrez.efetch(db="pubmed", id=pubmed_id, rettype="gb"))

    journal_meta['volume'] = _text(result.volume)
    journal_meta['issue'] = _text(result.issue)
    meta['pubmed'] = pubmed_id
    meta['doi'] = _text(result.elocationid)
    meta['title'] = _text(result.articletitle)
    meta['abstract'] = _text(result.abstracttext)

    journal = result.journal
    if journal:
        journal_meta['issn'] = _text(journal.issn)
        journal_meta['title'] = _text(journal.title)
        journal_meta['ISOAbbreviation'] = _text(journal.isoabbreviation)
        pubdate = journal.pubdate
        if pubdate:
            pub_date['year'] = _text(pubdate.year)
            pub_date['month'] = _text(pubdate.month)
            pub_date['day'] = _text(pubdate.day)

    if result.authorlist:
        for child in result.authorlist.childGenerator():
            # Skip whitespace-only nodes between the author elements.
            if child and str(child).strip():
                author = BeautifulSoup(str(child))
                # Entries without a forename are ignored; a missing last
                # name or initials no longer raises, it becomes ''.
                if author.forename:
                    meta['authors'].append({
                        'forename': _text(author.forename),
                        'lastname': _text(author.lastname),
                        'initials': _text(author.initials),
                    })

    # A purely numeric argument is treated as a PubMed id, anything else as
    # a DOI.  Only the int() conversion is guarded by the try.
    try:
        requested_pubmed_id = int(doi)
    except ValueError:
        print('searching doc of doi = %s' % doi)
        q = Docs.objects.filter(doi__exact=doi)
    else:
        print('searching doc of pubmed_id = %s' % requested_pubmed_id)
        q = Docs.objects.filter(pubmed_id=requested_pubmed_id)

    if len(q):
        doc_id = q[0].pk
    else:
        # Fall back to the identifiers PubMed itself reported.
        print('searchuin')
        q = Docs.objects.filter(pubmed_id=int(pubmed_id))
        if len(q):
            doc_id = q[0].pk
        elif meta.get('doi'):
            q = Docs.objects.filter(doi__exact=meta['doi'])
            if len(q):
                doc_id = q[0].pk

    if doc_id is not None:
        doc = q[0]
        doc_journal = doc.journal
        arts = JournalArticles.objects.filter(pk=doc_id)
        art = arts[0] if len(arts) else None

        # ``.get`` is required here: the journal / pubDate keys only exist
        # when PubMed returned the corresponding elements.
        if not journal_meta.get('title'):
            journal_meta['title'] = doc_journal.title if doc_journal else None
        if not journal_meta.get('ISOAbbreviation'):
            journal_meta['ISOAbbreviation'] = doc_journal.iso_abbreviation if doc_journal else None
        if not journal_meta.get('issn'):
            journal_meta['issn'] = doc_journal.issn_print if doc_journal else None
        if not journal_meta.get('issn'):
            journal_meta['issn'] = doc_journal.issn_electronic if doc_journal else None
        # Volume and issue stored in the database always win over PubMed.
        journal_meta['volume'] = doc.volume
        journal_meta['issue'] = doc.issue
        for part in ('year', 'month', 'day'):
            if not pub_date.get(part):
                pub_date[part] = getattr(art, part) if art else None
        journal_meta['pagination'] = art.pagination if art else None
        meta['first_page'] = doc.first_page
        meta['last_page'] = doc.last_page
        if not meta['title']:
            meta['title'] = doc.title
        if not meta['abstract']:
            meta['abstract'] = doc.abstract
        if not meta['authors']:
            meta['authors'] = doc.authors

    meta['doc_id'] = doc_id

    meta['chembl_like'] = "No"

    title = urlquote(meta['title'])
    abstract = urlquote(meta['abstract'])
    url = '%sCHEMBLLIKE/%s/%s' % (settings.PIPLINE_PILOT_ENDPOINT, title, abstract)
    # The prediction is best effort: any failure keeps the "No" default, but
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        response = requests.get(url, timeout=60)
        if response.status_code == 200 and response.json()["Prediction"]:
            meta['chembl_like'] = "Yes"
    except Exception as exc:
        print('chembl_like prediction failed: %r' % (exc,))

    return meta

Example #4

Show file

File: utils.py Project: tsufz/chembiohub_ws

def metaFromDoi(doi):
    """Collect bibliographic metadata for a DOI or PubMed id.

    Steps, in order:

    1. Search PubMed (``Bio.Entrez.esearch``) for ``doi``, take the text of
       the first ``id`` element of the answer and fetch that record.
    2. Scrape journal, title, abstract and author fields from the record.
    3. Look the document up in ``Docs`` (by PubMed id when ``doi`` is
       numeric, otherwise by DOI, then by the ids found on PubMed) and use
       the stored row to fill whatever PubMed did not provide.
    4. Ask the ``CHEMBLLIKE`` endpoint for a prediction (best effort).

    Args:
        doi: A DOI string, or a PubMed id (anything ``int()`` accepts).

    Returns:
        dict with the keys ``journal``, ``authors``, ``pubmed``, ``doi``,
        ``title``, ``abstract``, ``doc_id`` and ``chembl_like`` ("Yes" or
        "No"); ``first_page``, ``last_page`` and ``journal['pagination']``
        are added only when a matching ``Docs`` row exists.

    Raises:
        AttributeError: if the PubMed search answer has no ``id`` element.
    """
    from Bio import Entrez
    from BeautifulSoup import BeautifulSoup
    from chembl_business_model.models import JournalArticles, Docs

    def _text(tag):
        # Text of a tag, or '' when the lookup found nothing (falsy).
        return tag.getText() if tag else ''

    def _parse(handle):
        # Parse an Entrez response and always release the handle.
        try:
            return BeautifulSoup(handle.read())
        finally:
            handle.close()

    doc_id = None

    meta = {'journal': {'pubDate': {}}, 'authors': []}
    journal_meta = meta['journal']
    pub_date = journal_meta['pubDate']

    Entrez.email = settings.ADMINS[0][1]
    record = _parse(Entrez.esearch(db="pubmed", term=str(doi)))
    # NOTE(review): a search without hits leaves ``record.id`` empty and the
    # next line raises AttributeError -- callers may want a friendlier error.
    pubmed_id = str(record.id.getText())
    result = _parse(Entrez.efetch(db="pubmed", id=pubmed_id, rettype="gb"))

    journal_meta['volume'] = _text(result.volume)
    journal_meta['issue'] = _text(result.issue)
    meta['pubmed'] = pubmed_id
    meta['doi'] = _text(result.elocationid)
    meta['title'] = _text(result.articletitle)
    meta['abstract'] = _text(result.abstracttext)

    journal = result.journal
    if journal:
        journal_meta['issn'] = _text(journal.issn)
        journal_meta['title'] = _text(journal.title)
        journal_meta['ISOAbbreviation'] = _text(journal.isoabbreviation)
        pubdate = journal.pubdate
        if pubdate:
            pub_date['year'] = _text(pubdate.year)
            pub_date['month'] = _text(pubdate.month)
            pub_date['day'] = _text(pubdate.day)

    if result.authorlist:
        for child in result.authorlist.childGenerator():
            # Skip whitespace-only nodes between the author elements.
            if child and str(child).strip():
                author = BeautifulSoup(str(child))
                # Entries without a forename are ignored; a missing last
                # name or initials no longer raises, it becomes ''.
                if author.forename:
                    meta['authors'].append({
                        'forename': _text(author.forename),
                        'lastname': _text(author.lastname),
                        'initials': _text(author.initials),
                    })

    # A purely numeric argument is treated as a PubMed id, anything else as
    # a DOI.  Only the int() conversion is guarded by the try.
    try:
        requested_pubmed_id = int(doi)
    except ValueError:
        print('searching doc of doi = %s' % doi)
        q = Docs.objects.filter(doi__exact=doi)
    else:
        print('searching doc of pubmed_id = %s' % requested_pubmed_id)
        q = Docs.objects.filter(pubmed_id=requested_pubmed_id)

    if len(q):
        doc_id = q[0].pk
    else:
        # Fall back to the identifiers PubMed itself reported.
        print('searchuin')
        q = Docs.objects.filter(pubmed_id=int(pubmed_id))
        if len(q):
            doc_id = q[0].pk
        elif meta.get('doi'):
            q = Docs.objects.filter(doi__exact=meta['doi'])
            if len(q):
                doc_id = q[0].pk

    if doc_id is not None:
        doc = q[0]
        doc_journal = doc.journal
        arts = JournalArticles.objects.filter(pk=doc_id)
        art = arts[0] if len(arts) else None

        # ``.get`` is required here: the journal / pubDate keys only exist
        # when PubMed returned the corresponding elements.
        if not journal_meta.get('title'):
            journal_meta['title'] = (
                doc_journal.title if doc_journal else None)
        if not journal_meta.get('ISOAbbreviation'):
            journal_meta['ISOAbbreviation'] = (
                doc_journal.iso_abbreviation if doc_journal else None)
        if not journal_meta.get('issn'):
            journal_meta['issn'] = (
                doc_journal.issn_print if doc_journal else None)
        if not journal_meta.get('issn'):
            journal_meta['issn'] = (
                doc_journal.issn_electronic if doc_journal else None)
        # Volume and issue stored in the database always win over PubMed.
        journal_meta['volume'] = doc.volume
        journal_meta['issue'] = doc.issue
        for part in ('year', 'month', 'day'):
            if not pub_date.get(part):
                pub_date[part] = getattr(art, part) if art else None
        journal_meta['pagination'] = art.pagination if art else None
        meta['first_page'] = doc.first_page
        meta['last_page'] = doc.last_page
        if not meta['title']:
            meta['title'] = doc.title
        if not meta['abstract']:
            meta['abstract'] = doc.abstract
        if not meta['authors']:
            meta['authors'] = doc.authors

    meta['doc_id'] = doc_id

    meta['chembl_like'] = "No"

    title = urlquote(meta['title'])
    abstract = urlquote(meta['abstract'])
    url = '%sCHEMBLLIKE/%s/%s' % (settings.PIPLINE_PILOT_ENDPOINT, title,
                                  abstract)
    # The prediction is best effort: any failure keeps the "No" default, but
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        response = requests.get(url, timeout=60)
        if response.status_code == 200 and response.json()["Prediction"]:
            meta['chembl_like'] = "Yes"
    except Exception as exc:
        print('chembl_like prediction failed: %r' % (exc,))

    return meta