Ejemplos de DiffbotClient.DiffbotClient en Python

Lenguaje de programación: Python

Namespace/Package Name: client

Clase / Tipo: DiffbotClient

Método / Función: DiffbotClient

Ejemplos en hotexamples.com: 6

Python DiffbotClient.DiffbotClient - 6 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de client.DiffbotClient.DiffbotClient extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

request(7)

DiffbotClient(6)

delete(1)

status(1)

Ejemplo n.º 1

Mostrar archivo

def api_worker(worker_number):
    """Consume URLs from ``api_q`` and forward usable articles to ``file_q``.

    Loops until the sentinel string ``'kill'`` is taken from ``api_q``.
    Every other item is sent to the Diffbot ``article`` API.  The first
    entry of the response's ``objects`` list is put on ``file_q`` when it
    has non-empty ``text`` and its ``pageURL`` equals its
    ``resolvedPageURL``.

    Args:
        worker_number: Identifier used only in the start/stop messages.
    """
    print('Start API {0} worker.'.format(worker_number))

    # ``api_q`` and ``file_q`` are module-level objects that are only read
    # (never rebound) here, so no ``global`` declaration is needed.
    diffbot = DiffbotClient()

    while True:
        url = api_q.get()

        if url == 'kill':
            print('Kill {0} api_worker'.format(worker_number))
            break

        try:
            response = diffbot.request(url, API_TOKEN, 'article')
        except requests.exceptions.HTTPError:
            # Best effort: skip URLs the API rejects and keep the worker alive.
            continue

        if 'objects' not in response:
            continue

        # An empty ``objects`` list previously raised IndexError and ended
        # the worker; treat it as "nothing extracted" instead.
        objects = response['objects']
        if not objects:
            continue
        obj = objects[0]

        # ``.get`` avoids a KeyError when the object carries no ``text`` key.
        if obj.get('text') and obj.get('pageURL') == obj.get(
                'resolvedPageURL'):
            file_q.put(obj)

Ejemplo n.º 2

Mostrar archivo

Archivo: main.py Proyecto: GuanzhouSong/diffbot-python-client

def main():
    """Call the Diffbot ``product`` API on a sample URL and print the result.

    Prints the raw response, the response's type, and the ``title`` of the
    first entry in the response's ``objects`` list.
    """
    diffbot = DiffbotClient()
    token = API_TOKEN
    url = "https://newtonfreelibrary.libcal.com/event/4924168"
    api = "product"
    response = diffbot.request(url, token, api)
    # Single-argument, parenthesised print calls are valid on Python 3 and
    # produce the same output on Python 2.
    print(response)
    print(type(response))
    print(response["objects"][0]["title"])

Ejemplo n.º 3

Mostrar archivo

Archivo: learning.py Proyecto: ssreekanth2000/Six2-Microbes-GPT

def swallowURL(url):
    """Extract *url* with the Diffbot article API and log its paragraphs.

    The response's ``html`` field is split on ``<p>``.  Each piece is
    normalised with ``clean`` and has its tags replaced by spaces.  A piece
    is kept when a further-cleaned copy of it is longer than 10 characters.
    When more than two pieces are kept, one row
    ``[alphanumeric-only url, str(kept pieces)]`` is appended to
    ``pdfs/data.csv``.

    Args:
        url: Address of the page to extract.

    Returns:
        None.  Nothing is written when the response has no ``html`` key.
    """
    diffbot = DiffbotClient()
    token = API_TOKEN
    api = "article"
    response = diffbot.request(url, token, api, version=2)
    if 'html' not in response:
        return

    # Compile once outside the loop; raw strings avoid the invalid "\s"
    # escape sequence the non-raw literal contained.
    tag_re = re.compile(r"<.*?>")
    spaces_re = re.compile(r"\s\s+")
    escaped_tag_re = re.compile(r'&lt;/?[a-z]+&gt;')

    subtexts = response['html'].split("<p>")
    res = []
    for t in subtexts:
        outgt = clean(
            t,
            fix_unicode=True,  # fix various unicode errors
            to_ascii=True,  # transliterate to closest ASCII representation
            lower=False,  # lowercase text
            no_line_breaks=True,  # fully strip line breaks
            no_urls=False,  # replace all URLs with a special token
            no_emails=False,  # replace all email addresses with a special token
            no_phone_numbers=False,  # replace all phone numbers with a special token
            no_numbers=False,  # replace all numbers with a special token
            no_digits=False,  # replace all digits with a special token
            no_currency_symbols=False,  # replace all currency symbols with a special token
            no_punct=False,  # fully remove punctuation
            replace_with_email="<EMAIL>",
            replace_with_phone_number="<PHONE>",
            replace_with_currency_symbol="<CUR>",
            lang="en"  # set to 'de' for German special handling
        )
        outgt = tag_re.sub(" ", outgt)

        # ``ogt`` is a more aggressively cleaned copy used for the length test.
        ogt = outgt.strip()
        ogt = spaces_re.sub(" ", ogt)
        ogt = escaped_tag_re.sub('', ogt)
        ogt = ogt.replace("&rsquo;", "")
        ogt = ogt.replace("&ldquo;", "")
        ogt = ogt.replace("\n", "")
        ogt = ogt.replace("\r", "")
        ogt = ogt.replace("\\n", "")
        if len(ogt) > 10:
            # NOTE(review): the stored value is ``outgt`` (tags stripped
            # only), not the fully cleaned ``ogt`` -- confirm this is intended.
            res.append(outgt)

    if len(res) > 2:
        fields = [''.join(e for e in url if e.isalnum()), str(res)]
        if 'http' in fields[0]:
            # newline='' lets the csv writer control line endings, as the
            # csv module documentation requires for file objects.
            with open('pdfs/data.csv', 'a', newline='') as f:
                writer = csv.writer(f)
                writer.writerow(fields)

Ejemplo n.º 4

Mostrar archivo

Archivo: example.py Proyecto: rsanandres/diffbot-python-client

def diffbotScrape(my_url):
    """Fetch title/type for *my_url* via Diffbot and batch results to JSON.

    Every call increments the module-level ``count`` and stores the
    response in the module-level ``my_dict`` under ``my_url``.  When
    ``count`` reaches 15 the dictionary is dumped to
    ``diffbot<json_count>.json``; afterwards ``count`` and ``my_dict`` are
    reset and ``json_count`` is advanced by one.

    Args:
        my_url: Address of the page to send to the ``article`` API.
    """
    global count
    global json_count
    global my_dict

    count += 1
    client = DiffbotClient()
    # NOTE(review): credential embedded in source -- consider loading it
    # from configuration instead.
    api_token = "2587daf076cad7bba4e58fd272780b2d"
    response = client.request(my_url,
                              api_token,
                              "article",
                              fields=['title', 'type'])
    print("\nPrinting response:\n")
    print(count)

    # Store the response first so it is part of the batch if this call
    # triggers the dump below.
    my_dict[my_url] = response
    print("Writing to my_dict...\n")

    if count != 15:
        return

    out_name = "".join(["diffbot", str(json_count), ".json"])
    # NOTE(review): 'a+' appends; if the file already exists it will end up
    # holding several concatenated JSON documents.
    with open(out_name, 'a+') as out_file:
        json.dump(my_dict, out_file, sort_keys=True, indent=4)
    count = 0
    json_count += 1
    my_dict = {}

Ejemplo n.º 5

Mostrar archivo

from client import DiffbotClient
from config import API_TOKEN
import pprint

# Demo script: call the Diffbot article API on one URL, first with the
# default fields and then with an explicit ``fields`` list, pretty-printing
# each response.
print("Calling article API endpoint on the url: http://shichuan.github.io/javascript-patterns/...\n")
diffbot = DiffbotClient()
token = API_TOKEN
version = 2
url = "http://shichuan.github.io/javascript-patterns/"
api = "article"
pp = pprint.PrettyPrinter(indent=4)

response = diffbot.request(url, token, api, version=version)
print("\nPrinting response:\n")
# pprint() writes to stdout itself and returns None, so wrapping it in a
# print would emit a stray "None" line.
pp.pprint(response)

# The leading "\n" reproduces the blank separator line between the two runs.
print("\nCalling article API endpoint with fields specified on the url: http://shichuan.github.io/javascript-patterns/...\n")
response = diffbot.request(url,
                           token,
                           api,
                           fields=['title', 'type'],
                           version=version)
print("\nPrinting response:\n")
pp.pprint(response)

Ejemplo n.º 6

Mostrar archivo

Archivo: unit_test.py Proyecto: Salil999/diffbot-python-client

 def setUp(self):
     """Replace ``client.requests.get`` with ``fake_get`` and build the client.

     The started patcher is kept on ``self.patcher`` and the client under
     test on ``self.client``.
     """
     # NOTE(review): the patcher is started here; confirm it is stopped in
     # the test teardown.
     requests_get_patcher = patch('client.requests.get', fake_get)
     self.patcher = requests_get_patcher
     requests_get_patcher.start()
     self.client = DiffbotClient()