Exemple #1
0
class DiffBotTest(unittest.TestCase):
    """Tests for the DiffBot client's article, follow_add and follow_read calls.

    Each test runs against a real ``DiffBot`` instance created in ``setUp``;
    the client is constructed without arguments, so its developer key has to
    come from the environment (DIFFBOT_KEY).
    """

    def setUp(self):
        self.diffbot = DiffBot() # dev_key has to come from the environment (DIFFBOT_KEY)
        self.test_url = 'http://nomulous.com/'
        # follow_read needs the id that follow_add returns for the test URL.
        self.test_url_id = self.diffbot.follow_add(self.test_url)['id']

    def _assert_non_empty_dict_with_keys(self, payload, keys):
        """Assert that ``payload`` is a non-empty dict containing every key in ``keys``."""
        self.assertIsInstance(payload, dict)
        self.assertNotEqual(len(payload), 0)

        for key in keys:
            # assertIn works on Python 2.7 and 3 (dict.has_key is gone in 3)
            # and reports the missing key when it fails.
            self.assertIn(key, payload)

    def test_http_handler_instance(self):
        self.assertIsInstance(self.diffbot.http_handler(), HttpHandler)

    def test_cache_handler_instance(self):
        self.assertIsInstance(self.diffbot.http_handler().cache_handler(), CacheHandler)

    def test_article_API(self):
        article_info = self.diffbot.article(self.test_url)

        self._assert_non_empty_dict_with_keys(
            article_info,
            ['url', 'text', 'xpath', 'tags', 'raw_response', 'title'])

    def test_follow_add_API(self):
        follow_add_info = self.diffbot.follow_add(self.test_url)

        self._assert_non_empty_dict_with_keys(
            follow_add_info, ['id', 'pubDate', 'title'])

    def test_follow_read_API(self):
        follow_read_info = self.diffbot.follow_read(self.test_url_id)

        self._assert_non_empty_dict_with_keys(
            follow_read_info, ['info', 'items'])
Exemple #2
0
#!flask/bin/python

from app import app, models, db
from diffbot import DiffBot
from datetime import datetime, time

# Create the Diffbot client with the key from the Flask app config and run the
# article API (with summary=True) on the Cheese Board pizza page.
api = DiffBot(app.config['DIFFBOT_KEY'])
cb = api.article('http://cheeseboardcollective.coop/pizza', summary=True)

# Split the 'text' field of the response into a list, one element per line.
text = cb['text'].split('\n')

# Keep only elements 1 through 10 of the split text and drop the rest.
# NOTE(review): presumably these ten lines alternate date / pizza description,
# which is the layout replaceTime expects -- confirm against the page.
schedule = text[1:11]

def replaceTime(t, year=2013):
  """Convert the date strings at the even indexes of ``t`` to datetimes, in place.

  Elements at indexes 0, 2, 4, ... must look like "Tuesday 07/02"
  (weekday name, month/day). Elements at odd indexes are left untouched.

  Args:
    t: mutable sequence whose even-indexed items are "%A %m/%d" strings.
    year: calendar year assigned to every parsed date (default 2013, the
      value that used to be hard-coded).

  Returns:
    The same list object ``t``, after modification.

  Raises:
    ValueError: if an even-indexed element does not match the format or is
      not a valid date in ``year``.
  """
  for idx in range(0, len(t), 2):
    # Parse the year together with month/day instead of patching it in with
    # .replace(): a year-less parse assumes 1900, which rejects 02/29 even
    # when ``year`` is a leap year.
    t[idx] = datetime.strptime("%s %04d" % (t[idx], year), "%A %m/%d %Y")
  return t

def toOrderedPairs(t):
  """Turn a flat list into (even-index item, odd-index item) pairs.

  The list is first passed through replaceTime, so the even-indexed items
  are datetimes by the time they are paired.
  """
  converted = replaceTime(t)

  # Items 0, 2, 4, ... form the first slice and items 1, 3, 5, ... the
  # second; zip pairs each even-indexed item with the item that follows it.
  firsts = converted[::2]
  seconds = converted[1::2]
  return zip(firsts, seconds)
import getpass

email = raw_input("Enter gmail id: ")
# Read the password without echoing it to the terminal.
password = getpass.getpass("Enter Password: ")
# NOTE(review): in the original text this literal was fused onto the password
# prompt line (the text in between had been masked as "******"). It is
# reconstructed here as the key passed to DiffBot(...) below -- confirm.
# A hard-coded API key should move to configuration or the environment.
diffbot_api_key = "c539badebc2f19f621f616c52a0399d0"


# IMAP flow adapted from
# http://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/
mail = imaplib.IMAP4_SSL('imap.gmail.com')
raw_email = None
try:
    mail.login(email, password)
    mail.list()  # Out: list of "folders" aka labels in gmail (result unused).
    mail.select("inbox")  # connect to inbox.

    # Search and return UIDs instead of message sequence numbers.
    result, data = mail.uid('search', None, "ALL")
    # An empty inbox yields an empty id string; avoid indexing [-1] on [].
    uids = data[0].split() if result == 'OK' and data and data[0] else []
    if uids:
        latest_email_uid = uids[-1]
        result, data = mail.uid('fetch', latest_email_uid, '(RFC822)')
        # A successful fetch returns (envelope, body) tuples; take the body.
        if result == 'OK' and data and isinstance(data[0], tuple):
            raw_email = data[0][1]
finally:
    # Always end the IMAP session, even when login, search or fetch raises.
    mail.logout()

#TODO: Process email

# With no message available there is nothing to scan for URLs.
urls_in_email = urlpattern.findall(raw_email) if raw_email else []
api = DiffBot(diffbot_api_key)
extracted_content = []

for urlitem in urls_in_email:
    # urlitem[0] is passed as the URL -- presumably urlpattern has groups, so
    # findall yields tuples; verify against the pattern definition.
    content = api.article(urlitem[0], summary=True)
    extracted_content.append(content)

# Parenthesised form prints the same text on Python 2 and also parses on 3.
print(extracted_content)
import getpass

email = raw_input("Enter gmail id: ")
# Read the password without echoing it to the terminal.
password = getpass.getpass("Enter Password: ")
# NOTE(review): in the original text this literal was fused onto the password
# prompt line (the text in between had been masked as "******"). It is
# reconstructed here as the key passed to DiffBot(...) below -- confirm.
# A hard-coded API key should move to configuration or the environment.
diffbot_api_key = "c539badebc2f19f621f616c52a0399d0"

# IMAP flow adapted from
# http://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/
mail = imaplib.IMAP4_SSL('imap.gmail.com')
raw_email = None
try:
    mail.login(email, password)
    mail.list()  # Out: list of "folders" aka labels in gmail (result unused).
    mail.select("inbox")  # connect to inbox.

    # Search and return UIDs instead of message sequence numbers.
    result, data = mail.uid('search', None, "ALL")
    # An empty inbox yields an empty id string; avoid indexing [-1] on [].
    uids = data[0].split() if result == 'OK' and data and data[0] else []
    if uids:
        latest_email_uid = uids[-1]
        result, data = mail.uid('fetch', latest_email_uid, '(RFC822)')
        # A successful fetch returns (envelope, body) tuples; take the body.
        if result == 'OK' and data and isinstance(data[0], tuple):
            raw_email = data[0][1]
finally:
    # Always end the IMAP session, even when login, search or fetch raises.
    mail.logout()

#TODO: Process email

# With no message available there is nothing to scan for URLs.
urls_in_email = urlpattern.findall(raw_email) if raw_email else []
api = DiffBot(diffbot_api_key)
extracted_content = []

for urlitem in urls_in_email:
    # urlitem[0] is passed as the URL -- presumably urlpattern has groups, so
    # findall yields tuples; verify against the pattern definition.
    content = api.article(urlitem[0], summary=True)
    extracted_content.append(content)

# Parenthesised form prints the same text on Python 2 and also parses on 3.
print(extracted_content)
Exemple #5
0
import lxml
from time import sleep

from sababa.models import Article

# Feedzilla categories: display name -> numeric code.
# NOTE(review): presumably these codes are what fetch_news() expects as its
# `code` argument -- confirm against the caller.
categories = {
    'Art': 13,
    'Sports': 27,
    'Business': 22,
    'Technology': 30,
    'Politics': 3,
    'Health': 11,
    'Entertainment': 6,
    'Science': 8,
}

# Module-level Diffbot client, built from DIFFBOT_TOKEN (defined outside this
# excerpt).
api = DiffBot(DIFFBOT_TOKEN)
# Example call, kept for reference: api.article('http://google.com', summary=True)

def open_landing(url, counter=1, max_attempts=3):
    """Download ``url`` and parse it with BeautifulSoup's 'lxml' parser, retrying on failure.

    Args:
        url: address of the page to fetch.
        counter: number of the first attempt; it is printed after every
            failed attempt and incremented before the next one.
        max_attempts: attempt number at which the function gives up
            (default 3, the value that used to be hard-coded).

    Returns:
        The parsed BeautifulSoup document, or None once the attempt numbered
        ``max_attempts`` (or higher) has failed.
    """
    while True:
        try:
            # Returning from inside the loop means a success on a retry is
            # handed back to the caller; the recursive version dropped the
            # retry's result and returned None instead.
            return BeautifulSoup(urllib2.urlopen(url).read(), 'lxml')
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit stop the retries.
            print(counter)
            # >= also terminates when called with counter above the limit.
            if counter >= max_attempts:
                return None
            counter += 1

def fetch_news(code):
    """Request up to 100 articles (count=100) as JSON for Feedzilla category ``code``.

    NOTE(review): the definition appears to be cut off in this excerpt -- the
    response is assigned but neither used nor returned in the visible code.
    """
    response = requests.get('http://api.feedzilla.com/v1/categories/{}/articles.json?count=100'.format(code))
Exemple #6
0
 def setUp(self):
     """Create the DiffBot client and register the test URL with follow_add."""
     # dev_key has to come from the environment (DIFFBOT_KEY)
     client = DiffBot()
     url = 'http://nomulous.com/'
     self.diffbot = client
     self.test_url = url
     # Keep the 'id' field of the follow_add response for later use.
     added = client.follow_add(url)
     self.test_url_id = added['id']