Example #1
0
class BaseParser(object):
    """
    The basic parser that will be inherited by all. Provides an ingestion
    interface.

    Subclasses must implement the ``valid`` property and ``_parse_soup``.
    """

    def __init__(self, listing):
        """
        Constructor requires a listing massaged in from an rss feed.

        :param listing: dict with at least a 'url' key pointing at the
            page to scrape.
        """
        self.listing = listing
        # Close the HTTP connection explicitly instead of leaking it
        # until garbage collection releases the socket.
        response = urllib.urlopen(self.listing['url'])
        try:
            self.soup = BeautifulSoup(response.read())
        finally:
            response.close()
        self.api = CyblerAPI()

    @property
    def valid(self):
        """Whether the listing should be inserted; subclasses decide."""
        raise NotImplementedError()

    def _parse_soup(self):
        """Subclass hook: extract fields from self.soup into self.listing."""
        raise NotImplementedError()

    def ingest(self):
        """
        The actual ingestion code
        """
        #Use the soup to modify the listing
        self._parse_soup()
        #Check if the listing is valid and update via the API
        if self.valid:
            self.api.insert(ASSOCIATED_RESOURCE, self.listing)
Example #2
0
 def __init__(self, listing):
     """
     Constructor requires a listing massaged in from an rss feed.

     :param listing: dict with at least a 'url' key pointing at the
         page to scrape.
     """
     self.listing = listing
     # Close the HTTP connection explicitly instead of leaking it
     # until garbage collection releases the socket.
     response = urllib.urlopen(self.listing['url'])
     try:
         self.soup = BeautifulSoup(response.read())
     finally:
         response.close()
     self.api = CyblerAPI()
Example #3
0
def process_feed(rss_url, city, state, Parser):
    """
    Processes all elements in the url with the specified Parser
    """
    api = CyblerAPI()
    feed = feedparser.parse(rss_url)
    # First pass: keep only the entries the API has never seen before.
    fresh = [
        entry for entry in feed['items']
        if not api.get("listing", _id=text.url_to_id(entry["id"]))
    ]
    # Second pass: hand each new entry to the async ingestion task.
    for entry in fresh:
        ingest_element.delay(
            {
                "id": text.url_to_id(entry["id"]),
                "url": entry["link"],
                "city": city,
                "state": state,
                "title": entry["title"],
                "description": text.strip_tags(entry.get("summary", "")),
                "type": Parser.__type__,
                "created_on": text.api_date_to_str(entry.get("published_parsed")),
            },
            Parser,
        )