class BaseParser(object):
    """Base parser inherited by all site-specific parsers.

    Fetches the page for a listing, parses it into soup, and provides the
    ``ingest`` entry point that pushes a valid listing to the API.
    Subclasses must implement ``valid`` and ``_parse_soup``.
    """

    def __init__(self, listing):
        """Store the listing (massaged from an RSS feed) and fetch its page.

        :param listing: dict with at least a ``url`` key pointing at the
            listing's detail page.
        """
        self.listing = listing
        # Close the HTTP response explicitly instead of leaking the socket
        # (urlopen(...).read() on one line never closes the connection).
        from contextlib import closing
        with closing(urllib.urlopen(self.listing['url'])) as response:
            page = response.read()
        self.soup = BeautifulSoup(page)
        self.api = CyblerAPI()

    @property
    def valid(self):
        """Whether the parsed listing should be ingested — subclass hook."""
        raise NotImplementedError()

    def _parse_soup(self):
        """Mutate ``self.listing`` using ``self.soup`` — subclass hook."""
        raise NotImplementedError()

    def ingest(self):
        """The actual ingestion code."""
        # Use the soup to modify the listing
        self._parse_soup()
        # Check if the listing is valid and update via the API
        if self.valid:
            self.api.insert(ASSOCIATED_RESOURCE, self.listing)
# NOTE(review): this looks like a paste-duplicate of BaseParser.__init__
# above — confirm it is unused and remove it. Left in place, documented only.
def __init__(self, listing):
    """ Constructor requires a listing massaged in from the an rssfeed """
    # Keep a reference to the raw listing dict (expects a 'url' key).
    self.listing = listing
    # Fetch the listing's page and parse it; the urlopen response is read
    # inline and never closed — same leak as the method above.
    self.soup = BeautifulSoup(urllib.urlopen(self.listing['url']).read())
    self.api = CyblerAPI()
def process_feed(rss_url, city, state, Parser):
    """ Processes all elements in the url with the specified Parser """
    api = CyblerAPI()
    feed = feedparser.parse(rss_url)
    entries = feed['items']
    # First pass: keep only entries the API does not already know about.
    fresh = [
        entry for entry in entries
        if not api.get("listing", _id=text.url_to_id(entry["id"]))
    ]
    # Second pass: normalize each new entry and hand it off asynchronously.
    for entry in fresh:
        payload = {
            "id": text.url_to_id(entry["id"]),
            "url": entry["link"],
            "city": city,
            "state": state,
            "title": entry["title"],
            "description": text.strip_tags(entry.get("summary", "")),
            "type": Parser.__type__,
            "created_on": text.api_date_to_str(entry.get("published_parsed")),
        }
        ingest_element.delay(payload, Parser)