예제 #1
0
class ListingDBTest(unittest.TestCase):
    """Tests for ListingDB that run against a mocked connection.

    The connection and its cursor are MagicMock objects, so every test only
    inspects the SQL (and parameters) passed to ``cursor.execute``.
    """

    def setUp(self):
        """Create a ListingDB around a mock connection and keep its cursor."""
        mockConn = MagicMock()
        self.mockCursor = MagicMock()
        # Whatever cursor ListingDB asks the connection for is our recorded mock.
        mockConn.cursor.return_value = self.mockCursor

        self.db = ListingDB(mockConn)

    def testInit(self):
        """Check that constructing ListingDB executes the CREATE TABLE statement."""
        # assert_called_with only inspects the most recent execute() call, so
        # this holds as long as CREATE TABLE is the last statement setUp triggers.
        self.mockCursor.execute.assert_called_with("CREATE TABLE IF NOT EXISTS listings(pid TEXT PRIMARY KEY, availableDate TEXT)")

    def testInsertListing(self):
        """Check that insert() executes a parameterised INSERT with both values."""
        values = ("9999999", "2015-07-15")
        self.db.insert(values[0], values[1])
        self.mockCursor.execute.assert_called_with("INSERT INTO listings VALUES (?, ?)", values)

    def testHasListing(self):
        """Check that has() executes a parameterised SELECT on the pid."""
        pid = "123"
        # The return value is not asserted here, so it is not bound to a name.
        self.db.has(pid)
        self.mockCursor.execute.assert_called_with("SELECT * FROM listings WHERE pid = ?", (pid,))
예제 #2
0
 def __init__(self):
     """Initialise defaults: ``goSlow`` off and a default-constructed ListingDB."""
     # NOTE(review): goSlow is only initialised here; presumably other methods
     # read it to throttle their work -- confirm against the rest of the class.
     self.goSlow = False
     listingStore = ListingDB()
     self.db = listingStore
예제 #3
0
class BlueRidge:
    """Scrape a Craigslist results page and report listings not seen before.

    Typical flow: ``parse()`` the HTML of a results page, call
    ``getListingsLessThan()`` to get the unseen listings under a price cap,
    then ``getAnchorLinksFromPids()`` to render them as HTML links.
    """

    # Sentinel stored in ``self.prices`` for rows that show no price.
    NO_PRICE = -1

    # URL template of a single listing page; formatted with the listing pid.
    LISTING_URL = "http://sfbay.craigslist.org/sfc/fuo/{}.html"

    def __init__(self, db=None):
        """Create a scraper.

        Args:
            db: Optional store of already-seen listings; it must provide
                ``has(pid)`` and ``insert(pid, availableDate)``. When omitted
                a default ``ListingDB()`` is created, as before.
        """
        self.goSlow = False  # not read by any method of this class
        self.db = ListingDB() if db is None else db
        # Created lazily by requestPage() / initSession().
        self.session = None
        # Parse results; empty until parse() is called.
        self.soup = None
        self.rows = []
        self.prices = []
        self.titles = []

    def setDB(self, DB):
        """Replace the store used to remember which listings were seen."""
        self.db = DB

    def initSession(self):
        """Create the HTTP session and install browser-like request headers."""
        self.session = requests.session()
        headers =   {'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                    'Accept-Encoding':'gzip,deflate,sdch',
                    'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36',
                    'Connection':'keep-alive'}
        self.session.headers.update(headers)

    def parse(self, listingsHTML):
        """Parse a results page and cache its rows, prices and titles on self."""
        # NOTE(review): no parser is named, so BeautifulSoup uses whichever
        # one is installed; consider pinning one for reproducible parsing.
        self.soup = BeautifulSoup(listingsHTML)
        self.rows = self.soup.findAll('p', class_='row')
        self.prices = self.getPrices()
        self.titles = self.getTitles()

    def getCount(self):
        """Return the number of listing rows found by the last parse()."""
        return len(self.rows)

    def _parsePrice(self, text):
        """Convert price text such as ``"$1,200"`` to an int.

        Returns NO_PRICE when ``text`` is None (a price element without a
        single text child). Raises ValueError for text that is not a number
        once ``$`` and thousands separators are removed.
        """
        if text is None:
            return self.NO_PRICE
        return int(text.replace('$', '').replace(',', ''))

    def getPrices(self):
        """Return one price per row, using NO_PRICE for rows without a price."""
        prices = []
        for row in self.rows:
            price_span = row.find('span', class_='price')
            if price_span is None:
                prices.append(self.NO_PRICE)
            else:
                prices.append(self._parsePrice(price_span.string))
        return prices

    def getTitles(self):
        """Return the text of each row's ``hdrlnk`` anchor, in row order."""
        return [row.find('a', class_='hdrlnk').string for row in self.rows]

    def getListOfPids(self):
        """Return the ``data-pid`` attribute of every row, in row order."""
        return [row['data-pid'] for row in self.rows]

    def getListingsLessThan(self, maxPrice):
        """Return unseen listings priced at or below ``maxPrice``.

        Each result is a ``(pid, title, price)`` tuple. Every returned pid is
        inserted into the store, so a second call will not return it again.
        Rows without a price carry NO_PRICE (-1) and therefore pass the cap
        for any ``maxPrice >= -1``.
        """
        listings = []
        for index, price in enumerate(self.prices):
            if price is None or price > maxPrice:
                continue
            pid = self.rows[index]['data-pid']
            if self.db.has(pid):
                continue
            listings.append((pid, self.titles[index], price))
            self.db.insert(pid, None)
        return listings

    def generateLink(self, pid):
        """Return the URL of the listing page for ``pid``."""
        return self.LISTING_URL.format(pid)

    def requestPage(self, url):
        """Fetch ``url`` with the shared session and return the response text.

        The session is created on first use. Creating it up front (instead of
        catching AttributeError around the request) keeps an AttributeError
        raised during the request itself from being swallowed and retried.
        """
        if getattr(self, 'session', None) is None:
            self.initSession()
        return self.session.get(url).text

    def getLinks(self, pids):
        """Return the listing URL for each pid, in order."""
        return [self.generateLink(pid) for pid in pids]

    def getAnchorLinksFromPids(self, listings):
        """Render listings as HTML anchors, each followed by ``<br />``.

        Args:
            listings: Iterable of ``(pid, title, price)`` tuples as returned
                by getListingsLessThan(); a price of NO_PRICE is shown as
                empty.

        Returns:
            The concatenated markup. On Python 2 it is UTF-8 encoded bytes
            (as before); on Python 3 it is text.
        """
        try:
            from html import escape  # Python 3
        except ImportError:
            from cgi import escape  # Python 2

        anchors = []
        for listing in listings:
            pid, title, price = listing[:3]
            priceText = '' if price == self.NO_PRICE else '- ${}'.format(price)
            # Titles come from scraped pages, so escape them before embedding
            # them in markup; the link is escaped for use inside an attribute.
            anchors.append(u"<a href=\"{}\">{} {}</a><br />".format(
                escape(self.generateLink(pid), True), escape(title, False), priceText))
        markup = u"".join(anchors)
        # Formatting as text and encoding once at the end avoids embedding
        # b'...' reprs on Python 3 while keeping the Python 2 byte result.
        return markup.encode('utf-8') if str is bytes else markup
예제 #4
0
class BlueRidge:
    """Scrape Craigslist apartment listings and filter them.

    Listings can be filtered by price per bedroom (from the parsed results
    page) and by move-in date (fetched from each listing's own page). Pids
    whose date has been looked up are recorded in ``self.db``.
    """

    # URL template of a single listing page; formatted with the listing pid.
    LISTING_URL = "http://sfbay.craigslist.org/sfc/apa/{}.html"

    def __init__(self, db=None):
        """Create a scraper.

        Args:
            db: Optional store of already-seen listings; it must provide
                ``has(pid)`` and ``insert(pid, availableDate)``. When omitted
                a default ``ListingDB()`` is created, as before.
        """
        # When True, getListingsAvailableAfter() sleeps 1-3 s before each fetch.
        self.goSlow = False
        self.db = ListingDB() if db is None else db
        # Created lazily by requestPage() / initSession().
        self.session = None
        # Parse results; empty until parse() is called.
        self.soup = None
        self.rows = []
        self.prices = []
        self.bedrooms = []
        self.pricesPer = []

    def setDB(self, DB):
        """Replace the store used to remember which listings were seen."""
        self.db = DB

    def initSession(self):
        """Create the HTTP session and install browser-like request headers."""
        self.session = requests.session()
        headers =   {'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                    'Accept-Encoding':'gzip,deflate,sdch',
                    'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36',
                    'Connection':'keep-alive'}
        self.session.headers.update(headers)

    def parse(self, listingsHTML):
        """Parse a results page and cache rows, prices, bedrooms and price/BR."""
        # NOTE(review): no parser is named, so BeautifulSoup uses whichever
        # one is installed; consider pinning one for reproducible parsing.
        self.soup = BeautifulSoup(listingsHTML)
        self.rows = self.soup.findAll('p', class_='row')
        self.prices = self.getPrices()
        self.bedrooms = self.getBedrooms()
        self.pricesPer = self.getPricesPerBedroom()

    def getCount(self):
        """Return the number of listing rows found by the last parse()."""
        return len(self.rows)

    def _parsePrice(self, text):
        """Convert price text such as ``"$2,400"`` to an int; None stays None.

        Raises ValueError for text that is not a number once ``$`` and
        thousands separators are removed.
        """
        if text is None:
            return None
        return int(text.replace('$', '').replace(',', ''))

    def _parseBedrooms(self, text):
        """Return the N of the first ``Nbr`` in ``text``, or None if absent."""
        match = re.search(r"(\d+)br", text)
        return int(match.group(1)) if match else None

    def getPrices(self):
        """Return one price per row; None for rows that show no price."""
        prices = []
        for row in self.rows:
            priceSpan = row.find('span', class_='price')
            prices.append(None if priceSpan is None else self._parsePrice(priceSpan.string))
        return prices

    def getBedrooms(self):
        """Return one bedroom count per row.

        The entry is None when the row has no ``housing`` span or when that
        span does not mention a bedroom count.
        """
        bedrooms = []
        for row in self.rows:
            brSpan = row.find('span', class_='housing')
            # The text is searched as-is: wrapping it in str() would raise on
            # non-ASCII characters under Python 2.
            bedrooms.append(None if brSpan is None else self._parseBedrooms(brSpan.text))
        return bedrooms

    def getPricesPerBedroom(self):
        """Return price divided by bedroom count for every row.

        The entry is None when the price or the bedroom count is unknown, or
        when the count is 0 (no meaningful per-bedroom price).
        """
        pricesPerBR = []
        for price, bedrooms in zip(self.prices, self.bedrooms):
            if price is None or not bedrooms:
                pricesPerBR.append(None)
            else:
                # NOTE: "/" is kept from the original; it floors on Python 2
                # and is true division on Python 3.
                pricesPerBR.append(int(price) / int(bedrooms))
        return pricesPerBR

    def getListOfPids(self):
        """Return the ``data-pid`` attribute of every row, in row order."""
        return [row['data-pid'] for row in self.rows]

    def getListingsLessThanPerBR(self, maxPrice):
        """Return pids whose price per bedroom is known and <= ``maxPrice``.

        This method does not consult or update the store.
        """
        return [
            self.rows[index]['data-pid']
            for index, pricePer in enumerate(self.pricesPer)
            if pricePer is not None and pricePer <= maxPrice
        ]

    def getAvailableDate(self, pid):
        """Fetch the listing page for ``pid`` and return its move-in date.

        Returns the ``date`` attribute of the move-in span, or None when the
        page has no such span or the span has no ``date`` attribute.
        """
        listingHTML = self.requestPage(self.generateLink(pid))
        soup = BeautifulSoup(listingHTML)
        dateSpan = soup.find('span', class_="housing_movein_now property_date")
        if dateSpan is None:
            return None
        return dateSpan.get('date')

    def getCountAvailableAfter(self, date):
        """Return how many unseen listings are available on/after ``date``.

        Delegates to getListingsAvailableAfter(), so it fetches pages and
        records the inspected pids in the store as well.
        """
        return len(self.getListingsAvailableAfter(date))

    def getListingsAvailableAfter(self, date, latest='2016-01-01'):
        """Return unseen pids whose move-in date lies in ``[date, latest]``.

        Dates are compared as strings, so they are expected in ISO
        ``YYYY-MM-DD`` form. Every unseen pid is fetched and then inserted
        into the store with its date, whether or not it is returned.

        Args:
            date: Earliest acceptable move-in date (inclusive).
            latest: Latest acceptable move-in date (inclusive); pass None for
                no upper bound. Defaults to the previously hard-coded cutoff.
        """
        listings = []
        for pid in self.getListOfPids():
            if self.db.has(pid):
                continue
            if self.goSlow:
                # Random pause between requests.
                time.sleep(random.randint(1, 3))
            listingDate = self.getAvailableDate(pid)
            inRange = (
                listingDate is not None
                and listingDate >= date
                and (latest is None or listingDate <= latest)
            )
            if inRange:
                listings.append(pid)
            self.db.insert(pid, listingDate)
        return listings

    def getListingsAvailableAfterAndLessThan(self, date, maxPrice):
        """Return pids passing both the price-per-bedroom and the date filter.

        Results keep page order. Only the date filter touches the store: it
        records every unseen pid it inspects, including ones rejected here
        for price, so those will not be reconsidered on later calls.
        """
        affordable = set(self.getListingsLessThanPerBR(maxPrice))
        available = set(self.getListingsAvailableAfter(date))
        wanted = affordable & available
        return [pid for pid in self.getListOfPids() if pid in wanted]

    def generateLink(self, pid):
        """Return the URL of the listing page for ``pid``."""
        return self.LISTING_URL.format(pid)

    def requestPage(self, url):
        """Fetch ``url`` with the shared session and return the response text.

        The session is created on first use. Creating it up front (instead of
        catching AttributeError around the request) keeps an AttributeError
        raised during the request itself from being swallowed and retried.
        """
        if getattr(self, 'session', None) is None:
            self.initSession()
        return self.session.get(url).text

    def getLinks(self, pids):
        """Return the listing URL for each pid, in order."""
        return [self.generateLink(pid) for pid in pids]

    def getAnchorLinksFromPids(self, pids):
        """Render each pid's URL as an HTML anchor followed by ``<br />``."""
        return "".join(
            "<a href=\"%s\">%s</a><br />" % (link, link)
            for link in self.getLinks(pids)
        )
예제 #5
0
    def setUp(self):
        """Build a ListingDB on a mock connection and keep the mock cursor.

        The cursor mock is stored on ``self.mockCursor`` and is what the mock
        connection's ``cursor()`` returns; the ListingDB under test is stored
        on ``self.db``.
        """
        cursor = MagicMock()
        connection = MagicMock()
        connection.cursor.return_value = cursor

        self.mockCursor = cursor
        self.db = ListingDB(connection)