def main():
    (opts, args) = cli()
    key = get_value('key')
    td = Tmdb(key, opts.category)
    if opts.listing:
        li = Listing(opts.category)
        movies = li.get_movies()
        prefix = "list_"
        subject = "Week %s: %s" % (THIS_WEEK, li.title)
    else:
        movies = td.get_movies(opts.numres)
        prefix = ""
        subject = "%s movies - week %s" % (opts.category.title().replace("_", " "), THIS_WEEK)
    ca = Cache(prefix + os.path.basename(opts.category))
    newMovies = ca.shelve_results(movies)
    if opts.listing:
        movieObjects = ca.shelve_get_items(movies)  # allow dups
    else:
        movieObjects = ca.shelve_get_items(newMovies)  # only new ones
    op = Output(movieObjects)
    html = [op.generate_header()]
    html.append(op.generate_movie_html_div())
    if opts.printres:
        print("\n".join(html))
    if opts.mailres:
        sender = get_value('sender')
        recipients = load_emails('recipients')
        ma = Mail(sender)
        ma.mail_html(recipients, subject, "\n".join(html))
def empty_listing(*things):
    parent_name = None
    for t in things:
        try:
            parent_name = t.parent_name
            break
        except AttributeError:
            continue
    l = Listing(None, None, parent_name=parent_name)
    l.things = list(things)
    return Wrapped(l)
def collect_page_results(self, store):
    names = self.driver.find_elements_by_xpath('//a[@name="listpage_productname"]')
    model_numbers = self.driver.find_elements_by_xpath('//ul[@class="productInfo"]/li[@class="last"]')
    item_numbers = self.driver.find_elements_by_xpath('//ul[@class="productInfo"]/li[not(@class="last")]')
    prices = self.driver.find_elements_by_xpath('//p[@class="pricing"]/strong')
    self.load_next_check = model_numbers[0].text[9:]
    page_results = []
    for i in range(len(names)):
        listing = Listing()
        listing.name = names[i].text
        listing.item_number = item_numbers[i].text[8:]
        listing.model_number = model_numbers[i].text[9:]
        listing.price = prices[i].text[1:]
        listing.country = store.country
        listing.state = store.state
        listing.town = store.town
        listing.store_number = store.store_number
        listing.address = store.address
        page_results.append(listing)
    return page_results
print('PAGE ' + str(page_number) + '============================================')
last_page = page
tree = html.fromstring(page.content)
for i in range(0, 100):
    x_pth = post_name_xpth_prefix + str(i) + post_name_xpth_suffix
    name = tree.xpath(x_pth)
    # If this element does not exist, continue
    if len(name) == 0:
        continue
    try:
        lst = Listing(base_url + name[0].attrib['href'])
    except AttributeError:
        continue
    print(lst.get_title())
    if lst.get_title() != 'poor_err':
        if not db.listing_exists(lst):
            db.save_listing(lst, u_of_t_address)
            if lst.get_viability(u_of_t_address) <= 200 and 'Wanted: ' not in lst.get_title():
                mail.notify(lst, ["*****@*****.**", "*****@*****.**"], u_of_t_address)
            print('** New listing saved **')
def display_predicted_price(n_clicks, apt, ec, condo, time, radius, postal_input,
                            property_type, floor_num, floor_area, lease):
    if n_clicks:
        ##### Current Global Listing Object #####
        global curr_listing
        curr_listing = Listing(postal_input, property_type, int(floor_num),
                               float(floor_area), int(lease))
        global price_output, price_psm_output
        price_output, price_psm_output = curr_listing.pred_price(
            "modelling/", cols, postal_code_area, area_df, sch, train,
            police_centre, avg_cases)
        # For testing
        #curr_listing = Listing('597592', 'Condominium', 6, 99, 70)
        #curr_listing = Listing('689527', 'Condominium', 6, 99, 70)

        ##### Parameters of Sample Object #####
        time_param = [0, 0]
        if time == 'Past 5 Years':
            time_param[0] = 1
        elif time == 'Past 10 Years':
            time_param[1] = 1

        radius_param = [0, 0]
        if radius == 'Within 1km':
            radius_param[0] = 1
        elif radius == 'Within 2km':
            radius_param[1] = 1

        ec_param, condo_param, apt_param = 0, 0, 0
        # Setting default property filter to the property_type of the listing
        if (not apt) and (not condo) and (not ec):
            if property_type == 'Condominium':
                condo_param = 1
            elif property_type == 'Apartment':
                apt_param = 1
            elif property_type == 'Executive Condominium':
                ec_param = 1
        else:
            if ec:
                ec_param = 1
            if condo:
                condo_param = 1
            if apt:
                apt_param = 1

        ##### Current Global Sample Object #####
        global curr_sample
        params = {
            'radius': radius_param,
            'property': [ec_param, condo_param, apt_param],
            'time': time_param
        }
        curr_sample = Sample(params, prelim_ds)
        curr_sample.get_filtered_df(prelim_ds,
                                    curr_listing.get_lon(postal_code_area),
                                    curr_listing.get_lat(postal_code_area))
        curr_sample.get_map(curr_listing.get_lon(postal_code_area),
                            curr_listing.get_lat(postal_code_area),
                            price_psm_output, curr_listing.get_building(),
                            curr_listing.get_road_name(), 100)
        map_component = html.Iframe(srcDoc=open('sample_map.html', 'r').read(),
                                    height='600')

        transaction_table = curr_sample.get_transaction_table()

        psm_timeseries_plot = html.Div([
            html.Div([
                'Aggregated resale market conditions for ',
                html.B(curr_listing.get_planning_area(postal_code_area, area_df).title()),
                " planning area together with its 2 closest neighbours in the past "
                + str(curr_sample.get_time()) + ' years'
            ], style={'font-size': 'medium'}),
            html.Div(
                'Only resale transactions of '
                + ", ".join([property + "s" for property in curr_sample.get_property()])
                + " within each planning area are included within the computation",
                style={'font-size': 'medium'}),
            curr_sample.plot_psm(
                prelim_ds, area_df,
                curr_listing.get_planning_area(postal_code_area, area_df), 2),
        ])

        return [
            overview_section(curr_listing, price_output, price_psm_output),
            curr_listing.get_planning_area(postal_code_area, area_df).title(),
            transaction_features(curr_sample),
            map_component,
            transaction_table,
            psm_timeseries_plot,
            [
                'All resale transactions of '
                + ", ".join([property + "s" for property in curr_sample.get_property()])
                + " in the past ",
                html.B(str(curr_sample.get_time()) + " years"),
                " that are within a radius of ",
                html.B(str(curr_sample.get_radius()) + "km"),
                " from your property"
            ]
        ]

    #### Default output
    # Map
    map_component = html.Iframe(srcDoc=open('assets/default_map.html', 'r').read(),
                                height='600')

    # Timeseries
    filtered_df = prelim_ds.copy()
    filtered_df['Sale Month'] = filtered_df['Sale Date'].apply(
        lambda x: x.strftime('%Y-%m'))  # to plot based on Year and Month
    filtered_df['Sale Year'] = filtered_df['Sale Date'].apply(
        lambda x: x.year)  # to plot based on Year
    grp_df = filtered_df.groupby(['Sale Month', 'Planning Area']).mean().reset_index()
    fig = px.line(
        grp_df,
        x="Sale Month",
        y="PPI",
        #color='Planning Area',
        labels={
            "Sale Month": "Year",
            "PPI": "Property Price Index"
        })
    fig.update_layout(plot_bgcolor='#f8f4f0')
    # To control white space surrounding the plot
    fig.update_layout(margin={'t': 15, 'b': 20, 'l': 20, 'r': 30})
    fig.update_layout(height=450)
    ts_plot = dcc.Graph(figure=fig)

    # Transaction Table
    df = prelim_ds[[
        'Sale Date', 'Address', 'Floor Number', 'Area (SQFT)', 'Remaining Lease',
        'Unit Price ($ PSF)'
    ]].copy()
    df = df.rename(columns={
        'Area (SQFT)': 'Floor Area',
        'BUILDING': 'Building Name'
    })
    df = df.sort_values(by=['Sale Date'], ascending=False).head(100)
    df['Sale Date'] = df['Sale Date'].apply(lambda x: x.date())

    table = dash_table.DataTable(
        data=df.to_dict('records'),
        columns=[{'id': c, 'name': c} for c in df.columns],
        # Remove pagination
        page_action='none',
        # For sorting by columns
        sort_action="native",
        # For filtering rows by column values
        filter_action="native",
        #style_as_list_view=True,
        style_table={
            'max-height': '400px',
            'font-size': '13px'
        },
        style_cell={
            'textAlign': 'center',
            'font-family': 'sans-serif',
            'width': '{}%'.format(len(df.columns))
            #'minWidth': '20px', 'width': '20px', 'maxWidth': '200px'
        },
        # Controlling width of columns
        style_cell_conditional=[
            {'if': {'column_id': 'Sale Date'}, 'width': '5%'},
            {'if': {'column_id': 'Address'}, 'width': '5.5%'},
        ],
        style_data={'padding-left': 7},
        # Striped rows
        style_data_conditional=[{
            'if': {'row_index': 'even'},
            'backgroundColor': '#f2f2ed'  #'lightgrey'
        }],
        # Fixed header row when scrolling vertically
        fixed_rows={'headers': True},
        style_header={
            'backgroundColor': 'rgb(255, 255, 255)',
            'fontWeight': 'bold',
            'padding-left': 7
        },
    )

    transaction_table = html.Div([
        html.Div('Past 100 Recent Transactions',
                 style={
                     'padding-bottom': 2,
                     'font-size': 'xx-large'
                 }),
        table
    ])

    return [
        "",
        'Island Wide',
        transaction_features(full_sample),
        map_component,
        transaction_table,
        ts_plot,
        "Showing all resale transactions of Apartments, Condominiums, Executive Condominiums within the past 10 years"
    ]
def create_listing(self, expiry_time, place):
    listing = Listing(expiry_time, self.uni, place)
    self.add_listing(listing)
def scrapeSinglePage():
    shortWait()
    allListings = driver.find_elements_by_xpath(
        '//div[contains(@class,\'maincontent \')]//div[contains(@class,\'propertyitem propertyitem--list\')]'
    )
    longWait()
    for currentListing in allListings:
        # need to scroll down with each listing because of the lazy loading of images
        scrollDown()
        imageUrl = currentListing.find_element_by_css_selector('img').get_attribute('src')
        shortWait()
        name = currentListing.find_element_by_class_name('propertyitem__address--listview').text
        shortWait()
        link = currentListing.find_element_by_class_name('propertyitem__link').get_attribute('href')
        shortWait()
        price = currentListing.find_element_by_class_name('propertyitem__price').text
        shortWait()
        price = price.split('\n', 2)[-1]
        # determine type of listing
        numberOfAttributes = len(currentListing.find_elements_by_css_selector('th'))
        shortWait()
        info = currentListing.find_elements_by_css_selector('td')
        longWait()
        if numberOfAttributes == 1:
            newListing = Listing(name=name, link=link, price=price, imageUrl=imageUrl,
                                 ground=info[0].text)
            #print(vars(newListing))
            container.append(newListing)
        elif numberOfAttributes == 2:
            newListing = Listing(name=name, link=link, price=price, imageUrl=imageUrl,
                                 m2=info[0].text, rooms=info[1].text)
            container.append(newListing)
        elif numberOfAttributes == 7:
            newListing = Listing(name=name, link=link, price=price, imageUrl=imageUrl,
                                 m2=info[0].text, ground=info[1].text, rooms=info[2].text,
                                 yearOfConstruction=info[3].text, lengthOfStay=info[4].text,
                                 plusMinus=info[5].text, rentAndConsumption=info[6].text)
            container.append(newListing)
        elif numberOfAttributes == 8:
            newListing = Listing(name=name, link=link, price=price, imageUrl=imageUrl,
                                 m2=info[0].text, ground=info[1].text, rooms=info[2].text,
                                 yearOfConstruction=info[3].text, lengthOfStay=info[4].text,
                                 plusMinus=info[5].text, pricePerM2=info[6].text,
                                 ownershipCostPerMonth=info[7].text)
            container.append(newListing)
        else:
            print("error")
        # uncomment to easily see how pagination works
        #break
        shortWait()
    # TODO: check if the button really exists
    nextPageElement = driver.find_element_by_xpath(
        '//ul[contains(@class,\'pagination\')]//li//a[contains(text(),\'Næste\')]'
    ).click()
    shortWait()
def fetch_data(self):
    self.clear_data()
    still_searching = True
    search_page = 1
    while still_searching:
        link_text_expired_listings = (
            "https://www.trademe.co.nz/Browse/SearchResults.aspx?sort_order=bids_asc"
            "&from=advanced&advanced=true&searchstring=" + self.search_term
            + "&current=0&cid=0&rptpath=all&searchregion=100&page=" + str(search_page))
        try:
            # make the request and soup
            exp_res = requests.get(link_text_expired_listings)
            exp_res.raise_for_status()
            expired_search_result_soup = bs4.BeautifulSoup(exp_res.text, features="html.parser")
        except requests.exceptions.HTTPError:
            still_searching = False
            print("an HTTP error occurred fetching expired listings under the search "
                  + self.search_term)

        # go through all the listings on this page, checking to see if they have bids
        raw_listings_this_page = expired_search_result_soup.find_all("li", class_="listingCard")
        for listing in raw_listings_this_page:
            if "Current bid" in listing.text:
                # the "Current bid" section in the html indicates that bid/s have been placed on the item.
                # get the link and make a listing object.
                listing_link = listing.find('a', href=True)['href']
                # if the listing link is for property or motors, prevent it being made into a listing object
                if "/property/" in listing_link or "/motors/" in listing_link or "/farming-forestry/" in listing_link:
                    print("found a bad item, link: " + listing_link)
                else:
                    this_listing = Listing("https://www.trademe.co.nz" + listing_link, self.id)
                    self.expired_listings.append(this_listing)
            else:
                # stop searching if the script hits listings without bids on the page.
                still_searching = False

        # stop searching if there are no more listings.
        listing_count_text_parts = expired_search_result_soup.find(
            'p', class_="listing-count-holder").text.split(" ")
        if listing_count_text_parts[0] == listing_count_text_parts[-1]:
            still_searching = False
        search_page += 1

    # -----------------------------------------------------------------------------
    # clean the listings that were found
    # clean out all the listings that:
    ## are not from the same category as the search term.
    i = 0
    while i < len(self.expired_listings):
        if self.category.lower() in self.expired_listings[i].category.lower():
            i += 1
        else:
            del self.expired_listings[i]

    ## contain too many excluded terms
    # TODO: this doesn't appear to be working properly; it's not important for now, but it will help refine results.
    i = 0  # reset the index so this second pass actually runs
    while i < len(self.expired_listings):
        excluded_word_count = len([
            ele for ele in self.excluded_terms
            if ele in self.expired_listings[i].description.lower()
        ]) + len([
            ele for ele in self.excluded_terms
            if ele in self.expired_listings[i].listingName.lower()
        ])
        if excluded_word_count > self.max_excluded_terms:
            print("listing ID {} contained too many excluded terms. The listing will not be recorded."
                  .format(self.expired_listings[i].id))
            del self.expired_listings[i]
        else:
            i += 1

    print("finished finding expired listings for the search term '{}', returned {} results"
          .format(self.search_term, len(self.expired_listings)))

    # -----------------------------------------------------------------------------
    # fetch all the long term data.
    expired_listing_count = len(self.expired_listings)

    # find how many current listings there are.
    link_text = ("https://www.trademe.co.nz/Browse/SearchResults.aspx?searchString="
                 + self.search_term
                 + "&type=Search&searchType=all&user_region=100&user_district=0"
                 "&generalSearch_keypresses=5&generalSearch_suggested=0"
                 "&generalSearch_suggestedCategory=")
    # make the request and soup
    res = requests.get(link_text)
    res.raise_for_status()
    search_result_soup = bs4.BeautifulSoup(res.text, features="html.parser")

    # get the number of results returned.
    current_listing_count_str = search_result_soup.find_all(
        "h3", {"class": "tm-search-header-result-count__heading ng-star-inserted"})[0].get_text()
    current_listing_count = int(current_listing_count_str.split(" ")[2])

    # get the median sale price of sold listings.
    sold_listings_prices = []
    for listing in self.expired_listings:
        sold_listings_prices.append(listing.get_sell_price())
    median_sell_price = (statistics.median(sold_listings_prices)
                         if len(self.expired_listings) > 0 else None)

    # finally, make the long term data tuple so that these statistics can be recorded in MySQL.
    # format: (search_id, date, active_listings, sold_listings, median_sell_price)
    date = str(datetime.datetime.now())
    self.long_term_data = (self.id, date, current_listing_count,
                          expired_listing_count, median_sell_price)
    self.fetched_data = True
def _WriteToCache(self, listing: Listing):
    with open(os.path.join(self.directory, listing.id()), "w") as f:
        f.write(jsonpickle.encode(listing))
def parse_node(node):
    children = itol(node.children)
    listing = Listing()
    try:
        # first row
        chillen = itol(children[0])
        listing.mls = chillen[0].find_all(text=re.compile(r'\d{8}'))[0].strip()
        listing.status = parse_string(chillen[1], 'Status: ')
        ginz = chillen[2].find_all(text=re.compile(r'\d*'))
        listing.dom = int(ginz[1].strip())
        listing.dto = int(ginz[3].strip())
        listing.sale_price = parse_money(chillen[3], 'Sale Price: ')
        listing.list_price = parse_money(chillen[4], 'List Price: ')

        # second row
        in_ = itol(children[1])
        address1 = in_[0].getText()
        listing.price_sqft_list = Money(str(parse_sqft(in_[1])), 'USD')
        listing.sale_date = parse_date(in_[2])
        listing.list_date = parse_date(in_[3])

        # third row
        da = itol(children[3])
        address2 = da[0].getText()
        listing.address = address1 + ' ' + address2
        listing.price_sqft_sold = Money(str(parse_sqft(da[1])), 'USD')
        listing.off_mkt_date = parse_date(da[2])
        listing.orig_price = parse_money(da[3], 'Orig. Price: ')

        # fourth row
        club = itol(children[4])
        listing.style = parse_string(club[0], 'Style: ')
        listing.outdoor_space = parse_string(club[1], 'Outdoor Space: ')
        listing.assoc_fee = parse_money(club[2], 'Assoc.Fee: ')

        # fifth row
        bottle = itol(children[6])
        listing.rooms = parse_int(bottle[0], 'Rooms: ')
        listing.beds = parse_int(bottle[1], 'Beds: ')
        listing.baths = parse_string(bottle[2], 'Baths: ')
        listing.living_area = parse_int(bottle[3], 'Living Area: ')
        listing.tax = parse_money(bottle[4], 'Tax: ')

        # sixth row
        full = itol(children[8])
        listing.garage = parse_int(full[0], 'Garage: ')
        listing.parking = parse_int(full[1], 'Parking: ')
        listing.pets = parse_string(full[2], 'Pets: ')
        listing.year_built = parse_int(full[3], 'Year Built: ')
        listing.fy = parse_int(full[4], 'Fy: ')

        # seventh row
        of_bub = itol(children[10])
        listing.remarks = parse_string(of_bub[0], 'Remarks: ')
    except Exception:
        # I'm a monster
        listing.remarks = u''
    return listing
def write_listing(job_title, job_link, org_name, source, date_posted):
    new_listing = Listing(job_title, job_link, org_name, source, date_posted)
    db.session.add(new_listing)
    db.session.commit()
listings = []
i = 1
while True:
    params = {"p": i}
    r = requests.get(REQUEST_URL_PREFIX, params=params)
    if r.text is None or search(REGEX, r.text) is None:
        break
    for match in finditer(REGEX, r.text):
        domain = match.group(1)
        bitcoin_price = match.group(2)
        namecoin_price = match.group(3)
        litecoin_price = match.group(4)
        peercoin_price = match.group(5)
        primecoin_price = match.group(6)
        prices = {
            "bitcoin_price": bitcoin_price,
            "namecoin_price": namecoin_price,
            "litecoin_price": litecoin_price,
            "peercoin_price": peercoin_price,
            "primecoin_price": primecoin_price
        }
        listings.append(Listing(domain, prices, datetime.now()))
    i += 1
    sleep(0.5)

if not exists(OUTPUT_DIR):
    makedirs(OUTPUT_DIR)

filename = datetime.now().strftime("%Y-%m-%d-%H_%M.pickle")
with open(join(OUTPUT_DIR, filename), "wb") as output_file:
    dump(listings, output_file, protocol=HIGHEST_PROTOCOL)