Example #1
def main():
  (opts, args) = cli()
  key = get_value('key')
  td = Tmdb(key, opts.category)
  if opts.listing:
    li = Listing(opts.category)
    movies = li.get_movies()
    prefix = "list_"
    subject = "Week %s: %s" % (THIS_WEEK, li.title)
  else:
    movies = td.get_movies(opts.numres) 
    prefix = ""
    subject = "%s movies - week %s" % (opts.category.title().replace("_", " "), THIS_WEEK)
  ca = Cache(prefix + os.path.basename(opts.category))
  newMovies = ca.shelve_results(movies)
  if opts.listing:
    movieObjects = ca.shelve_get_items(movies) # allow dups
  else:
    movieObjects = ca.shelve_get_items(newMovies) # only new ones
  op = Output(movieObjects)
  html = [op.generate_header()]
  html.append(op.generate_movie_html_div())
  if opts.printres:
    print "\n".join(html)
  if opts.mailres:
    sender = get_value('sender')
    recipients = load_emails('recipients')
    ma = Mail(sender)
    ma.mail_html(recipients, subject, "\n".join(html))
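The snippet assumes a cli() helper whose (opts, args) return value matches optparse's parse_args(); a hypothetical sketch, with the option names taken from the opts.* attributes read above and invented defaults:

# Hypothetical cli() for the snippet above; option names come from the
# opts.* attributes it reads, the defaults are guesses.
from optparse import OptionParser

def cli():
    parser = OptionParser()
    parser.add_option("-c", "--category", default="now_playing",
                      help="TMDb category to query")
    parser.add_option("-n", "--numres", type="int", default=20,
                      help="number of results to fetch")
    parser.add_option("-l", "--listing", action="store_true", default=False,
                      help="fetch a curated listing instead of a category")
    parser.add_option("-p", "--printres", action="store_true", default=False,
                      help="print the generated HTML to stdout")
    parser.add_option("-m", "--mailres", action="store_true", default=False,
                      help="mail the generated HTML to the recipients")
    return parser.parse_args()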
Example #2
def empty_listing(*things):
    parent_name = None
    for t in things:
        try:
            parent_name = t.parent_name
            break
        except AttributeError:
            continue
    l = Listing(None, None, parent_name=parent_name)
    l.things = list(things)
    return Wrapped(l)
Example #3
    def collect_page_results(self, store):
        names = self.driver.find_elements_by_xpath('//a[@name="listpage_productname"]')
        model_numbers = self.driver.find_elements_by_xpath('//ul[@class="productInfo"]/li[@class="last"]')
        item_numbers = self.driver.find_elements_by_xpath('//ul[@class="productInfo"]/li[not(@class="last")]')
        prices = self.driver.find_elements_by_xpath('//p[@class="pricing"]/strong')
        self.load_next_check = model_numbers[0].text[9:]
        page_results = []
        for i in range(len(names)):
            listing = Listing()
            listing.name = names[i].text
            # fixed-length label prefixes and the currency symbol are
            # sliced off the raw element text
            listing.item_number = item_numbers[i].text[8:]
            listing.model_number = model_numbers[i].text[9:]
            listing.price = prices[i].text[1:]
            listing.country = store.country
            listing.state = store.state
            listing.town = store.town
            listing.store_number = store.store_number
            listing.address = store.address
            page_results.append(listing)
        return page_results
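collect_page_results only assigns plain attributes on Listing, so a minimal hypothetical definition is enough to run it:

# Minimal hypothetical Listing holder for the scraper above; every field
# is populated inside collect_page_results.
class Listing:
    def __init__(self):
        self.name = None
        self.item_number = None
        self.model_number = None
        self.price = None
        self.country = None
        self.state = None
        self.town = None
        self.store_number = None
        self.address = None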
Example #4
            print('PAGE ' + str(page_number) +
                  '============================================')
            last_page = page

        tree = html.fromstring(page.content)

        for i in range(0, 100):
            x_pth = post_name_xpth_prefix + str(i) + post_name_xpth_suffix
            name = tree.xpath(x_pth)

            # If this element does not exist, continue
            if len(name) == 0:
                continue

            try:
                lst = Listing(base_url + name[0].attrib['href'])
            except AttributeError:
                continue

            print(lst.get_title())
            if lst.get_title() != 'poor_err':
                if not db.listing_exists(lst):
                    db.save_listing(lst, u_of_t_address)
                    if (lst.get_viability(u_of_t_address) <= 200
                            and 'Wanted: ' not in lst.get_title()):
                        mail.notify(lst, [
                            "*****@*****.**",
                            "*****@*****.**"
                        ], u_of_t_address)
                    print('** New listing saved **')
Example #5
def display_predicted_price(n_clicks, apt, ec, condo, time, radius,
                            postal_input, property_type, floor_num, floor_area,
                            lease):

    if n_clicks:

        ##### Current Global Listing Object #####
        global curr_listing
        curr_listing = Listing(postal_input, property_type, int(floor_num),
                               float(floor_area), int(lease))

        global price_output, price_psm_output
        price_output, price_psm_output = curr_listing.pred_price(
            "modelling/", cols, postal_code_area, area_df, sch, train,
            police_centre, avg_cases)

        # For testing
        #curr_listing = Listing('597592', 'Condominium', 6, 99, 70)
        #curr_listing = Listing('689527', 'Condominium', 6, 99, 70)

        ##### Parameters of Sample Object #####
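        # The dropdown selections below are one-hot encoded as two-element
        # indicator lists ([1, 0] for the first option, [0, 1] for the
        # second, [0, 0] for the default) before being handed to Sample
        # via the params dict.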
        time_param = [0, 0]
        if (time == 'Past 5 Years'):
            time_param[0] = 1
        elif (time == 'Past 10 Years'):
            time_param[1] = 1

        radius_param = [0, 0]
        if (radius == 'Within 1km'):
            radius_param[0] = 1
        elif (radius == 'Within 2km'):
            radius_param[1] = 1

        ec_param, condo_param, apt_param = 0, 0, 0

        # Setting default property_filter to property_type of listing
        if ((not apt) and (not condo) and (not ec)):
            if (property_type == 'Condominium'):
                condo_param = 1
            elif (property_type == 'Apartment'):
                apt_param = 1
            elif (property_type == 'Executive Condominium'):
                ec_param = 1
        else:

            if ec:
                ec_param = 1
            if condo:
                condo_param = 1
            if apt:
                apt_param = 1

        ##### Current Global Sample Object #####
        global curr_sample
        params = {
            'radius': radius_param,
            'property': [ec_param, condo_param, apt_param],
            'time': time_param
        }
        curr_sample = Sample(params, prelim_ds)
        curr_sample.get_filtered_df(prelim_ds,
                                    curr_listing.get_lon(postal_code_area),
                                    curr_listing.get_lat(postal_code_area))
        curr_sample.get_map(curr_listing.get_lon(postal_code_area),
                            curr_listing.get_lat(postal_code_area),
                            price_psm_output, curr_listing.get_building(),
                            curr_listing.get_road_name(), 100)
        map_component = html.Iframe(srcDoc=open('sample_map.html', 'r').read(),
                                    height='600')

        transaction_table = curr_sample.get_transaction_table()

        psm_timeseries_plot = html.Div([
            html.Div([
                'Aggregated resale market conditions for ',
                html.B(
                    curr_listing.get_planning_area(postal_code_area,
                                                   area_df).title()),
                " planning area together with its 2 closest neighbours in the past "
                + str(curr_sample.get_time()) + ' years'
            ],
                     style={'font-size': 'medium'}),
            html.Div(
                'Only resale transactions of ' + ", ".join([
                    prop + "s" for prop in curr_sample.get_property()
                ]) +
                " within each planning area are included in the computation",
                style={'font-size': 'medium'}),
            curr_sample.plot_psm(
                prelim_ds, area_df,
                curr_listing.get_planning_area(postal_code_area, area_df), 2),
        ])

        return [
            overview_section(curr_listing, price_output, price_psm_output),
            curr_listing.get_planning_area(postal_code_area, area_df).title(),
            transaction_features(curr_sample), map_component,
            transaction_table, psm_timeseries_plot,
            [
                'All resale transactions of ' + ", ".join([
                    prop + "s" for prop in curr_sample.get_property()
                ]) + " in the past ",
                html.B(str(curr_sample.get_time()) + " years"),
                " that are within a radius of ",
                html.B(str(curr_sample.get_radius()) + "km"),
                " from your property"
            ]
        ]

    #### Default output

    # Map
    map_component = html.Iframe(srcDoc=open('assets/default_map.html',
                                            'r').read(),
                                height='600')

    # Timeseries

    filtered_df = prelim_ds.copy()
    filtered_df['Sale Month'] = filtered_df['Sale Date'].apply(
        lambda x: x.strftime('%Y-%m'))  # to plot based on Year and Month
    filtered_df['Sale Year'] = filtered_df['Sale Date'].apply(
        lambda x: x.year)  # to plot based on Year
    grp_df = filtered_df.groupby(['Sale Month',
                                  'Planning Area']).mean().reset_index()

    fig = px.line(
        grp_df,
        x="Sale Month",
        y="PPI",
        #color='Planning Area',
        labels={
            "Sale Month": "Year",
            "PPI": "Property Price Index"
        })

    fig.update_layout(plot_bgcolor='#f8f4f0')

    # To control white space surrounding the plot
    fig.update_layout(margin={'t': 15, 'b': 20, 'l': 20, 'r': 30})

    fig.update_layout(height=450)

    ts_plot = dcc.Graph(figure=fig)

    # Transaction Table
    df = prelim_ds[[
        'Sale Date', 'Address', 'Floor Number', 'Area (SQFT)',
        'Remaining Lease', 'Unit Price ($ PSF)'
    ]].copy()
    # 'BUILDING' is not among the selected columns, so only the area column
    # actually needs renaming here.
    df = df.rename(columns={'Area (SQFT)': 'Floor Area'})
    df = df.sort_values(by=['Sale Date'], ascending=False).head(100)
    df['Sale Date'] = df['Sale Date'].apply(lambda x: x.date())

    table = dash_table.DataTable(
        data=df.to_dict('records'),
        columns=[{
            'id': c,
            'name': c
        } for c in df.columns],

        # Remove Pagination
        page_action='none',

        #For sorting by columns
        sort_action="native",

        # For filtering rows by column values
        filter_action="native",

        #style_as_list_view=True,
        style_table={
            'max-height': '400px',
            'font-size': '13px'
        },
        style_cell={
            'textAlign': 'center',
            'font-family': 'sans-serif',
            'width': '{}%'.format(len(df.columns))
            #'minWidth': '20px', 'width': '20px', 'maxWidth': '200px'
        },

        # Controlling width of columns
        style_cell_conditional=[
            {
                'if': {
                    'column_id': 'Sale Date'
                },
                'width': '5%'
            },
            {
                'if': {
                    'column_id': 'Address'
                },
                'width': '5.5%'
            },
        ],
        style_data={'padding-left': 7},

        #striped rows
        style_data_conditional=[{
            'if': {
                'row_index': 'even'
            },
            'backgroundColor': '#f2f2ed'
            #'lightgrey'
        }],

        #Fixed row for when scrolling vertically
        fixed_rows={'headers': True},
        style_header={
            'backgroundColor': 'rgb(255, 255, 255)',
            'fontWeight': 'bold',
            'padding-left': 7
        },
    )

    transaction_table = html.Div([
        html.Div('Past 100 Recent Transactions',
                 style={
                     'padding-bottom': 2,
                     'font-size': 'xx-large'
                 }), table
    ])

    return [
        "", 'Island Wide',
        transaction_features(full_sample), map_component, transaction_table,
        ts_plot,
        "Showing all resale transactions of Apartments, Condominiums, Executive Condominiums within the past 10 years"
    ]
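For context, display_predicted_price has the shape of a Dash callback: one Input (the button click) plus State values, returning a list that fills several Outputs. A hypothetical wiring, with invented component ids, might look like:

# Hypothetical Dash wiring for the callback above; every component id is
# invented, but the argument order must mirror the Input/State order and
# the returned list must match the Outputs one-to-one.
from dash import Input, Output, State

@app.callback(
    Output('overview-section', 'children'),
    Output('planning-area-title', 'children'),
    Output('transaction-features', 'children'),
    Output('map-frame', 'children'),
    Output('transaction-table', 'children'),
    Output('psm-plot', 'children'),
    Output('caption', 'children'),
    Input('predict-button', 'n_clicks'),
    State('apt-filter', 'value'),
    State('ec-filter', 'value'),
    State('condo-filter', 'value'),
    State('time-window', 'value'),
    State('radius', 'value'),
    State('postal-input', 'value'),
    State('property-type', 'value'),
    State('floor-num', 'value'),
    State('floor-area', 'value'),
    State('lease', 'value'),
)
def display_predicted_price(n_clicks, apt, ec, condo, time, radius,
                            postal_input, property_type, floor_num,
                            floor_area, lease):
    ...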
Example #6
    def create_listing(self, expiry_time, place):
        listing = Listing(expiry_time, self.uni, place)
        self.add_listing(listing)
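create_listing hands the new object off to self.add_listing, which is not shown; a hypothetical one-liner, assuming the owning class keeps its listings in a plain list:

    # Hypothetical companion to create_listing above (assumption: the
    # owner stores its listings in a plain list).
    def add_listing(self, listing):
        self.listings.append(listing)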
Example #7
def scrapeSinglePage():
    shortWait()

    allListings = driver.find_elements_by_xpath(
        "//div[contains(@class,'maincontent ')]//div[contains(@class,'propertyitem propertyitem--list')]"
    )

    longWait()

    for currentListing in allListings:
        #need to scroll down with each listing because of the lazy loading of images
        scrollDown()
        imageUrl = currentListing.find_element_by_css_selector(
            'img').get_attribute('src')
        shortWait()

        name = currentListing.find_element_by_class_name(
            'propertyitem__address--listview').text
        shortWait()

        link = currentListing.find_element_by_class_name(
            'propertyitem__link').get_attribute('href')
        shortWait()

        price = currentListing.find_element_by_class_name(
            'propertyitem__price').text
        shortWait()
        price = price.split('\n', 2)[-1]

        #determine type of listing
        numberOfAttributes = len(
            currentListing.find_elements_by_css_selector('th'))
        shortWait()
        info = currentListing.find_elements_by_css_selector('td')
        longWait()
        if numberOfAttributes == 1:
            newListing = Listing(name=name,
                                 link=link,
                                 price=price,
                                 imageUrl=imageUrl,
                                 ground=info[0].text)
            #print(vars(newListing))
            container.append(newListing)
        elif numberOfAttributes == 2:
            newListing = Listing(name=name,
                                 link=link,
                                 price=price,
                                 imageUrl=imageUrl,
                                 m2=info[0].text,
                                 rooms=info[1].text)
            container.append(newListing)
        elif numberOfAttributes == 7:
            newListing = Listing(name=name,
                                 link=link,
                                 price=price,
                                 imageUrl=imageUrl,
                                 m2=info[0].text,
                                 ground=info[1].text,
                                 rooms=info[2].text,
                                 yearOfConstruction=info[3].text,
                                 lengthOfStay=info[4].text,
                                 plusMinus=info[5].text,
                                 rentAndConsumption=info[6].text)
            container.append(newListing)
        elif numberOfAttributes == 8:
            newListing = Listing(name=name,
                                 link=link,
                                 price=price,
                                 imageUrl=imageUrl,
                                 m2=info[0].text,
                                 ground=info[1].text,
                                 rooms=info[2].text,
                                 yearOfConstruction=info[3].text,
                                 lengthOfStay=info[4].text,
                                 plusMinus=info[5].text,
                                 pricePerM2=info[6].text,
                                 ownershipCostPerMonth=info[7].text)
            container.append(newListing)
        else:
            print("error: unexpected number of attributes:", numberOfAttributes)
        #uncomment to easily see how pagination works
        #break
        shortWait()
    #TODO: check if the button really exists
    driver.find_element_by_xpath(
        "//ul[contains(@class,'pagination')]//li//a[contains(text(),'Næste')]"
    ).click()
    shortWait()
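The scraper leans on three helpers that are not shown; plausible minimal versions, assuming fixed sleeps and a full-page scroll (the exact delays are guesses):

import time

def shortWait():
    time.sleep(1)  # brief pause between element lookups (duration is a guess)

def longWait():
    time.sleep(3)  # longer pause for slower page updates (guess)

def scrollDown():
    # scroll to the bottom so lazily loaded images are actually fetched
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")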
Example #8
    def fetch_data(self):
        self.clear_data()

        still_searching = True
        search_page = 1
        while still_searching:
            link_text_expired_listings = "https://www.trademe.co.nz/Browse/SearchResults.aspx?sort_order=bids_asc&from=advanced&advanced=true&searchstring=" + self.search_term + "&current=0&cid=0&rptpath=all&searchregion=100&page=" + str(
                search_page)

            try:
                #make the request and soup
                exp_res = requests.get(link_text_expired_listings)
                exp_res.raise_for_status()
                expired_search_result_soup = bs4.BeautifulSoup(
                    exp_res.text, features="html.parser")
            except requests.exceptions.HTTPError:
                print(
                    "an HTTP error occurred fetching expired listings under the search "
                    + self.search_term)
                #without this break, the soup below would be unbound on a failed request
                break

            #go through all the listings on this page, checking to see if they have bids
            raw_listings_this_page = expired_search_result_soup.find_all(
                "li", class_="listingCard")
            for listing in raw_listings_this_page:
                if "Current bid" in listing.text:  #the current bid section in the html indicates if bid/s have been placed on the item.
                    #get the link and make a listing object.
                    listing_link = listing.find('a', href=True)['href']
                    #if the listing link is for property or motors, prevent it being made into a listing object
                    if "/property/" in listing_link or "/motors/" in listing_link or "/farming-forestry/" in listing_link:
                        print("found a bad item, link: " + listing_link)
                    else:
                        this_listing = Listing(
                            "https://www.trademe.co.nz" + listing_link,
                            self.id)
                        self.expired_listings.append(this_listing)
                else:
                    #stop searching if the script hits listings without bids on the page.
                    still_searching = False

            #stop searching if there are no more listings.
            listing_count_text_parts = expired_search_result_soup.find(
                'p', class_="listing-count-holder").text.split(" ")
            if listing_count_text_parts[0] == listing_count_text_parts[-1]:
                still_searching = False

            search_page += 1

        #---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        #clean the listings that were found

        #clean out all the listings that:
        ## are not from the same category as the search term.
        i = 0
        while i < len(self.expired_listings):
            if self.category.lower(
            ) in self.expired_listings[i].category.lower():
                i += 1
            else:
                del self.expired_listings[i]

        ## contain too many excluded terms.
        # i must be reset here: the previous loop leaves it equal to
        # len(self.expired_listings), so without the reset this filter never ran.
        i = 0
        while i < len(self.expired_listings):
            excluded_word_count = len([
                ele for ele in self.excluded_terms
                if (ele in self.expired_listings[i].description.lower())
            ]) + len([
                ele for ele in self.excluded_terms
                if (ele in self.expired_listings[i].listingName.lower())
            ])

            if excluded_word_count > self.max_excluded_terms:
                print(
                    "listing ID {} contained too many excluded terms. The listing will not be recorded."
                    .format(self.expired_listings[i].id))
                del self.expired_listings[i]
            else:
                i += 1

        print(
            "finished finding expired listings for the search term '{}', returned {} results"
            .format(self.search_term, len(self.expired_listings)))

        #---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        #fetch all the long term data.
        expired_listing_count = len(self.expired_listings)

        #find how many current listings there are.
        link_text = "https://www.trademe.co.nz/Browse/SearchResults.aspx?searchString=" + self.search_term + "&type=Search&searchType=all&user_region=100&user_district=0&generalSearch_keypresses=5&generalSearch_suggested=0&generalSearch_suggestedCategory="

        #make the request and soup
        res = requests.get(link_text)
        res.raise_for_status()
        search_result_soup = bs4.BeautifulSoup(res.text,
                                               features="html.parser")

        #get the number of results returned.
        current_listing_count_str = search_result_soup.find_all(
            "h3", {
                "class":
                "tm-search-header-result-count__heading ng-star-inserted"
            })[0].get_text()
        current_listing_count = int(current_listing_count_str.split(" ")[2])

        #get the median sale price of sold listings.
        sold_listings_prices = []
        for listing in self.expired_listings:
            sold_listings_prices.append(listing.get_sell_price())

        median_sell_price = statistics.median(sold_listings_prices) if len(
            self.expired_listings) > 0 else None

        #finally, make the long term data tuple so that these statistics can be recorded in MySQL.
        #format: (search_id, date, active_listings, sold_listings, median_sell_price)
        date = str(datetime.datetime.now())
        self.long_term_data = (self.id, date, current_listing_count,
                               expired_listing_count, median_sell_price)

        self.fetched_data = True
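The closing comment gives the column order for long_term_data; a hypothetical MySQL sink (the table name, columns, and cursor are all assumptions) could be:

# Hypothetical sink for the long_term_data tuple built above; the value
# order comes from the comment in fetch_data, everything else is assumed.
def record_long_term_data(cursor, long_term_data):
    cursor.execute(
        "INSERT INTO long_term_data"
        " (search_id, date, active_listings, sold_listings, median_sell_price)"
        " VALUES (%s, %s, %s, %s, %s)",
        long_term_data,
    )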
Example #9
    def _WriteToCache(self, listing: Listing):
        with open(os.path.join(self.directory, listing.id()), "w") as f:
            f.write(jsonpickle.encode(listing))
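A matching read helper is not shown; a hypothetical counterpart that mirrors _WriteToCache using jsonpickle.decode:

    # Hypothetical counterpart to _WriteToCache above; restores a cached
    # Listing by id using jsonpickle.decode.
    def _ReadFromCache(self, listing_id: str) -> Listing:
        with open(os.path.join(self.directory, listing_id)) as f:
            return jsonpickle.decode(f.read())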
Example #10
def parse_node(node):
    children = itol(node.children)

    listing = Listing()

    try:

        # first row
        chillen = itol(children[0])
        listing.mls = chillen[0].find_all(text=re.compile(r'\d{8}'))[0].strip()
        listing.status = parse_string(chillen[1], 'Status: ')
        ginz = chillen[2].find_all(text=re.compile(r'\d*'))
        listing.dom = int(ginz[1].strip())
        listing.dto = int(ginz[3].strip())
        listing.sale_price = parse_money(chillen[3], 'Sale Price: ')
        listing.list_price = parse_money(chillen[4], 'List Price: ')

        # second row
        in_ = itol(children[1])
        address1 = in_[0].getText()
        listing.price_sqft_list = Money(str(parse_sqft(in_[1])), 'USD')
        listing.sale_date = parse_date(in_[2])
        listing.list_date = parse_date(in_[3])

        # third row
        da = itol(children[3])
        address2 = da[0].getText()
        listing.address = address1 + ' ' + address2
        listing.price_sqft_sold = Money(str(parse_sqft(da[1])), 'USD')
        listing.off_mkt_date = parse_date(da[2])
        listing.orig_price = parse_money(da[3], 'Orig. Price:  ')

        # fourth row
        club = itol(children[4])
        listing.style = parse_string(club[0], 'Style: ')
        listing.outdoor_space = parse_string(club[1], 'Outdoor Space: ')
        listing.assoc_fee = parse_money(club[2], 'Assoc.Fee: ')

        # fifth row
        bottle = itol(children[6])
        listing.rooms = parse_int(bottle[0], 'Rooms: ')
        listing.beds = parse_int(bottle[1], 'Beds: ')
        listing.baths = parse_string(bottle[2], 'Baths: ')
        listing.living_area = parse_int(bottle[3], 'Living Area: ')
        listing.tax = parse_money(bottle[4], 'Tax: ')

        # sixth row
        full = itol(children[8])
        listing.garage = parse_int(full[0], 'Garage: ')
        listing.parking = parse_int(full[1], 'Parking: ')
        listing.pets = parse_string(full[2], 'Pets: ')
        listing.year_built = parse_int(full[3], 'Year Built: ')
        listing.fy = parse_int(full[4], 'Fy: ')

        # seventh row
        of_bub = itol(children[10])
        listing.remarks = parse_string(of_bub[0], 'Remarks: ')
    except Exception:
        # I'm a monster
        listing.remarks = u''

    return listing
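parse_node also assumes a small itol helper; its name and usage (turning node.children into something indexable) suggest:

# Hypothetical helper assumed by parse_node; "itol" reads as
# "iterator to list".
def itol(iterable):
    return list(iterable)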
Example #11
def write_listing(job_title, job_link, org_name, source, date_posted):
    new_listing = Listing(job_title, job_link, org_name, source, date_posted)
    db.session.add(new_listing)
    db.session.commit()
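write_listing relies on a Flask-SQLAlchemy session; a hypothetical model matching the positional constructor arguments (column types and lengths are guesses):

# Hypothetical Flask-SQLAlchemy model for the call above.
class Listing(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    job_title = db.Column(db.String(200))
    job_link = db.Column(db.String(500))
    org_name = db.Column(db.String(200))
    source = db.Column(db.String(100))
    date_posted = db.Column(db.String(50))

    def __init__(self, job_title, job_link, org_name, source, date_posted):
        self.job_title = job_title
        self.job_link = job_link
        self.org_name = org_name
        self.source = source
        self.date_posted = date_posted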
Example #12
# Imports this snippet relies on; REQUEST_URL_PREFIX, REGEX and OUTPUT_DIR
# are module-level constants defined elsewhere in the script.
from datetime import datetime
from os import makedirs
from os.path import exists, join
from pickle import HIGHEST_PROTOCOL, dump
from re import finditer, search
from time import sleep

import requests

listings = []
i = 1
while True:
    params = {"p": i}
    r = requests.get(REQUEST_URL_PREFIX, params=params)
    if r.text is None or search(REGEX, r.text) is None:
        break
    for match in finditer(REGEX, r.text):
        domain = match.group(1)
        bitcoin_price = match.group(2)
        namecoin_price = match.group(3)
        litecoin_price = match.group(4)
        peercoin_price = match.group(5)
        primecoin_price = match.group(6)
        prices = {
            "bitcoin_price": bitcoin_price,
            "namecoin_price": namecoin_price,
            "litecoin_price": litecoin_price,
            "peercoin_price": peercoin_price,
            "primecoin_price": primecoin_price
        }
        listings.append(Listing(domain, prices, datetime.now()))
    i += 1
    sleep(0.5)

if not exists(OUTPUT_DIR):
    makedirs(OUTPUT_DIR)
filename = datetime.now().strftime("%Y-%m-%d-%H_%M.pickle")
with open(join(OUTPUT_DIR, filename), "wb") as output_file:
    dump(listings, output_file, protocol=HIGHEST_PROTOCOL)
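Reading a snapshot back is symmetric; a hypothetical usage sketch (the Listing class must be importable when unpickling, and the attribute names assume Listing stores its constructor arguments as-is):

from pickle import load

with open(join(OUTPUT_DIR, filename), "rb") as f:
    snapshot = load(f)
for listing in snapshot:
    # attribute names are assumptions about Listing's internals
    print(listing.domain, listing.prices["bitcoin_price"])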