def main():
    # Initialize bot
    SLACK_CHANNEL = "#synth-bot-v2"
    SLACK_CHANNEL_ID = "C013RL4N1EJ"
    client = WebClient(token=creds["bot-token"])

    # Create search queries
    posts_SD = CraigslistForSale(site="sandiego", filters={'query': "eurorack"})
    posts_OC = CraigslistForSale(site="orangecounty", filters={'query': "eurorack"})
    posts_ALL = {"SD": posts_SD, "OC": posts_OC}

    # Get posts via python-craigslist
    df = get_posts(posts_ALL)

    # Get blacklist urls from Slack via reactions
    blacklist_urls = compile_blacklist(SLACK_CHANNEL_ID, client)

    # Compile into text for bot
    text_posts = []
    for i, row in df.iterrows():
        if row['url'] not in blacklist_urls:
            text_posts.append(
                f"{row['name']} | ${row['price']} | {row['where']}\n"
                f"Updated {row['updated']} days ago | Created {row['created']} days ago\n"
                f"{row['url']}\n\n"
            )

    # Check if there are any new messages
    if len(text_posts) == 0:
        text_posts.append("No new synths today!")

    # Send messages
    send_messages(SLACK_CHANNEL, text_posts, client)
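# A minimal sketch of the send_messages helper that main() assumes above; the
# helper itself is hypothetical, but chat_postMessage is the standard
# slack_sdk WebClient call for posting to a channel.
def send_messages(channel, text_posts, client):
    for post in text_posts:
        client.chat_postMessage(channel=channel, text=post)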
def searchCL(searchTerms):
    print("searching craigslist...")
    locations = [('sandiego', 'nsd'), ('sandiego', 'csd'),
                 ('sandiego', 'ssd'), ('sandiego', 'esd')]
    rows = []
    for site, area in locations:
        # CraigslistForSale.show_filters()
        cl_h = CraigslistForSale(site=site,
                                 area=area,
                                 category='pha',
                                 filters={
                                     'query': searchTerms,
                                     'bundle_duplicates': 'True'
                                 })
        rows.extend(cl_h.get_results(sort_by='newest', geotagged=True))
    cl_pricelist = pd.DataFrame(rows)

    # data clean-up
    cl_pricelist['price'] = (cl_pricelist['price'].astype(str)
                             .str.replace('$', '', regex=False)
                             .astype(float))
    # TODO: check that there aren't duplicates, by comparing description, price, etc.
    return cl_pricelist
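# Hypothetical call, assuming the craigslist and pandas imports are in place;
# searchCL returns a single DataFrame of cleaned listings across all areas.
prices = searchCL('synthesizer')
print(prices['price'].describe())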
def query_craig(self):
    if self.verbose:
        print('# query_craig()')
    if self.veryverbose:
        print(self.info['filters'])
    cl_a = CraigslistForSale(site=self.info['site'],
                             area=self.info['area'],
                             category=self.info['category'],
                             filters=self.info['filters'])
    limit = 0
    for result in cl_a.get_results(sort_by='newest'):
        record_time = car_util.time_object(result['datetime'], '%Y-%m-%d %H:%M')
        clean_time = car_util.time_object(self.info['since-date'], '%Y-%m-%dT%H:%M:%SZ')
        if record_time > clean_time:
            xCar = car_info.car({'url': result['url']}).update_info()
            # print('x' + str(xCar.info))
            while xCar.error != '':
                print('Error ' + str(xCar.error) + ' ' + result['url'])
                if xCar.error == 408:  # Request Timeout
                    print('  time problem, wait 3 and try again')
                    time.sleep(3)
                    xCar = car_info.car({'url': result['url']}).update_info()
                if xCar.error == 404:  # Not Found: treat the listing as sold
                    print('  not found!!')
                    xCar.info['sold-date'] = datetime.datetime.utcnow()
                    xCar.save_me()
                    xCar.post_solr()
                    break
            time.sleep(1)
            limit += 1
def main(argv):
    craigslist_sites = parse_craigslist.parse_craigslist_sites()
    make, model = utilities.get_parameters()
    for location in craigslist_sites:
        file_name = f'{location}_{make}_{model}'
        folder = '../FileDump/'
        print(f'Searching the {location} craigslist site for {make} {model}')
        cl_s = CraigslistForSale(site=location,
                                 filters={
                                     'make': make,
                                     'model': model,
                                     'min_price': 2000
                                 })
        results = cl_s.get_results()
        results_df = parse_craigslist.parse_results(results)
        list_of_dics = parse_craigslist.parse_vehicle_urls(results_df)
        vehicles_df = pd.DataFrame(list_of_dics)
        vehicles_df.index.name = 'VehicleKey'
        if not os.path.exists("../FileDump"):
            os.makedirs("../FileDump")
        vehicles_df.to_csv(folder + file_name, sep='|')
def scrape(): """Scrapes craigslist and finds the latest listings.""" filters = {'query': settings.QUERY} cl = CraigslistForSale(site=settings.CRAIGSLIST_SITE, filters=filters) results = [] gen = cl.get_results(sort_by='newest', geotagged=True, limit=20) while True: try: result = next(gen) except StopIteration: break except Exception: continue listing = session.query(Listing).filter_by(cl_id=result["id"]).first() # Don't store the listing if it already exists. if listing is not None: continue # Create the listing object listing = Listing(result) # Save the listing so we don't grab it again session.add(listing) session.commit() results.append(result) return results
def generate_craig_search(searchinput1):
    # python-craigslist expects the query filter as a string, not a token list
    vancouver = CraigslistForSale(site='vancouver',
                                  filters={'query': searchinput1})
    craigslist_price = []
    # Cap the search at 30 geotagged results
    for result in vancouver.get_results(geotagged=True, limit=30):
        result['price'] = result['price'].replace('$', '')
        craigslist_price.append(float(result['price']))
        print(result)
    return craigslist_price
def do_scrape():
    all_results = []

    # (query, min_price, max_price) triples to search for
    searches = [
        ('concept 2', settings.MIN_PRICE, settings.MAX_PRICE),
        ('aether backpack', 50, 170),
        ('gregory backpack', 60, 160),
    ]
    for q, min_price, max_price in searches:
        for area in settings.AREAS:
            for section in settings.SECTIONS:
                time.sleep(2)
                print("Searching for {} in {} in {}".format(q, section, area))
                cl = CraigslistForSale(site=settings.CRAIGSLIST_SITE,
                                       area=area,
                                       category=section,
                                       filters={
                                           'max_price': max_price,
                                           'min_price': min_price,
                                           'query': q,
                                           'search_titles': 'T'
                                       })
                all_results += scrape_area(cl, area, section)

    print("{}: Got {} results".format(time.ctime(), len(all_results)))

    sc = SlackClient(settings.SLACK_TOKEN)
    for result in all_results:
        post_listing_to_slack(sc, result)
def search(self, search_term):
    master_results = []
    results = CraigslistForSale(filters={'query': search_term,
                                         'posted_today': True})
    for result in results.get_results(sort_by='newest', geotagged=True):
        pprint.pprint(result)
        master_results.append(result)
    return master_results
def query_craigslist():
    print('query_craigslist')
    cl_h = CraigslistForSale(site='seattle',
                             category='foa',
                             filters={'query': 'xxx'})
    return cl_h.get_results(sort_by='newest')
def get_listings(self, query=None, max_price=None):
    filters = {}
    if query:
        filters['query'] = query
    if max_price:
        filters['max_price'] = max_price
    cl_fs = CraigslistForSale(site=self.site,
                              category=self.category,
                              filters=filters)
    return cl_fs.get_results(sort_by='newest')
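# Hypothetical usage -- the Scraper class name, site, and category are
# assumptions; get_listings itself is defined above.
scraper = Scraper(site='seattle', category='sss')
for listing in scraper.get_listings(query='road bike', max_price=500):
    print(listing['name'], listing['price'])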
def __init__(self, query=None):
    prev = os.path.dirname(os.getcwd())
    db = os.path.join(prev, 'database', 'craigslist_results.db')
    self.conn = self.connect_db(db)
    if query is not None:
        self.craig = CraigslistForSale(site='sandiego',
                                       filters={'query': query})
        # Fill db with queried items now
        self.sql_init(query)
def scrape_area(area): """ Scrapes craigslist for a certain geographic area, and finds the latest listings. :param area: :return: A list of results. """ cl_fs = CraigslistForSale(site=settings.CRAIGSLIST_SITE, area=area, category=settings.CRAIGSLIST_FORSALE_SECTION, filters={'make': 'triumph'}) results = [] gen = cl_fs.get_results(sort_by='newest', limit=150) while True: try: result = next(gen) except StopIteration: break except Exception: continue listing = session.query(Listing).filter_by(cl_id=result["id"]).first() # Don't store the listing if it already exists. if listing is None: # Try parsing the price. price = 0 try: price = float(result["price"].replace("$", "")) except Exception: pass # Create the listing object. listing = Listing( link=result["url"], created=parse(result["datetime"]), ## lat=lat, ## lon=lon, name=result["name"], price=price, ## location=result["where"], cl_id=result["id"], ## area=result["area"], ## bart_stop=result["bart"] ) # Save the listing so we don't grab it again. session.add(listing) session.commit() if len(result["name"]) > 0: results.append(result) return results
def scrape(): """ Scrapes craigslist for a certain geographic area, and finds the latest listings. :param area: :return: A list of results. """ cl = CraigslistForSale(site=settings.CRAIGSLIST_SITE, category=settings.CATEGORY, filters={ 'max_price': settings.MAX_PRICE, 'min_price': settings.MIN_PRICE, 'has_image': True }) results = [] gen = cl.get_results(sort_by='newest', geotagged=True, limit=20) while True: try: result = next(gen) except StopIteration: break except Exception: continue listing = session.query(Listing).filter_by(cl_id=result["id"]).first() # Don't store the listing if it already exists. if listing is None: if result["where"] != 'Ann Arbor': continue # Try parsing the price. price = 0 try: price = float(result["price"].replace("$", "")) except Exception: pass # Create the listing object. listing = Listing( link=result["url"], created=parse(result["datetime"]), name=result["name"], price=price, location=result["where"], cl_id=result["id"], ) # Save the listing so we don't grab it again. session.add(listing) session.commit() results.append(result) return results
def scrape_area(area): """ Scrapes craigslist for a certain geographic area, and finds the latest listings. :param area: :return: A list of results. """ cl_fs = CraigslistForSale(site=settings.CRAIGSLIST_SITE, area=area, category=settings.CRAIGSLIST_FORSALE_SECTION, filters = {'make': 'triumph'}) results = [] gen = cl_fs.get_results(sort_by='newest', limit=150) while True: try: result = next(gen) except StopIteration: break except Exception: continue listing = session.query(Listing).filter_by(cl_id=result["id"]).first() # Don't store the listing if it already exists. if listing is None: # Try parsing the price. price = 0 try: price = float(result["price"].replace("$", "")) except Exception: pass # Create the listing object. listing = Listing( link=result["url"], created=parse(result["datetime"]), ## lat=lat, ## lon=lon, name=result["name"], price=price, ## location=result["where"], cl_id=result["id"], ## area=result["area"], ## bart_stop=result["bart"] ) # Save the listing so we don't grab it again. session.add(listing) session.commit() if len(result["name"]) > 0: results.append(result) return results
def get_result(query):
    filters = {'query': query}
    # python-craigslist takes a single area code, so search each area in turn
    for area in ('van', 'nvm', 'bnc', 'rds', 'pml', 'rch'):
        cl_h = CraigslistForSale(site='vancouver',
                                 area=area,
                                 category='cta',
                                 filters=filters)
        for result in cl_h.get_results(sort_by='newest', limit=3):
            print(result['name'])
            print(result['url'])
            print(result['price'])
            print(result['datetime'])
            print('---------------------------------------')
def scrape():
    with open('data.pickle', 'rb') as fp:
        cl_listings = pickle.load(fp)

    # scrape for data
    cl_query = CraigslistForSale(site=YOUR_STATE,
                                 filters={'max_price': 250, 'query': 'ikea desk'})
    for result in cl_query.get_results(sort_by='newest'):
        print(result)
        if result not in cl_listings:
            cl_listings.append(result)

    with open('data.pickle', 'wb') as fp:
        pickle.dump(cl_listings, fp)
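# On a first run data.pickle does not exist yet and the open() above will
# raise; a hedged helper that seeds an empty list instead:
def load_listings(path='data.pickle'):
    try:
        with open(path, 'rb') as fp:
            return pickle.load(fp)
    except FileNotFoundError:
        return []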
def search(filters):
    from craigslist import CraigslistForSale
    clobject = CraigslistForSale(site='sfbay',
                                 area='',
                                 category='sss',
                                 filters=filters)
    # get_results returns a generator, so materialize it before testing for emptiness
    response = list(clobject.get_results(sort_by='newest', limit=500))
    if not response:
        return None
    return [{
        "query": filters["query"],
        "name": item['name'],
        "url": item['url'],
        "price": item['price'],
        "location": item['where'],
    } for item in response]
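# Example call; the 'query' key is required because search() echoes it back
# into each result row.
rows = search({'query': 'record player', 'max_price': 100})
for row in rows or []:
    print(row['name'], row['price'], row['url'])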
def search():
    for search_config in config['scans']['for_sale']:
        print('Scanning ForSale')
        print(search_config)
        for site in search_config['sites']:
            craigslist_query = CraigslistForSale(
                site=site,
                category=search_config['category'],
                filters=search_config['filters'])
            for result in craigslist_query.get_results(sort_by='newest', limit=50):
                print('Web Result')
                print(result)
                if len(listings_table.search(Listing.id == result['id'])) == 0:
                    print('Detected new listing')
                    print(result)
                    listings_table.insert(result)
                    pending_notifications_table.insert(result)
def search_results(request):
    all_results = []
    settings = Settings.objects.first()
    search_interval_in_minutes = settings.search_interval_minutes

    for query in settings.search_terms.split(", "):
        part = {}
        part["query"] = query
        part_results = []
        cl_searcher = CraigslistForSale(site=settings.search_site,
                                        filters={
                                            "query": query,
                                            "posted_today": True,
                                            "zip_code": settings.search_zip_code,
                                            "search_distance": settings.search_radius,
                                            "search_titles": True
                                        })
        for result in cl_searcher.get_results(sort_by='newest', geotagged=True):
            search_result = SearchResult()
            search_result.name = result["name"].title() if result["name"] else ""
            search_result.price = result["price"] if result["price"] else ""
            search_result.location = result["where"].title() if result["where"] else ""
            search_result.url = result["url"]
            part_results.append(search_result)
        if part_results:
            part["results"] = part_results
        all_results.append(part)

    # Strip the leading zero from the hour, e.g. "09:15" -> "9:15"
    extra_title_text = " - last checked {}".format(
        datetime.datetime.now().strftime("%I:%M:%S %p")).replace(" 0", " ")

    return render(
        request, 'search/search_results.html', {
            'all_results': all_results,
            'search_interval_in_minutes': search_interval_in_minutes,
            'extra_title_text': extra_title_text
        })
def search(query):
    cl_s = CraigslistForSale(site='lasvegas',
                             category='fua',
                             filters={
                                 'zip_code': '89117',
                                 'search_titles': True,
                                 'posted_today': False,
                                 'query': query,
                                 'search_distance': '30'
                             })
    for result in cl_s.get_results(sort_by='newest'):
        listing_id = result['id']
        exists = fetch_query(listing_id)
        if not exists:
            insert_row(result['repost_of'], result['datetime'], result['url'],
                       result['price'], result['name'], result['id'])
            conn.commit()
            row = fetch_query(listing_id)
            text(row[2].encode('ascii', 'ignore'))
def main(): """Run the main procedure""" for query in get_queries(): search = query.get('query') city = query.get('city', get_default('city')) sort_by = query.get('sort_by', get_default('sort_by')) category = query.get('cat', None) max_results = query.get('max', get_default('max_results')) posted_today = query.get('posted_today', get_default('posted_today')) print('\nRUNNING QUERY:'.upper()) print(f' - city: {city}') print(f' - query: {search}') print(f' - sort_by: {sort_by}') print(f' - category: {category}') print(f' - max_results: {max_results}') print(f' - posted_today: {posted_today}') # Custom category for gym if category == 'gym': category = 'sss?excats=7-13-22-2-24-1-23-2-1-1-2-9-10\ -1-1-1-2-2-8-1-1-1-1-1-4-1-3-1-3-1-1-1-1-7-1-1-\ 1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-2-2-1-1-1-2-1-1\ -1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-2-1' for_sale = CraigslistForSale(site=city, category=category, filters={ 'query': search, 'has_image': True, 'posted_today': posted_today }) results = for_sale.get_results(sort_by=sort_by, geotagged=True, include_details=True) if MODE == 'email': email_results(results, max_results) else: print(f'mode {MODE} not supported')
def scrape_whips():
    # Scrape Craigslist for whips.
    cl_fs = CraigslistForSale(site=settings.CRAIGSLIST_SITE,
                              area='van',
                              category=settings.CRAIGSLIST_AUTO_SECTION,
                              filters={'min_price': settings.MIN_PRICE_WHIPS,
                                       'max_price': settings.MAX_PRICE_WHIPS,
                                       'query': settings.WHIPS_INCLUDED_TERMS,
                                       'search_titles': True})
    results = []
    for result in cl_fs.get_results(sort_by='newest',
                                    limit=settings.LIMIT,
                                    include_details=True):
        results.append(result)

    # Filter scraped results for included terms.
    whips = []
    included = 0
    for result in results:
        whip = session.query(Whips).filter_by(cl_id=result['id']).first()

        # Don't store the whip if it already exists.
        if whip is None:
            whip = Whips(
                cl_id=result['id'],
                link=result['url'],
                created=parse(result['datetime']),
                name=result['name'],
                price=f"${format(float(result['price'][1:]), ',.0f')} CAD",
                location=result['where'],
                body=result['body'],
                image=result['images'][0]
            )
            included += 1
            whips.append(result)

            # Save whip so we don't grab it again.
            session.add(whip)
            session.commit()

    print(f'{time.ctime()}: Found {included} new whips.')

    # Create slack client.
    sc = SlackClient(settings.SLACK_TOKEN)

    # Post each result to Slack.
    for whip in whips:
        post_whip_to_slack(sc, whip)
def do_scrape():
    cl = CraigslistForSale(
        site="newjersey",
        # Raw query string appended to the category as a workaround for
        # filters the wrapper doesn't expose directly
        category="sss?query=MACBOOK&sort=rel&search_distance=5&postal=07307")
    results = cl.get_results(sort_by="newest", geotagged=True, limit=10)

    # Create the Slack client once, outside the result loop; read the token
    # from the environment rather than hard-coding a secret in source
    SLACK_TOKEN = os.environ["SLACK_TOKEN"]
    SLACK_CHANNEL = "#craigslist"
    sc = SlackClient(SLACK_TOKEN)

    for result in results:
        desc = "{0} | {1} | {2} | {3} | <{4}>".format(result["name"],
                                                      result["datetime"],
                                                      result["price"],
                                                      result["geotag"],
                                                      result["url"])
        sc.api_call("chat.postMessage",
                    channel=SLACK_CHANNEL,
                    text=desc,
                    username="******",
                    icon_emoji=":robot_face:")
def __get_listings_for_city(self, city, search_filter: CraigslistSearchParams):
    try:
        cl = CraigslistForSale(
            site=city,
            category="ava",
            filters={
                "max_price": search_filter.max_price,
                "min_price": search_filter.min_price,
                "query": search_filter.title,
                "search_titles": True,
            },
        ).get_results(include_details=True)
        return cl
    except ValueError:
        print("invalid site {}".format(city))
        return []
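# A sketch of how the per-city helper above might be fanned out; this
# get_all_listings method is hypothetical and assumed to live on the same
# class, so the name-mangled private call resolves.
def get_all_listings(self, cities, search_filter: CraigslistSearchParams):
    listings = []
    for city in cities:
        listings.extend(self.__get_listings_for_city(city, search_filter))
    return listings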
def main():
    filters = {
        'min_price': settings.MIN_PRICE,
        'max_price': settings.MAX_PRICE,
    }
    results = DotMap()
    for site in settings.CRAIGSLIST_SITES:
        temp_results = CraigslistForSale(site=site,
                                         category=settings.CRAIGSLIST_CATEGORY,
                                         filters=filters).get_results(
                                             sort_by='newest',
                                             limit=settings.RESULTS_PER_RUN,
                                             geotagged=True)
        results[site] = []
        for result in temp_results:
            results[site].append(result)
    results.pprint()
logging.info('Setting up the instagram and twitter accounts')
insta_app = insta.insta_use(tt.insta_login)
twit_app = twit.twit_use(tt.twitter_auth_keys)

# iterate through all the sites in the csv read in
for i, j in site_list.iterrows():
    # logging.info(j['site'])
    # run the search on the specific site
    cl_fs_car = CraigslistForSale(
        site=j['site'],
        category=cat_use,
        filters={
            'query': query_use,
            'has_image': True,
            'search_titles': True,
            # 'auto_transmission': 'manual',
            'auto_fuel_type': ['gas', 'hybrid', 'electric', 'other'],
            'max_year': '2000',
            'auto_title_status': ['clean', 'salvage', 'rebuilt', 'lien']
        })
    for result in cl_fs_car.get_results():
        try:
            # convert the string time from the ad to datetime format
            date_time_use = convert(result['datetime'])
            logging.info('found result!')
            # check that the posting time was within the last 24 hours of the run
            if date_time_use > datetime_limit:
def scrape_area(): """ Scrapes craigslist for a certain geographic area, and finds the latest listings. :param area: :return: A list of results. """ cl_p = CraigslistForSale(site=settings.CRAIGSLIST_SITE, category='mpa', filters={'max_price': settings.MAX_PRICE}) cl_s = CraigslistForSale(site=settings.CRAIGSLIST_SITE, category='sna', filters={'max_price': settings.MAX_PRICE}) cl_c = CraigslistForSale(site=settings.CRAIGSLIST_SITE, category='mca', filters={'max_price': settings.MAX_PRICE}) results = [] gen = cl_p.get_results(sort_by='newest', limit=10) while True: print("first") try: result = next(gen) except StopIteration: break except Exception: continue print("prelisting") listing = session.query(Listing).filter_by(cl_id=result["id"]).first() print("postlisting") # Don't store the listing if it already exists. if listing is None: print("listingisnone") price = 0 try: print("price") price = float(result["price"].replace("$", "")) except Exception: pass print("here") # Create the listing object. listing = Listing(link=result["url"], name=result["name"], price=price, cl_id=result["id"]) print("there") # Save the listing so we don't grab it again. session.add(listing) session.commit() results.append(result) print("wtf") gen = cl_s.get_results(sort_by='newest', limit=10) while True: print("second") try: result = next(gen) except StopIteration: break except Exception: continue listing = session.query(Listing).filter_by(cl_id=result["id"]).first() # Don't store the listing if it already exists. if listing is None: price = 0 try: price = float(result["price"].replace("$", "")) except Exception: pass # Create the listing object. listing = Listing(link=result["url"], name=result["name"], price=price, cl_id=result["id"]) # Save the listing so we don't grab it again. session.add(listing) session.commit() results.append(result) gen = cl_c.get_results(sort_by='newest', limit=10) while True: print("third") try: result = next(gen) except StopIteration: break except Exception: continue listing = session.query(Listing).filter_by(cl_id=result["id"]).first() # Don't store the listing if it already exists. if listing is None: price = 0 try: price = float(result["price"].replace("$", "")) except Exception: pass # Create the listing object. listing = Listing(link=result["url"], name=result["name"], price=price, cl_id=result["id"]) # Save the listing so we don't grab it again. session.add(listing) session.commit() results.append(result) return results
w = []
count = 0
category = 'cta'
site = 'austin'
min_year = 2009
max_year = 2013
min_price = 5000
max_price = 25000
make = 'bmw+328i'

for_sale = CraigslistForSale(site=site,
                             category=category,
                             filters={
                                 'max_price': max_price,
                                 'min_price': min_price,
                                 'min_year': min_year,
                                 'max_year': max_year,
                                 'make': make
                             })

for result in for_sale.get_results():
    count += 1
    x.append(count)
    price = int(result['price'][1:])  # strip the leading '$'
    y.append(price)
    mean = round(np.mean(y), 0)
    z.append(mean)
try:
    city = re.findall(r'^https://([^>]*)\.craigslist\.org', link)[0]
except IndexError:
    continue
if city in ['forums', '', 'www']:
    continue
cities.append(city)

conn = sqlite3.connect(r"./carigslist_data_main.db")
start = time.time()
conn.execute('''CREATE TABLE newdata
                (City, Miles, ID, Name, Price, date, url, VIN, odometer, status, model)''')

for city in cities:
    print(city)
    # Slice the search into 1000-mile bands so each query returns a small,
    # pageable slice of the inventory
    for min_mls in range(5000, 180000, 1000):
        cl_h = CraigslistForSale(site=city,
                                 category='cta',
                                 filters={'min_miles': min_mls,
                                          'max_miles': min_mls + 999,
                                          'min_price': 1500,
                                          'max_price': 70000,
                                          'has_image': True})
        seen_names = []
        for i in cl_h.get_results():
            # dedupe listings within a band by name
            if i['name'] in seen_names:
                continue
            seen_names.append(i['name'])
            conn.execute("INSERT INTO newdata(City, Miles, ID, Name, Price, date, url, "
                         "VIN, odometer, status, model) VALUES (?,?,?,?,?,?,?,?,?,?,?)",
                         (city, min_mls, i['id'], i['name'], i['price'], i['datetime'],
                          i['url'], i['VIN'], i['odometer'], i['status'], i['model']))
    conn.commit()

conn.close()
end = time.time()
print(end - start)  # t=1621.0531091690063
minmiles = config.get('FILTERS', 'minMiles')
maxmiles = config.get('FILTERS', 'maxMiles')

rList = []
last_insert = '0'

######################## MAIN CL REQUEST ########################
clfs = CraigslistForSale(site=site,
                         category=category,
                         filters={
                             'query': squery,
                             'max_price': int(maxprice),
                             'min_price': int(minprice),
                             'zip_code': szipcode,
                             'search_distance': sdistance,
                             'has_image': hasimage,
                             'make': make,
                             'model': model,
                             'min_year': minyear,
                             'max_year': maxyear,
                             'min_miles': minmiles,
                             'max_miles': maxmiles
                         })

###################### START PROGRAM ############################
def init():
    totalListings = str(clfs.get_results_approx_count())
    Logger.writeAndPrintLine(
        'Scraping all ' + totalListings + ' results matching criteria:\n', 0)
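# A sketch of how the request above might be consumed; collect_results is
# hypothetical, but it fills the rList declared earlier with result dicts.
def collect_results():
    for result in clfs.get_results(sort_by='newest'):
        rList.append(result)
    return rList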
def slack_bot(): """Slack Bot""" SLACK_TOKEN = "xoxp-562373075488-562373075840-563122452818-d5336ac18507ab9a272c57a5959c6a51" SLACK_CHANNEL = "#motorcycles" sc = SlackClient(SLACK_TOKEN) desc = "{0} | {1} | <{2}>".format(result["price"], result["name"], result["url"]) sc.api_call( "chat.postMessage", channel=SLACK_CHANNEL, text=desc, username='******', icon_emoji=':robot_face:' ) """Craigslist scraper""" cl = CraigslistForSale(site='minneapolis', category='mca', filters={'max_price': 1500, 'min_price': 250}) """SQL""" engine = create_engine('sqlite:///listings.db', echo=False) Base = declarative_base() class Listing(Base): """ A table to store data on craigslist listings. """ __tablename__ = 'listings'
#*******************************************************************#
###################### BEGIN MAIN ###################################
#*******************************************************************#
if len(sys.argv) < 3:
    print(str(sys.argv[0]) + ' [email] [password]')
    sys.exit()

my_names = 'names.txt'
fp = open(my_names, 'a')

while True:
    cl = CraigslistForSale(site='sacramento',
                           category='cto',
                           filters={
                               'max_price': 4000,
                               'min_price': 3000,
                               'max_miles': 200000,
                               'auto_title_status': 'clean',
                               'auto_transmission': 'manual',
                               'search_distance': 1000
                           })
    print('**************************************** SEARCH *********************************************')
    results = cl.get_results(sort_by='newest', geotagged=True, limit=20)
    my_dates = []
    for result in results:
        nameLower = result['name'].lower()  # name of the title
        # key words to look for in title
        out = 'outback'
        forester = 'forester'
        subaru = 'subaru'
        if out in nameLower or forester in nameLower or subaru in nameLower: