Esempio n. 1
0
def scrape_dance_cal(keys_from_spreadsheet):
    """Scrape dancecal.com and return the resulting event list.

    Each ``div`` with class ``DCListEvent`` on the page fetched from
    ``URL_DC`` is turned into an ``Event`` whose fields (name, url, start
    and end date, city/state/country, dance styles, teachers, details,
    bands) are filled from the ``span`` elements inside it. An event whose
    lower-cased name is already in ``event_name_list`` is skipped. Every
    other named event gets a key from ``create_key`` and is handed to
    ``append_to_event_list``.

    Args:
        keys_from_spreadsheet: Passed through unchanged to
            ``append_to_event_list``.

    Returns:
        The value returned by the last ``append_to_event_list`` call, or
        an empty list when the page produced no new event.
    """
    # Bound up front so the final return is defined even when no event on
    # the page qualifies (previously this raised UnboundLocalError).
    event_list = []
    soup = get_soup(URL_DC)
    for event_div in soup.findAll('div', {'class': 'DCListEvent'}):
        name = None
        event = Event()
        for span in event_div.findAll('span'):
            # A span without a class attribute matches none of the markers
            # below; .get() avoids the KeyError that span['class'] raises.
            classes = span.get('class') or []

            if 'DCListName' in classes:
                name = span.text.strip()
            # NOTE(review): this prints once per span (including None before
            # the name span is reached); kept to preserve existing output.
            print(name)

            # Skip spans seen before the name is known, and every span of an
            # event that is already present in the instance list.
            if name is None or name.lower() in event_name_list:
                continue

            if 'DCListName' in classes:
                event.name = name
                # If the span holds several links, the last one wins.
                for a_tag in span.findAll('a', href=True):
                    event.url = a_tag['href']
            if 'DCEventInfoDate' in classes:
                event.start_date = parse(span.text)
                # The site gives no end date, so assume the event ends on
                # the Sunday on or after the start date (weekday() is 0 for
                # Monday and 6 for Sunday; e.g. a Friday start ends two
                # days later).
                days_until_sunday = 6 - event.start_date.weekday()
                event.end_date = event.start_date + datetime.timedelta(
                    days=days_until_sunday)
            if 'DCEventInfoWhere' in classes:
                # The first piece is whatever precedes the first ':' or ','
                # and is not used; the rest is city[, state], country.
                location_list = span.text.replace(':', ',').split(',')
                if len(location_list) == 3:
                    event.country = location_list[2].strip()
                    event.city = location_list[1].strip()
                if len(location_list) == 4:
                    event.country = location_list[3].strip()
                    event.state = location_list[2].strip()
                    event.city = location_list[1].strip()
            if 'DCEventInfoDances' in classes:
                pieces = span.text.split(': ')
                # Leave the field untouched when the separator is missing
                # instead of failing with IndexError.
                if len(pieces) > 1:
                    event.dance_styles = pieces[1].lower().strip()
            if 'DCEventInfoTeachers' in classes:
                # Work on the raw markup: '<br/>', ':' and '</i>' all become
                # the '$' delimiter, '|' becomes 'and', and the first and
                # last pieces of the split are dropped.
                markup = (str(span)
                          .replace('<br/>', '$')
                          .replace(':', '$')
                          .replace('</i>', '$')
                          .replace('|', 'and'))
                event.teachers = markup.split('$')[1:-1]
            if 'DCEventInfoDesc' in classes:
                event.details = span.text.strip()
            if 'DCEventInfoBands' in classes:
                pieces = span.text.split(':')
                if len(pieces) > 1:
                    event.bands = pieces[1].strip()

        # Only events that received a name (i.e. were not skipped) are added.
        if event.name is not None:
            event.key = create_key(event)
            event_list = append_to_event_list(
                event, event.key, keys_from_spreadsheet)
    return event_list
Esempio n. 2
0
def create_event_list():
    """Build a list of Event objects from the 'Sheet1' worksheet rows.

    Rows are read through ``utils.iter_worksheet`` with the header on
    row 1. A row is ignored when its key is empty, its 'obsolete' cell is
    '1', or its status is 'past'. For the remaining rows the date cells go
    through ``parse``, the cost cells through ``get_cost``, and distance,
    flight cost and driving time are converted with ``int``. An empty
    currency falls back to 'USD' and an empty driving time to 99999.

    Returns:
        The list of populated Event objects, in worksheet order.
    """
    # (Event attribute, worksheet column) pairs that are copied verbatim.
    copied_columns = (
        ('city', 'city'),
        ('state', 'state'),
        ('country', 'country'),
        ('dance_styles', 'dance styles'),
        ('status', 'status'),
        ('url', 'url'),
        ('teachers', 'teachers'),
        ('bands', 'bands'),
        ('details', 'details'),
        ('obsolete', 'obsolete'),
    )

    events = []
    records = utils.iter_worksheet(spreadsheet, 'Sheet1', header_row=1)
    for record in records:
        # Guard clause: skip keyless, obsolete and past rows.
        if (record['key'] == '' or record['obsolete'] == '1'
                or record['status'] == 'past'):
            continue

        entry = Event()
        entry.key = record['key']
        entry.name = record['name']
        entry.start_date = parse(record['start date'])
        entry.end_date = parse(record['end date'])
        for attribute, column in copied_columns:
            setattr(entry, attribute, record[column])
        entry.workshop_cost = get_cost(record, 'workshop cost')
        entry.party_pass_cost = get_cost(record, 'party pass cost')
        entry.distance = int(record['distance'])
        entry.flight_cost = int(record['flight cost'])
        entry.event_type = record['type']
        # Blank cells fall back to a default currency / sentinel duration.
        entry.currency = (
            'USD' if record['currency'] == '' else record['currency'])
        entry.driving_time = (
            99999 if record['driving time'] == ''
            else int(record['driving time']))
        events.append(entry)
    return events