def get_and_save_event_information(data):
    """Parse raw Ticketmaster API JSON and store its artists, venues and shows.

    For every event under ``data['_embedded']['events']`` the event name is
    used as the artist name, the local start date as the show date and the
    first listed venue (name, city, state code) as the venue. The artist and
    the venue are looked up by name and created only when they are missing;
    a Show linking the two is then saved.

    :param data: JSON data returned by the Ticketmaster API, or None
    :type data: dict
    :return: None
    """
    if data is None:
        return

    # A response without an '_embedded' -> 'events' list is treated as
    # containing no events instead of raising KeyError.
    events = data.get('_embedded', {}).get('events', [])

    for event in events:
        artist_name = event['name']
        date = event['dates']['start']['localDate']
        venue_info = event['_embedded']['venues'][0]  # only the first venue is used
        venue_name = venue_info['name']
        venue_city = venue_info['city']['name']
        venue_state = venue_info['state']['stateCode']

        # get_or_create hands back the stored row whether it already existed
        # or was just inserted, so the show below is always linked to *this*
        # event's artist and venue (never to ids left over from an earlier
        # iteration, and never skipped because the rows already existed).
        artist, _ = Artist.objects.get_or_create(name=artist_name)
        venue, _ = Venue.objects.get_or_create(
            name=venue_name,
            defaults={'city': venue_city, 'state': venue_state},
        )

        try:
            Show(show_date=date, artist=artist, venue=venue).save()
        except IntegrityError as e:
            # The database rejected the row (e.g. the show is already
            # stored); report it and carry on with the next event.
            print(e)
def test_add_duplicate_show(self):
    """Saving a second Show with the same date, artist and venue must raise IntegrityError."""
    stadium = Venue(name='cool_stadium', city='Minneapolis', state='MN')
    stadium.save()
    performer = Artist(name='bobby')
    performer.save()

    show_fields = {
        'show_date': '2021-07-28 11:00:00',
        'artist': performer,
        'venue': stadium,
    }

    # The first show is stored on its own; only the second, identical save
    # is expected to fail.
    original_show = Show(**show_fields)
    original_show.save()

    duplicate_show = Show(**show_fields)
    with self.assertRaises(IntegrityError):
        duplicate_show.save()
def setUp(self):
    """Create the sample rows used by the tests.

    Saves one user, then five artists, venues and shows with one note per
    show. Afterwards shows 1, 2, 3 and 4 receive 4, 3, 2 and 1 additional
    notes respectively.
    """
    test_user = User(username='******', password='******')
    test_user.save()

    # NOTE(review): related rows are referenced through hard-coded primary
    # keys (1..5, user 1), which presumes the rows saved here receive those
    # ids - confirm against the test database setup.
    for pk in range(1, 6):
        Artist(name=f'Number{pk}').save()
        Venue(name=f'venue{pk}', city=f'city{pk}', state=f'state{pk}').save()
        Show(show_date=timezone.now(), artist=Artist(id=pk), venue=Venue(pk)).save()
        Note(
            show=Show(f'{pk}'),
            user=User(id=1),
            title=f'title{pk + 1}',
            text=f'text{pk}',
        ).save()

    # (show primary key, number of additional notes), in creation order
    extra_notes_per_show = ((1, 4), (2, 3), (3, 2), (4, 1))
    for show_pk, extra_count in extra_notes_per_show:
        for _ in range(extra_count):
            Note(
                show=Show(f'{show_pk}'),
                user=User(id=1),
                title=f'title{2}',
                text=f'text{2}',
            ).save()
def test_user_can_add_filled_data(self):
    """ProfileForm is valid when every profile field is supplied."""
    account = User(username='******', email='*****@*****.**')
    account.save()

    # Stored artist, venue and show for the form's favourite_* fields.
    band = Artist(name='Nym', hometown='Place', description='A Band')
    band.save()
    arena = Venue(name='Arena', address='123 Location')
    arena.save()
    gig = Show(show_date=datetime.now(), artist=band, venue=arena)
    gig.save()

    profile_form = ProfileForm({
        'twitter_username': '******',
        'bio': 'Fake biography.',
        'favorite_artist': band,
        'favorite_show': gig,
    })

    self.assertTrue(profile_form.is_valid())
def setUp(self):
    """Create one user plus three artists, venues and shows, each show with a single note."""
    test_user = User(username='******', password='******')
    test_user.save()

    # NOTE(review): related rows are referenced through hard-coded primary
    # keys (1..3, user 1), which presumes the rows saved here receive those
    # ids - confirm against the test database setup.
    for pk in (1, 2, 3):
        new_artist = Artist(name=f'Number{pk}')
        new_artist.save()

        new_venue = Venue(name=f'venue{pk}', city=f'city{pk}', state=f'state{pk}')
        new_venue.save()

        # timezone.now() follows the Django settings for the current time
        new_show = Show(show_date=timezone.now(), artist=Artist(id=pk), venue=Venue(pk))
        new_show.save()

        new_note = Note(
            show=Show(f'{pk}'),
            user=User(id=1),
            title=f'title{pk}',
            text=f'text{pk}',
        )
        new_note.save()
def process_item(self, item, spider):
    """Store a scraped venue or event in the database and pass the item on.

    Venue items are saved unless a venue with the same name is already
    stored. Event items make sure the artist exists and, unless a show with
    the same URL is already stored, save a Show linked to that artist and to
    the stored venue named by the item.

    :param item: scraped item; ``Scraping.items.Venue`` and
        ``Scraping.items.Event`` are persisted, any other type is passed
        through untouched
    :param spider: spider that produced the item (not used here)
    :return: the item that was passed in, so later pipeline stages always
        receive it
    :raises Venue.DoesNotExist: if an event names a venue that is not stored
    """
    if isinstance(item, Scraping.items.Venue):
        if not Venue.objects.filter(name=item['name']).exists():
            Venue(name=item['name'], city=item['city'], state=item['state']).save()

    elif isinstance(item, Scraping.items.Event):
        # Fetch the artist once and reuse it for the show instead of building
        # a throwaway instance and querying for it again afterwards.
        artist = Artist_Model.objects.filter(name=item['artist']).first()
        if artist is None:
            artist = Artist_Model(name=item['artist'])
            artist.save()

        if not Show.objects.filter(url=item['url']).exists():
            # Dates arrive as e.g. 'Friday, July 30, 2021'.
            date_object = datetime.strptime(item['date'], '%A, %B %d, %Y')
            venue = Venue.objects.get(name=item['venue'])
            Show(
                artist=artist,
                venue=venue,
                name=item['name'],
                url=item['url'],
                time=item['time'],
                ages=item['ages'],
                show_date=date_object,
            ).save()

    return item
def test_no_duplicate_show_saved_to_dbase(self):
    """A show repeating an existing date, artist and venue must raise IntegrityError on save."""
    composer = Artist(name='George Gershwin')
    composer.save()

    stadium = Venue(name='U.S.A. Bank Stadium', city='Minneapolis', state='MN')
    stadium.save()

    show_kwargs = dict(show_date='2021-07-31 22:00:00', artist=composer, venue=stadium)
    Show(**show_kwargs).save()

    # An identical second show is built first; only its save is expected to fail.
    repeat_show = Show(**show_kwargs)
    with self.assertRaises(IntegrityError):
        repeat_show.save()
def setUp(self):
    """Create the sample rows used by the tests.

    Saves one user, then five artists, venues and shows with one note per
    show. Afterwards show 1 receives four more notes and show 2 three more.
    Every note carries the title 'title2' and the text 'text2'.
    """
    test_user = User(username='******', password='******')
    test_user.save()

    def save_note(show_pk):
        # All notes in this fixture share the same title/text and user 1.
        Note(
            show=Show(f'{show_pk}'),
            user=User(id=1),
            title=f'title{2}',
            text=f'text{2}',
        ).save()

    # NOTE(review): related rows are referenced through hard-coded primary
    # keys (1..5, user 1), which presumes the rows saved here receive those
    # ids - confirm against the test database setup.
    for pk in range(1, 6):
        Artist(name=f'Number{pk}').save()
        Venue(name=f'venue{pk}', city=f'city{pk}', state=f'state{pk}').save()
        Show(show_date=timezone.now(), artist=Artist(id=pk), venue=Venue(pk)).save()
        save_note(pk)

    # (show primary key, number of additional notes), in creation order
    for show_pk, extra_count in ((1, 4), (2, 3)):
        for _ in range(extra_count):
            save_note(show_pk)
def _first_text(html_item, selector):
    """Return the stripped text of the first element matching ``selector``, or None if nothing matches."""
    matches = html_item.select(selector)
    if not matches:
        return None
    return str(matches[0].text).strip()


def scrape_first(page_count=30):
    """Scrape past shows from first-avenue.com and save artists, venues and shows.

    Listing pages 0 .. ``page_count - 1`` are requested. On each page every
    element with the CSS class ``h-100`` that contains a ``.day`` element is
    treated as a show entry: the text of its first ``<a>`` is the artist
    name, ``.venue_name`` is the venue, and ``.day`` / ``.month`` / ``.year``
    form the show date. Venues are always stored with city 'Minneapolis' and
    state 'MN'. Problems are printed and the scraper moves on to the next
    entry or page; nothing is raised to the caller.

    :param page_count: number of listing pages to request
    :type page_count: int
    :return: None
    """
    for page_number in range(page_count):
        url = f'https://first-avenue.com/shows/page/{page_number}/?orderby=past_shows'
        try:
            # The timeout keeps one unresponsive request from hanging the scrape.
            r = requests.get(url, timeout=30)
            soup = BeautifulSoup(r.content, 'html.parser')
        except Exception as e:
            print(e)
            # No page was parsed: skip it rather than hitting an undefined
            # `soup` or re-reading the previous page.
            continue

        # selecting elements with <div class="d-flex flex-column h-100 flex-fill">
        for html_item in soup.find_all(class_="h-100"):
            day = _first_text(html_item, '.day')
            if day is None:
                # Containers without a date are not show entries.
                continue

            artist = None
            try:
                band_name = _first_text(html_item, 'a')
                if band_name is None:
                    print('No artist name found, artist not added.')
                else:
                    artist, created = Artist.objects.get_or_create(name=band_name)
                    if created:
                        print(f'created new artist named {artist.name}')
                    else:
                        print('Duplicate Artist entry, not added.')
            except Exception as e:
                print(e)

            venue = None
            try:
                venue_name = _first_text(html_item, '.venue_name')
                if venue_name is None:
                    print('No venue name found, venue not added.')
                else:
                    venue, created = Venue.objects.get_or_create(
                        name=venue_name,
                        defaults={'city': 'Minneapolis', 'state': 'MN'},
                    )
                    if created:
                        print(f'created new venue named {venue.name}')
                    else:
                        print('Duplicate Venue entry, not added.')
            except Exception as e:
                print(e)

            if artist is None or venue is None:
                # Without both rows from *this* entry no show can be stored;
                # never fall back to names left over from a previous entry.
                continue

            try:
                if len(day) == 1:  # we need DD format for the ISO date
                    day = '0' + day
                month = month_dict[_first_text(html_item, '.month')]
                year = _first_text(html_item, '.year')
                date_time = date.fromisoformat(year + '-' + month + '-' + day)
                print(date_time)

                # The show uses the exact rows resolved above; a substring
                # name search could pick a different artist or venue whose
                # name merely contains this one.
                Show(show_date=date_time, artist=artist, venue=venue).save()
                print(f'created new show on {date_time}')
            except django.db.utils.IntegrityError:
                print('Duplicate Show entry, not added.')
            except Exception as e:
                print(e)
def _first_text(html_item, selector):
    """Return the stripped text of the first element matching ``selector``, or None if nothing matches."""
    matches = html_item.select(selector)
    if not matches:
        return None
    return str(matches[0].text).strip()


def scrape_first(page_count=15):
    """Scrape past shows from https://first-avenue.com/shows/ into the database.

    Uses requests and BeautifulSoup to fetch listing pages
    0 .. ``page_count - 1``. On each page every element with the CSS class
    ``h-100`` that contains a ``.day`` element is treated as a show entry:
    the text of its first ``<a>`` is the artist name, ``.venue_name`` is the
    venue, and ``.day`` / ``.month`` / ``.year`` form the show date. Venues
    are always stored with city 'Minneapolis' and state 'MN'. Problems are
    printed and the scraper moves on; nothing is raised to the caller.

    :param page_count: number of listing pages to request
    :type page_count: int
    :return: None
    """
    for page_number in range(page_count):
        url = f'https://first-avenue.com/shows/page/{page_number}/?orderby=past_shows'
        try:
            # The timeout keeps one unresponsive request from hanging the scrape.
            r = requests.get(url, timeout=30)
            soup = BeautifulSoup(r.content, 'html.parser')
        except Exception as e:
            print(e)
            # No page was parsed: skip it rather than hitting an undefined
            # `soup` or re-reading the previous page.
            continue

        # selecting elements with <div class="d-flex flex-column h-100 flex-fill">
        for html_item in soup.find_all(class_="h-100"):
            day = _first_text(html_item, '.day')
            if day is None:
                # Containers without a date are not show entries.
                continue

            artist = None
            try:
                # the first link in the entry should be the band's name
                band_name = _first_text(html_item, 'a')
                if band_name is None:
                    print('No artist name found, artist not added.')
                else:
                    artist, created = Artist.objects.get_or_create(name=band_name)
                    if created:
                        print(f'created new artist named {artist.name}')
                    else:
                        print('Duplicate Artist entry, not added.')
                        # NOTE(review): an already-known artist stops
                        # processing of the remaining entries on this page,
                        # as before - confirm this early exit is intended,
                        # since it also skips any new shows further down.
                        break
            except Exception as e:
                print(e)

            venue = None
            try:
                venue_name = _first_text(html_item, '.venue_name')
                if venue_name is None:
                    print('No venue name found, venue not added.')
                else:
                    venue, created = Venue.objects.get_or_create(
                        name=venue_name,
                        defaults={'city': 'Minneapolis', 'state': 'MN'},
                    )
                    if created:
                        print(f'created new venue named {venue.name}')
                    else:
                        print('Duplicate Venue entry, not added.')
            except Exception as e:
                print(e)

            if artist is None or venue is None:
                # Without both rows from *this* entry no show can be stored;
                # never fall back to names left over from a previous entry.
                continue

            try:
                # days 1-9 need a leading '0' for the ISO date format
                if len(day) == 1:
                    day = '0' + day
                month = month_dict[_first_text(html_item, '.month')]
                year = _first_text(html_item, '.year')
                date_time = date.fromisoformat(year + '-' + month + '-' + day)

                # The show uses the exact rows resolved above; a substring
                # name search could pick a different artist or venue whose
                # name merely contains this one.
                Show(show_date=date_time, artist=artist, venue=venue).save()
                print(f'created new show on {date_time}')
            except django.db.utils.IntegrityError:
                print('Duplicate Show entry, not added.')
            except Exception as e:
                print(e)