def get_creator(self, tag='dc:creator'):
    """Return the item's creator/author name with surrounding whitespace removed.

    Lookup order:
      1. node text of the first ``tag`` element;
      2. ``util.getCData`` on that element when the text is None or blank;
      3. node text of the first ``<name>`` inside the first ``<author>``
         when the steps above produced nothing.

    :param tag: name of the element to read the creator from.
    :returns: the creator string, or ``''`` when no lookup produced a value.
    """
    creator = ''
    tagged = self.item.getElementsByTagName(tag)
    if tagged:
        creator = util.getNodeText(tagged[0].childNodes)
        if creator is None or creator.strip() == '':
            creator = util.getCData(tagged[0])
    if creator is None or creator.strip() == '':
        authors = self.item.getElementsByTagName('author')
        if authors:
            names = authors[0].getElementsByTagName('name')
            if names:
                creator = util.getNodeText(names[0].childNodes)
    # The checks above allow for None; normalise it so .strip() cannot fail
    # when no fallback supplied a value.
    return (creator or '').strip()
def set_locations_from_api(self): params = {"method": "geo.getmetros"} xmlevents = self.call(params) root = xmlevents.documentElement metros = root.getElementsByTagName("metros")[0] for m in metros.getElementsByTagName("metro"): city_name = util.getNodeText(m.getElementsByTagName("name")[0].childNodes) country_name = util.getNodeText(m.getElementsByTagName("country")[0].childNodes) try: dbcountry, created = Country.objects.get_or_create(name=country_name, short_name=slugify(country_name)) City.objects.get_or_create(name=city_name, slug=slugify(city_name), country=dbcountry) except: print "FAILED ON", city_name, country_name
def get_creator(self, tag='dc:creator'):
    """Return the item's creator/author name with surrounding whitespace removed.

    Lookup order:
      1. node text of the first ``tag`` element;
      2. ``util.getCData`` on that element when the text is None or blank;
      3. node text of the first ``<name>`` inside the first ``<author>``
         when the steps above produced nothing.

    :param tag: name of the element to read the creator from.
    :returns: the creator string, or ``''`` when no lookup produced a value.
    """
    creator = ''
    tagged = self.item.getElementsByTagName(tag)
    if tagged:
        creator = util.getNodeText(tagged[0].childNodes)
        if creator is None or creator.strip() == '':
            creator = util.getCData(tagged[0])
    if creator is None or creator.strip() == '':
        authors = self.item.getElementsByTagName('author')
        if authors:
            names = authors[0].getElementsByTagName('name')
            if names:
                creator = util.getNodeText(names[0].childNodes)
    # The checks above allow for None; normalise it so .strip() cannot fail
    # when no fallback supplied a value.
    return (creator or '').strip()
def get_title(self):
    """Return the stripped text of the item's first <title> element.

    When the plain node text is blank or contains the literal 'CDATA', the
    value is re-read through ``util.getCData`` instead.
    """
    title_node = self.item.getElementsByTagName('title')[0]
    text = util.getNodeText(title_node.childNodes)
    needs_cdata = text.find('CDATA') > -1 or text.strip() == ''
    if needs_cdata:
        # village voice, style peterson
        text = util.getCData(title_node)
    return text.strip()
def get_category(self):
    """Return the category to use for this item.

    A non-empty ``self.source.category`` always wins.  Otherwise the text of
    the item's first <category> element is returned, or '' when the item has
    no such element.
    """
    configured = self.source.category
    if configured != '':
        return configured
    categories = self.item.getElementsByTagName('category')
    if len(categories) > 0:
        return util.getNodeText(categories[0].childNodes)
    return ''
def get_description(self, tag='description'):
    """Return the item's description text with surrounding whitespace removed.

    Sources are tried in order until one yields a non-blank value:
      1. the first ``tag`` element (node text, else ``util.getCData``);
      2. the first ``<content>`` element (node text, else ``util.getCData``);
      3. the node text of the first ``<summary>`` element.

    :param tag: name of the primary element to read the description from.
    :returns: the description string, or ``''`` when nothing was found.
    """
    def is_blank(text):
        # True for None, empty and whitespace-only values.
        return text is None or text.strip() == ''

    def text_or_cdata(node):
        # Prefer plain node text; fall back to the CDATA reader when the text
        # is missing, blank, or still carries a literal 'CDATA' marker.
        text = util.getNodeText(node.childNodes)
        if is_blank(text) or 'CDATA' in text:
            text = util.getCData(node)
        return text

    desc = ''
    primary = self.item.getElementsByTagName(tag)
    if primary:
        desc = text_or_cdata(primary[0])
    if is_blank(desc):
        content = self.item.getElementsByTagName('content')
        if content:
            desc = text_or_cdata(content[0])
    if is_blank(desc):
        summary = self.item.getElementsByTagName('summary')
        if summary:
            desc = util.getNodeText(summary[0].childNodes)
    # The blank checks allow for None; normalise it so .strip() cannot fail
    # when no source supplied a value.
    return (desc or '').strip()
def get_category(self):
    """Return the category to use for this item.

    A non-empty ``self.source.category`` always wins.  Otherwise the text of
    the item's first <category> element is returned, or '' when the item has
    no such element.
    """
    configured = self.source.category
    if configured != '':
        return configured
    categories = self.item.getElementsByTagName('category')
    if len(categories) > 0:
        return util.getNodeText(categories[0].childNodes)
    return ''
def get_date(self):
    """Parse the item's first <dc:date> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is cut at its last '-' before being parsed with
    ``datetime_formats.GOTHAMIST_TIME_FORMAT``.
    """
    date_nodes = self.item.getElementsByTagName('dc:date')
    if not date_nodes:
        return None
    raw = util.getNodeText(date_nodes[0].childNodes)
    if raw == '':
        return None
    # 2011-01-01T14:00:18-05:00 -> 2011-01-01T14:00:18
    cut = raw.rfind('-')
    return datetime.datetime.strptime(
        raw[:cut], datetime_formats.GOTHAMIST_TIME_FORMAT)
def get_date(self):
    """Parse the item's first <updated> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is parsed with ``datetime_formats.BK_VEGAN_FORMAT`` and the result is
    moved back by five hours.
    """
    updated_nodes = self.item.getElementsByTagName('updated')
    if not updated_nodes:
        return None
    raw = util.getNodeText(updated_nodes[0].childNodes)
    if raw == '':
        return None
    parsed = datetime.datetime.strptime(raw, datetime_formats.BK_VEGAN_FORMAT)
    return parsed - datetime.timedelta(hours=5)
def set_locations_from_api(self): params = {"method": "geo.getmetros"} xmlevents = self.call(params) root = xmlevents.documentElement metros = root.getElementsByTagName('metros')[0] for m in metros.getElementsByTagName('metro'): city_name = util.getNodeText( m.getElementsByTagName('name')[0].childNodes) country_name = util.getNodeText( m.getElementsByTagName('country')[0].childNodes) try: dbcountry, created = Country.objects.get_or_create( name=country_name, short_name=slugify(country_name)) City.objects.get_or_create(name=city_name, slug=slugify(city_name), country=dbcountry) except: print 'FAILED ON', city_name, country_name
def get_date(self):
    """Parse the item's first <updated> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is parsed with ``datetime_formats.BK_VEGAN_FORMAT`` and the result is
    moved back by five hours.
    """
    updated_nodes = self.item.getElementsByTagName('updated')
    if not updated_nodes:
        return None
    raw = util.getNodeText(updated_nodes[0].childNodes)
    if raw == '':
        return None
    parsed = datetime.datetime.strptime(raw, datetime_formats.BK_VEGAN_FORMAT)
    return parsed - datetime.timedelta(hours=5)
def get_date(self):
    """Parse the item's first <published> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is cut at its last '-', stripped, and parsed with
    ``datetime_formats.GOTHAMIST_TIME_FORMAT``.
    """
    published_nodes = self.item.getElementsByTagName('published')
    if not published_nodes:
        return None
    raw = util.getNodeText(published_nodes[0].childNodes)
    if raw == '':
        return None
    # 2010-12-24T08:27:44-05:00 -> 2010-12-24T08:27:44
    cut = raw.rfind('-')
    stamp = raw[:cut].strip()
    return datetime.datetime.strptime(
        stamp, datetime_formats.GOTHAMIST_TIME_FORMAT)
def get_category(self):
    """Return the item's category, renamed where needed.

    The text of the first <category> element is used: 'Film' becomes
    'Movies', 'Theater' becomes 'Art', and anything else is returned
    unchanged.  Items without a <category> element are 'News'.
    """
    renames = {'Film': 'Movies', 'Theater': 'Art'}
    category_nodes = self.item.getElementsByTagName('category')
    if not category_nodes:
        return 'News'
    label = util.getNodeText(category_nodes[0].childNodes)
    return renames.get(label, label)
def get_date(self):
    """Parse the item's first <published> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is cut at its first '.' and parsed with
    ``datetime_formats.GOTHAMIST_TIME_FORMAT``.
    """
    published_nodes = self.item.getElementsByTagName('published')
    if not published_nodes:
        return None
    raw = util.getNodeText(published_nodes[0].childNodes)
    if raw == '':
        return None
    # 2011-01-16T14:03:20.364-05:00 -> 2011-01-16T14:03:20
    dot = raw.find('.')
    return datetime.datetime.strptime(
        raw[:dot], datetime_formats.GOTHAMIST_TIME_FORMAT)
def get_date(self):
    """Parse the item's first <updated> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is cut at its first '.', parsed with
    ``datetime_formats.GOTHAMIST_TIME_FORMAT``, and moved back by five hours.
    """
    updated_nodes = self.item.getElementsByTagName('updated')
    if not updated_nodes:
        return None
    raw = util.getNodeText(updated_nodes[0].childNodes)
    if raw == '':
        return None
    # 2009-08-13T17:33:20.000Z -> 2009-08-13T17:33:20
    dot = raw.find('.')
    parsed = datetime.datetime.strptime(
        raw[:dot], datetime_formats.GOTHAMIST_TIME_FORMAT)
    return parsed - datetime.timedelta(hours=5)
def get_description(self, tag='description'):
    """Return the item's description text with surrounding whitespace removed.

    Sources are tried in order until one yields a non-blank value:
      1. the first ``tag`` element (node text, else ``util.getCData``);
      2. the first ``<content>`` element (node text, else ``util.getCData``);
      3. the node text of the first ``<summary>`` element.

    :param tag: name of the primary element to read the description from.
    :returns: the description string, or ``''`` when nothing was found.
    """
    def is_blank(text):
        # True for None, empty and whitespace-only values.
        return text is None or text.strip() == ''

    def text_or_cdata(node):
        # Prefer plain node text; fall back to the CDATA reader when the text
        # is missing, blank, or still carries a literal 'CDATA' marker.
        text = util.getNodeText(node.childNodes)
        if is_blank(text) or 'CDATA' in text:
            text = util.getCData(node)
        return text

    desc = ''
    primary = self.item.getElementsByTagName(tag)
    if primary:
        desc = text_or_cdata(primary[0])
    if is_blank(desc):
        content = self.item.getElementsByTagName('content')
        if content:
            desc = text_or_cdata(content[0])
    if is_blank(desc):
        summary = self.item.getElementsByTagName('summary')
        if summary:
            desc = util.getNodeText(summary[0].childNodes)
    # The blank checks allow for None; normalise it so .strip() cannot fail
    # when no source supplied a value.
    return (desc or '').strip()
def get_category(self):
    """Return the item's category, renamed where needed.

    The text of the first <category> element is used: 'Film' becomes
    'Movies', 'Theater' becomes 'Art', and anything else is returned
    unchanged.  Items without a <category> element are 'News'.
    """
    renames = {'Film': 'Movies', 'Theater': 'Art'}
    category_nodes = self.item.getElementsByTagName('category')
    if not category_nodes:
        return 'News'
    label = util.getNodeText(category_nodes[0].childNodes)
    return renames.get(label, label)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' and before the last '+' is parsed with
    ``datetime_formats.VILLAGE_VOICE_TIME_FORMAT`` and the result is moved
    back by five hours.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Mon, 03 Jan 2011 14:19:06 +0000</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()
    stamp = after_weekday[:after_weekday.rfind('+')].strip()
    parsed = datetime.datetime.strptime(
        stamp, datetime_formats.VILLAGE_VOICE_TIME_FORMAT)
    return parsed - datetime.timedelta(hours=5)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' and before the last '-' is parsed with
    ``datetime_formats.L_MAG_TIME_FORMAT`` and the result is moved forward
    by three hours.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Fri, 24 Dec 2010 21:00:08 -0800</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()
    stamp = after_weekday[:after_weekday.rfind('-')].strip()
    parsed = datetime.datetime.strptime(
        stamp, datetime_formats.L_MAG_TIME_FORMAT)
    return parsed + datetime.timedelta(hours=3)
def get_date(self):
    """Parse the item's first <dc:date> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is cut at its last '-' before being parsed with
    ``datetime_formats.GOTHAMIST_TIME_FORMAT``.
    """
    date_nodes = self.item.getElementsByTagName('dc:date')
    if not date_nodes:
        return None
    raw = util.getNodeText(date_nodes[0].childNodes)
    if raw == '':
        return None
    # 2011-01-01T14:00:18-05:00 -> 2011-01-01T14:00:18
    cut = raw.rfind('-')
    return datetime.datetime.strptime(
        raw[:cut], datetime_formats.GOTHAMIST_TIME_FORMAT)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' and before the last '-' is parsed with
    ``datetime_formats.L_MAG_TIME_FORMAT``.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Fri, 31 Dec 2010 15:56:00 -0500</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()  # 31 Dec 2010 15:56:00 -0500
    stamp = after_weekday[:after_weekday.rfind('-')].strip()  # 31 Dec 2010 15:56:00
    return datetime.datetime.strptime(
        stamp, datetime_formats.L_MAG_TIME_FORMAT)
def get_date(self):
    """Parse the item's first <published> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is cut at its first '.' and parsed with
    ``datetime_formats.GOTHAMIST_TIME_FORMAT``.
    """
    published_nodes = self.item.getElementsByTagName('published')
    if not published_nodes:
        return None
    raw = util.getNodeText(published_nodes[0].childNodes)
    if raw == '':
        return None
    # 2011-01-16T14:03:20.364-05:00 -> 2011-01-16T14:03:20
    dot = raw.find('.')
    return datetime.datetime.strptime(
        raw[:dot], datetime_formats.GOTHAMIST_TIME_FORMAT)
def get_date(self):
    """Parse the item's first <published> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is cut at its last '-', stripped, and parsed with
    ``datetime_formats.GOTHAMIST_TIME_FORMAT``.
    """
    published_nodes = self.item.getElementsByTagName('published')
    if not published_nodes:
        return None
    raw = util.getNodeText(published_nodes[0].childNodes)
    if raw == '':
        return None
    # 2010-12-24T08:27:44-05:00 -> 2010-12-24T08:27:44
    cut = raw.rfind('-')
    stamp = raw[:cut].strip()
    return datetime.datetime.strptime(
        stamp, datetime_formats.GOTHAMIST_TIME_FORMAT)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' has every 'GMT' removed, is parsed with
    ``datetime_formats.L_MAG_TIME_FORMAT``, and the result is moved back by
    five hours.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Sun, 02 Jan 2011 23:35:40 GMT</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()  # 02 Jan 2011 23:35:40 GMT
    stamp = after_weekday.replace('GMT', '').strip()  # 02 Jan 2011 23:35:40
    parsed = datetime.datetime.strptime(
        stamp, datetime_formats.L_MAG_TIME_FORMAT)
    return parsed - datetime.timedelta(hours=5)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' and before the last '-' is parsed with
    ``datetime_formats.L_MAG_TIME_FORMAT`` and the result is moved forward
    by three hours.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Fri, 24 Dec 2010 21:00:08 -0800</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()
    stamp = after_weekday[:after_weekday.rfind('-')].strip()
    parsed = datetime.datetime.strptime(
        stamp, datetime_formats.L_MAG_TIME_FORMAT)
    return parsed + datetime.timedelta(hours=3)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' and before the last '+' is parsed with
    ``datetime_formats.VILLAGE_VOICE_TIME_FORMAT`` and the result is moved
    back by five hours.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Mon, 03 Jan 2011 14:19:06 +0000</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()
    stamp = after_weekday[:after_weekday.rfind('+')].strip()
    parsed = datetime.datetime.strptime(
        stamp, datetime_formats.VILLAGE_VOICE_TIME_FORMAT)
    return parsed - datetime.timedelta(hours=5)
def get_date(self):
    """Parse the item's first <updated> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    is cut at its first '.', parsed with
    ``datetime_formats.GOTHAMIST_TIME_FORMAT``, and moved back by five hours.
    """
    updated_nodes = self.item.getElementsByTagName('updated')
    if not updated_nodes:
        return None
    raw = util.getNodeText(updated_nodes[0].childNodes)
    if raw == '':
        return None
    # 2009-08-13T17:33:20.000Z -> 2009-08-13T17:33:20
    dot = raw.find('.')
    parsed = datetime.datetime.strptime(
        raw[:dot], datetime_formats.GOTHAMIST_TIME_FORMAT)
    return parsed - datetime.timedelta(hours=5)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' and before the last '-' is parsed with
    ``datetime_formats.VILLAGE_VOICE_TIME_FORMAT`` and the result is moved
    back by one hour.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Fri, 24 Sep 2010 10:44:43 -0400</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()  # 24 Sep 2010 10:44:43 -0400
    stamp = after_weekday[:after_weekday.rfind('-')].strip()  # 24 Sep 2010 10:44:43
    parsed = datetime.datetime.strptime(
        stamp, datetime_formats.VILLAGE_VOICE_TIME_FORMAT)
    return parsed - datetime.timedelta(hours=1)
def get_url(self):
    """Return the item's link URL, stripped, or '' when none is found.

    With exactly one <link> element its node text is used, falling back to
    its ``href`` attribute when the text is None or blank.  With any other
    number of <link> elements, the ``href`` of the first one whose ``rel``
    attribute is 'alternate' is used.
    """
    links = self.item.getElementsByTagName('link')
    if len(links) == 1:
        only_link = links[0]
        url = util.getNodeText(only_link.childNodes)
        if url is None or url.strip() == '':
            # bk vegan, flaming pab
            url = only_link.getAttribute('href')
    else:
        # style peterson
        url = next(
            (link.getAttribute('href') for link in links
             if link.getAttribute('rel') == 'alternate'),
            '')
    if url is None:
        url = ''
    return url.strip()
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' and before the last '-' is parsed with
    ``datetime_formats.L_MAG_TIME_FORMAT``.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Fri, 31 Dec 2010 15:56:00 -0500</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()  # 31 Dec 2010 15:56:00 -0500
    stamp = after_weekday[:after_weekday.rfind('-')].strip()  # 31 Dec 2010 15:56:00
    return datetime.datetime.strptime(
        stamp, datetime_formats.L_MAG_TIME_FORMAT)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' has every 'GMT' removed, is parsed with
    ``datetime_formats.L_MAG_TIME_FORMAT``, and the result is moved back by
    five hours.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Sun, 02 Jan 2011 23:35:40 GMT</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()  # 02 Jan 2011 23:35:40 GMT
    stamp = after_weekday.replace('GMT', '').strip()  # 02 Jan 2011 23:35:40
    parsed = datetime.datetime.strptime(
        stamp, datetime_formats.L_MAG_TIME_FORMAT)
    return parsed - datetime.timedelta(hours=5)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  The text
    after the first ',' and before the last '-' is parsed with
    ``datetime_formats.VILLAGE_VOICE_TIME_FORMAT`` and the result is moved
    back by one hour.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # <pubDate>Fri, 24 Sep 2010 10:44:43 -0400</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()  # 24 Sep 2010 10:44:43 -0400
    stamp = after_weekday[:after_weekday.rfind('-')].strip()  # 24 Sep 2010 10:44:43
    parsed = datetime.datetime.strptime(
        stamp, datetime_formats.VILLAGE_VOICE_TIME_FORMAT)
    return parsed - datetime.timedelta(hours=1)
def get_url(self):
    """Return the item's link URL, stripped, or '' when none is found.

    With exactly one <link> element its node text is used, falling back to
    its ``href`` attribute when the text is None or blank.  With any other
    number of <link> elements, the ``href`` of the first one whose ``rel``
    attribute is 'alternate' is used.
    """
    links = self.item.getElementsByTagName('link')
    if len(links) == 1:
        only_link = links[0]
        url = util.getNodeText(only_link.childNodes)
        if url is None or url.strip() == '':
            # bk vegan, flaming pab
            url = only_link.getAttribute('href')
    else:
        # style peterson
        url = next(
            (link.getAttribute('href') for link in links
             if link.getAttribute('rel') == 'alternate'),
            '')
    if url is None:
        url = ''
    return url.strip()
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  Two
    layouts are handled, selected by the source URL:

    * URLs containing 'promoEvents': the whole text is parsed with
      ``datetime_formats.VILLAGE_VOICE_PROMO_TIME_FORMAT``.
    * all other URLs: the text after the first ',' and before the last '-'
      is parsed with ``datetime_formats.VILLAGE_VOICE_TIME_FORMAT``.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # Membership test instead of ``find(...) > -0``: that comparison is
    # against 0, so a match at the very start of the URL was not detected.
    if 'promoEvents' in self.source.url:
        # <pubDate>2010-10-14 15:39:09</pubDate>
        return datetime.datetime.strptime(
            raw, datetime_formats.VILLAGE_VOICE_PROMO_TIME_FORMAT)
    # <pubDate>Fri, 31 Dec 2010 15:56:00 -0500</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()  # 31 Dec 2010 15:56:00 -0500
    stamp = after_weekday[:after_weekday.rfind('-')].strip()  # 31 Dec 2010 15:56:00
    return datetime.datetime.strptime(
        stamp, datetime_formats.VILLAGE_VOICE_TIME_FORMAT)
def get_date(self):
    """Parse the item's first <pubDate> element into a datetime.

    Returns None when the element is absent or its text is empty.  Two
    layouts are handled, selected by the source URL:

    * URLs containing 'promoEvents': the whole text is parsed with
      ``datetime_formats.VILLAGE_VOICE_PROMO_TIME_FORMAT``.
    * all other URLs: the text after the first ',' and before the last '-'
      is parsed with ``datetime_formats.VILLAGE_VOICE_TIME_FORMAT``.
    """
    pub_nodes = self.item.getElementsByTagName('pubDate')
    if not pub_nodes:
        return None
    raw = util.getNodeText(pub_nodes[0].childNodes)
    if raw == '':
        return None
    # Membership test instead of ``find(...) > -0``: that comparison is
    # against 0, so a match at the very start of the URL was not detected.
    if 'promoEvents' in self.source.url:
        # <pubDate>2010-10-14 15:39:09</pubDate>
        return datetime.datetime.strptime(
            raw, datetime_formats.VILLAGE_VOICE_PROMO_TIME_FORMAT)
    # <pubDate>Fri, 31 Dec 2010 15:56:00 -0500</pubDate>
    after_weekday = raw[raw.find(',') + 1:].strip()  # 31 Dec 2010 15:56:00 -0500
    stamp = after_weekday[:after_weekday.rfind('-')].strip()  # 31 Dec 2010 15:56:00
    return datetime.datetime.strptime(
        stamp, datetime_formats.VILLAGE_VOICE_TIME_FORMAT)
def save_event_to_db(self, eventNode, loc): artists = [] headliners = [] # load up artists artistsNodes = eventNode.getElementsByTagName("artists")[0] for a in artistsNodes.getElementsByTagName("artist"): try: artist = util.getNodeText(a.childNodes) a = Artist.objects.get_or_create(name=artist)[0] artists.append(a) except: print exception_util.get_exception_str() for h in artistsNodes.getElementsByTagName("headliner"): try: headliner = util.getNodeText(h.childNodes) h = Artist.objects.get_or_create(name=headliner)[0] headliners.append(h) except: print exception_util.get_exception_str() # venue stuff vNode = eventNode.getElementsByTagName("venue")[0] vId = util.getNodeText(vNode.getElementsByTagName("id")[0].childNodes) vName = util.getNodeText(vNode.getElementsByTagName("name")[0].childNodes) vExternalUrl = util.getNodeText(vNode.getElementsByTagName("url")[0].childNodes) vUrl = util.getNodeText(vNode.getElementsByTagName("website")[0].childNodes) vPhone = util.getNodeText(vNode.getElementsByTagName("phonenumber")[0].childNodes) imgNodes = vNode.getElementsByTagName("image") vImgSm = "" vImgMed = "" vImgLg = "" vImgXLg = "" vImgMega = "" for iNode in imgNodes: if iNode.hasChildNodes(): if iNode.getAttribute("size") == "small": vImgSm = util.getNodeText(iNode.childNodes) elif iNode.getAttribute("size") == "medium": vImgMed = util.getNodeText(iNode.childNodes) elif iNode.getAttribute("size") == "large": vImgLg = util.getNodeText(iNode.childNodes) elif iNode.getAttribute("size") == "extralarge": vImgXLg = util.getNodeText(iNode.childNodes) elif iNode.getAttribute("size") == "mega": vImgMega = util.getNodeText(iNode.childNodes) locNode = vNode.getElementsByTagName("location")[0] # vCity = util.getNodeText(locNode.getElementsByTagName('city')[0].childNodes) vCountry = util.getNodeText(locNode.getElementsByTagName("country")[0].childNodes) vStreet = util.getNodeText(locNode.getElementsByTagName("street")[0].childNodes) vZip = 
util.getNodeText(locNode.getElementsByTagName("postalcode")[0].childNodes) geoNode = locNode.getElementsByTagName("geo:point")[0] vLat = util.getNodeText(geoNode.getElementsByTagName("geo:lat")[0].childNodes) vLong = util.getNodeText(geoNode.getElementsByTagName("geo:long")[0].childNodes) city = City.objects.get(name=loc) v = None try: v = Venue.objects.get_or_create( external_id=vId, name=vName, city=city, country=vCountry, source=self.source )[0] v.street = vStreet v.zip = vZip v.lat = vLat v.long = vLong v.external_url = vExternalUrl v.url = vUrl v.phone_number = vPhone v.image_size_sm = vImgSm v.image_size_med = vImgMed v.image_size_lg = vImgLg v.image_size_xl = vImgXLg v.image_size_mega = vImgMega v.save() except: print "VENUE PROB", exception_util.get_exception_str() # Ticket stuff tickets = [] ticketsNode = eventNode.getElementsByTagName("tickets") if len(ticketsNode) != 0: ticketsNode = ticketsNode[0] for tNode in ticketsNode.getElementsByTagName("ticket"): supplier = tNode.getAttribute("supplier") tixUrl = util.getNodeText(tNode.childNodes) ticket = Ticket.objects.get_or_create(name=supplier, url=tixUrl)[0] tickets.append(ticket) # Tags stuff tags = [] tagsNode = eventNode.getElementsByTagName("tags") if len(tagsNode) != 0: tagsNode = tagsNode[0] for tNode in tagsNode.getElementsByTagName("tag"): tagTxt = util.getNodeText(tNode.childNodes) tag = Tag.objects.get_or_create(name=tagTxt)[0] tags.append(tag) # Event stuff lastfm_id = util.getNodeText(eventNode.getElementsByTagName("id")[0].childNodes) lastfm_id = "lfm" + lastfm_id title = util.getNodeText(eventNode.getElementsByTagName("title")[0].childNodes) startDate = util.getNodeText(eventNode.getElementsByTagName("startDate")[0].childNodes) start_date = datetime.datetime.fromtimestamp( time.mktime(time.strptime(startDate, datetime_formats.LAST_FM_FORMAT)) ) startDate = start_date.strftime(datetime_formats.MYSQL_FORMAT) description = 
util.getNodeText(eventNode.getElementsByTagName("description")[0].childNodes) # TODO get text??? external_url = "" url = "" for urlNode in eventNode.getElementsByTagName("url"): if urlNode.parentNode == eventNode: external_url = util.getNodeText(urlNode.childNodes) for urlNode in eventNode.getElementsByTagName("website"): if urlNode.parentNode == eventNode: url = util.getNodeText(urlNode.childNodes) cancelled = util.getNodeText(eventNode.getElementsByTagName("cancelled")[0].childNodes) if cancelled == "": cancelled = 0 else: cancelled = int(cancelled) imgSm = "" imgMed = "" imgLg = "" imgXLg = "" imgNodes = eventNode.getElementsByTagName("image") for iNode in imgNodes: if iNode.parentNode == eventNode: if iNode.getAttribute("size") == "small": imgSm = util.getNodeText(iNode.childNodes) elif iNode.getAttribute("size") == "medium": imgMed = util.getNodeText(iNode.childNodes) elif iNode.getAttribute("size") == "large": imgLg = util.getNodeText(iNode.childNodes) elif iNode.getAttribute("size") == "extralarge": imgXLg = util.getNodeText(iNode.childNodes) if v != None: event, created = Event.objects.get_or_create(external_id=lastfm_id) event.name = title event.start_time = start_date # default all last.fm events to end at 6 event.end_time = start_date + datetime.timedelta(hours=6) event.description = description event.image_size_sm = imgSm event.image_size_med = imgMed event.image_size_lg = imgLg event.image_size_xl = imgXLg event.cancelled = cancelled event.venue = v event.category = self.source.category event.source = self.source event.external_url = external_url event.url = url for a in artists: event.artists.add(a) for h in headliners: event.headliners.add(h) for t in tags: event.tags.add(t) for t in tickets: event.tickets.add(t) try: event.save() except: print "EVENT ERR", exception_util.get_exception_str()
def save_event_to_db(self, eventNode, loc): artists = [] headliners = [] # load up artists artistsNodes = eventNode.getElementsByTagName('artists')[0] for a in artistsNodes.getElementsByTagName('artist'): try: artist = util.getNodeText(a.childNodes) a = Artist.objects.get_or_create(name=artist)[0] artists.append(a) except: print exception_util.get_exception_str() for h in artistsNodes.getElementsByTagName('headliner'): try: headliner = util.getNodeText(h.childNodes) h = Artist.objects.get_or_create(name=headliner)[0] headliners.append(h) except: print exception_util.get_exception_str() # venue stuff vNode = eventNode.getElementsByTagName('venue')[0] vId = util.getNodeText(vNode.getElementsByTagName('id')[0].childNodes) vName = util.getNodeText( vNode.getElementsByTagName('name')[0].childNodes) vExternalUrl = util.getNodeText( vNode.getElementsByTagName('url')[0].childNodes) vUrl = util.getNodeText( vNode.getElementsByTagName('website')[0].childNodes) vPhone = util.getNodeText( vNode.getElementsByTagName('phonenumber')[0].childNodes) imgNodes = vNode.getElementsByTagName('image') vImgSm = "" vImgMed = "" vImgLg = "" vImgXLg = "" vImgMega = "" for iNode in imgNodes: if iNode.hasChildNodes(): if iNode.getAttribute('size') == "small": vImgSm = util.getNodeText(iNode.childNodes) elif iNode.getAttribute('size') == "medium": vImgMed = util.getNodeText(iNode.childNodes) elif iNode.getAttribute('size') == "large": vImgLg = util.getNodeText(iNode.childNodes) elif iNode.getAttribute('size') == "extralarge": vImgXLg = util.getNodeText(iNode.childNodes) elif iNode.getAttribute('size') == "mega": vImgMega = util.getNodeText(iNode.childNodes) locNode = vNode.getElementsByTagName('location')[0] #vCity = util.getNodeText(locNode.getElementsByTagName('city')[0].childNodes) vCountry = util.getNodeText( locNode.getElementsByTagName('country')[0].childNodes) vStreet = util.getNodeText( locNode.getElementsByTagName('street')[0].childNodes) vZip = util.getNodeText( 
locNode.getElementsByTagName('postalcode')[0].childNodes) geoNode = locNode.getElementsByTagName('geo:point')[0] vLat = util.getNodeText( geoNode.getElementsByTagName('geo:lat')[0].childNodes) vLong = util.getNodeText( geoNode.getElementsByTagName('geo:long')[0].childNodes) city = City.objects.get(name=loc) v = None try: v = Venue.objects.get_or_create(external_id=vId, name=vName, city=city, country=vCountry, source=self.source)[0] v.street = vStreet v.zip = vZip v.lat = vLat v.long = vLong v.external_url = vExternalUrl v.url = vUrl v.phone_number = vPhone v.image_size_sm = vImgSm v.image_size_med = vImgMed v.image_size_lg = vImgLg v.image_size_xl = vImgXLg v.image_size_mega = vImgMega v.save() except: print "VENUE PROB", exception_util.get_exception_str() # Ticket stuff tickets = [] ticketsNode = eventNode.getElementsByTagName('tickets') if len(ticketsNode) != 0: ticketsNode = ticketsNode[0] for tNode in ticketsNode.getElementsByTagName('ticket'): supplier = tNode.getAttribute('supplier') tixUrl = util.getNodeText(tNode.childNodes) ticket = Ticket.objects.get_or_create(name=supplier, url=tixUrl)[0] tickets.append(ticket) # Tags stuff tags = [] tagsNode = eventNode.getElementsByTagName('tags') if len(tagsNode) != 0: tagsNode = tagsNode[0] for tNode in tagsNode.getElementsByTagName('tag'): tagTxt = util.getNodeText(tNode.childNodes) tag = Tag.objects.get_or_create(name=tagTxt)[0] tags.append(tag) # Event stuff lastfm_id = util.getNodeText( eventNode.getElementsByTagName('id')[0].childNodes) lastfm_id = 'lfm' + lastfm_id title = util.getNodeText( eventNode.getElementsByTagName('title')[0].childNodes) startDate = util.getNodeText( eventNode.getElementsByTagName('startDate')[0].childNodes) start_date = datetime.datetime.fromtimestamp( time.mktime( time.strptime(startDate, datetime_formats.LAST_FM_FORMAT))) startDate = start_date.strftime(datetime_formats.MYSQL_FORMAT) description = util.getNodeText( eventNode.getElementsByTagName('description') [0].childNodes) #TODO 
get text??? external_url = "" url = "" for urlNode in eventNode.getElementsByTagName('url'): if urlNode.parentNode == eventNode: external_url = util.getNodeText(urlNode.childNodes) for urlNode in eventNode.getElementsByTagName('website'): if urlNode.parentNode == eventNode: url = util.getNodeText(urlNode.childNodes) cancelled = util.getNodeText( eventNode.getElementsByTagName('cancelled')[0].childNodes) if cancelled == '': cancelled = 0 else: cancelled = int(cancelled) imgSm = "" imgMed = "" imgLg = "" imgXLg = "" imgNodes = eventNode.getElementsByTagName('image') for iNode in imgNodes: if iNode.parentNode == eventNode: if iNode.getAttribute('size') == "small": imgSm = util.getNodeText(iNode.childNodes) elif iNode.getAttribute('size') == "medium": imgMed = util.getNodeText(iNode.childNodes) elif iNode.getAttribute('size') == "large": imgLg = util.getNodeText(iNode.childNodes) elif iNode.getAttribute('size') == "extralarge": imgXLg = util.getNodeText(iNode.childNodes) if v != None: event, created = Event.objects.get_or_create(external_id=lastfm_id) event.name = title event.start_time = start_date # default all last.fm events to end at 6 event.end_time = start_date + datetime.timedelta(hours=6) event.description = description event.image_size_sm = imgSm event.image_size_med = imgMed event.image_size_lg = imgLg event.image_size_xl = imgXLg event.cancelled = cancelled event.venue = v event.category = self.source.category event.source = self.source event.external_url = external_url event.url = url for a in artists: event.artists.add(a) for h in headliners: event.headliners.add(h) for t in tags: event.tags.add(t) for t in tickets: event.tickets.add(t) try: event.save() except: print "EVENT ERR", exception_util.get_exception_str()
def get_category(self):
    """Return the text of the item's first <dc:subject> element.

    Items without a <dc:subject> element are reported as 'News'.
    """
    subjects = self.item.getElementsByTagName('dc:subject')
    if not subjects:
        return 'News'
    return util.getNodeText(subjects[0].childNodes)
def get_category(self):
    """Return the text of the item's first <dc:subject> element.

    Items without a <dc:subject> element are reported as 'News'.
    """
    subjects = self.item.getElementsByTagName('dc:subject')
    if not subjects:
        return 'News'
    return util.getNodeText(subjects[0].childNodes)
def get_title(self):
    """Return the stripped text of the item's first <title> element.

    When the plain node text is blank or contains the literal 'CDATA', the
    value is re-read through ``util.getCData`` instead.
    """
    title_node = self.item.getElementsByTagName('title')[0]
    text = util.getNodeText(title_node.childNodes)
    needs_cdata = text.find('CDATA') > -1 or text.strip() == ''
    if needs_cdata:
        # village voice, style peterson
        text = util.getCData(title_node)
    return text.strip()