class InsertSparqlTest(unittest.TestCase):
    """Exercise MyspaceScrape.insert_sparql() against the GRAPH named graph.

    GRAPH is cleared before and after every test through the VODBC
    connection held in ``self.connect``.
    """

    def setUp(self):
        """Open the connection, clear GRAPH and run the scraper for the artist uid."""
        self.connect = VODBC.connect()
        clear_statement = 'SPARQL CLEAR GRAPH <%s>' % GRAPH
        self.connect.execute(clear_statement)
        print('getting myspace data')
        self.short_url_artist = 'kurtisrandom'
        self.uid_artist = '30650288'
        scraper = MyspaceScrape(uid=self.uid_artist)
        self.A = scraper
        scraper.run()

    def test_insert_sparql(self):
        # Insert the scraped data, then read the owl:sameAs object of the
        # uid resource back out of GRAPH and compare it with the short URL.
        print('inserting')
        self.A.insert_sparql(self.connect, GRAPH)
        print('checking insert')
        same_as_query = (
            'SPARQL SELECT ?same FROM <%s> WHERE { '
            '<http://dbtune.org/myspace/uid/30650288> '
            '<http://www.w3.org/2002/07/owl#sameAs> ?same } '
        ) % GRAPH
        result = self.connect.execute(same_as_query)
        first_row = result.next()
        same = first_row[0]
        assert 'http://dbtune.org/myspace/kurtisrandom' == same, 'wrong sameAs result: ' + str(same)
        self.connect.commit()

    def tearDown(self):
        """Clear GRAPH again and close the connection."""
        clear_statement = 'SPARQL CLEAR GRAPH <%s>' % GRAPH
        self.connect.execute(clear_statement)
        self.connect.close()
def default(self, artist_name):
    """Redirect a short MySpace URL name to its uid-based URL.

    Builds a MyspaceScrape from *artist_name*, calls get_page() and
    get_uid(), and on success raises a 303 cherrypy.HTTPRedirect to
    ``URL_BASE + '/uid/' + <uid>``.  If either call raises
    MyspaceException a message is printed and nothing is returned.
    """
    scraper = MyspaceScrape(short_url=artist_name)
    try:
        scraper.get_page()
        scraper.get_uid()
    except MyspaceException:
        print("invalid myspace url or problems w/ myspace server. if you are sure the url is correct, try again in a few seconds...")
        return None
    # Only reached when both scraper calls succeeded.
    target = URL_BASE + '/uid/' + scraper.uid
    raise cherrypy.HTTPRedirect(target, 303)
def setUp(self):
    """Create the artist (``self.A``) and non-artist (``self.M``) scrapers.

    Only get_page() is called on each; the tests call get_uid() and
    is_artist() themselves.
    """
    # artist profile
    self.short_url_artist, self.uid_artist = 'kurtisrandom', '30650288'
    artist = MyspaceScrape(uid=self.uid_artist)
    self.A = artist
    artist.get_page()

    # non-artist profile - basic page getting
    self.short_url_non, self.uid_non = 'flexboogie', '8840037'
    non_artist = MyspaceScrape(uid=self.uid_non)
    self.M = non_artist
    non_artist.get_page()
def setUp(self):
    """Create ``self.M`` for the artist uid and call get_page, get_uid, is_artist."""
    self.short_url_artist, self.uid_artist = 'kurtisrandom', '30650288'
    scraper = MyspaceScrape(uid=self.uid_artist)
    self.M = scraper
    # Run the preparation steps in their original order.
    for step in ('get_page', 'get_uid', 'is_artist'):
        getattr(scraper, step)()
def setUp(self):
    """Open the VODBC connection, clear GRAPH and run the scraper for the artist uid."""
    self.connect = VODBC.connect()
    clear_statement = 'SPARQL CLEAR GRAPH <%s>' % GRAPH
    self.connect.execute(clear_statement)
    print('getting myspace data')
    self.short_url_artist, self.uid_artist = 'kurtisrandom', '30650288'
    scraper = MyspaceScrape(uid=self.uid_artist)
    self.A = scraper
    scraper.run()
def setUp(self):
    """Create ``self.M`` for the non-artist uid and call get_page, get_uid, is_artist."""
    self.short_url_non, self.uid_non = 'flexboogie', '8840037'
    # basic page getting
    scraper = MyspaceScrape(uid=self.uid_non)
    self.M = scraper
    for step in ('get_page', 'get_uid', 'is_artist'):
        getattr(scraper, step)()
def test_get_songs(self):
    # Run the scraper through get_songs(), export its model with
    # mopy.exportRDFGraph and compare the sorted mo:Track titles found
    # by SPARQL with the expected list.
    scraper = MyspaceScrape(uid=self.uid_artist)
    for step in ('get_page', 'get_uid', 'is_artist', 'get_songs'):
        getattr(scraper, step)()

    # verify rdf w/ sparql
    graph = mopy.exportRDFGraph(scraper.mi)
    print(graph.serialize())

    title_query = (
        'SELECT ?titles WHERE { '
        '?track a <http://purl.org/ontology/mo/Track> . '
        '?track <http://purl.org/dc/elements/1.1/title> ?titles . } '
    )
    titles = sorted('%s' % row for row in graph.query(title_query))

    expected_titles = [
        u'A Big Idea short mix',
        u'Blue92',
        u'Just to Get a Remix',
        u'Know What You Want feat Albie',
        u'Out of mi head feat Raquelle',
        u'Time addicted to junk mix',
    ]
    assert titles == expected_titles, 'wrong set of titles: ' + str(titles)
def default(self, uid):
    """Serve the resource addressed by *uid*.

    * ``<uid>.rdf``: sets the RDF/XML Content-Type.  When USE_SPARQL is
      true, a SparqlSpace lookup is tried first and its make_graph()
      result is returned on a hit; on a miss the profile is scraped with
      MyspaceScrape, passed to insert_sparql() and serialized.  When
      USE_SPARQL is false the profile is scraped and serialized directly.
    * ``<uid>.html``: returns the Htmlify rendering of the uid resource.
    * anything else: raises a 303 cherrypy.HTTPRedirect to
      ``URL_BASE + '/uid/' + uid + '.rdf'``.

    The cursor and connection are now released in ``finally`` blocks, so
    an exception during lookup, scraping or insertion no longer leaks
    them.  The commit still only happens when everything succeeded.
    """
    if uid.endswith('.rdf'):
        # serve the data
        cherrypy.response.headers['Content-Type'] = 'application/rdf+xml; charset=UTF-8;'
        print(USE_SPARQL)
        bare_uid = uid.rsplit('.rdf')[0]

        if not USE_SPARQL:
            scraper = MyspaceScrape(uid=bare_uid)
            scraper.run()
            return scraper.serialize()

        # check sparql endpoint
        connect = VODBC.connect()
        try:
            cursor = connect.cursor()
            try:
                ss = SparqlSpace('http://dbtune.org/myspace/uid/' + str(bare_uid), cursor)
                if ss.select():
                    # Already stored: answer from the SPARQL store.
                    scraper = None
                    ret = ss.make_graph()
                else:
                    # Not stored yet: scrape it and insert the result.
                    ret = None
                    scraper = MyspaceScrape(uid=bare_uid)
                    scraper.run()
                    scraper.insert_sparql(cursor)
            finally:
                cursor.close()
            # Reached only when no exception occurred above.
            connect.commit()
        finally:
            connect.close()

        if scraper is None:
            return ret
        return scraper.serialize()
    elif uid.endswith('.html'):
        # serve the html - per the original author's note this branch is
        # never reached in practice
        mh = Htmlify("http://dbtune.org/myspace/uid/" + uid.rsplit('.html')[0])
        mh.parse_rdf()
        mh.get_all()
        return mh.html_head + mh.serialize_n3() + mh.get_available_as() + mh.html_tail
    else:
        raise cherrypy.HTTPRedirect(URL_BASE + '/uid/' + uid + '.rdf', 303)
class NonArtistTest(unittest.TestCase):
    """Checks of the non-artist scraping methods against uid 8840037 ('flexboogie').

    Values read from SPARQL results are initialised to a sentinel before
    each query loop, so an empty result fails the assertion with its
    message instead of leaving the name unbound.
    """

    def setUp(self):
        """Create ``self.M`` and call get_page, get_uid and is_artist on it."""
        self.short_url_non = 'flexboogie'
        self.uid_non = '8840037'
        # basic page getting
        self.M = MyspaceScrape(uid=self.uid_non)
        self.M.get_page()
        self.M.get_uid()
        self.M.is_artist()

    def test_get_nice_url(self):
        url = self.M.get_nice_url_non_artist()
        assert url == 'flexboogie', 'clean url is wrong: ' + str(url)

    def test_get_stats_non_artist(self):
        self.M.get_stats_non_artist()
        self.M.serialize()
        graph = mopy.exportRDFGraph(self.M.mi)

        age = None
        for row in graph.query('''SELECT ?age where { ?x <http://purl.org/ontology/myspace#age> ?age } '''):
            age = row[0]
        print(age)
        # Check for None first: int(None) would raise TypeError.
        assert age is not None and int(age) == 100, 'wrong age ' + str(age)

        gender = None
        for row in graph.query('''SELECT ?gender where { ?x <http://xmlns.com/foaf/0.1/gender> ?gender . } '''):
            gender = row[0]
        assert gender == 'male', 'wrong gender ' + str(gender)

        # The query text keeps the line break of the original literal.
        location_query = (
            'SELECT ?loc ?reg ?co where { '
            '?x <http://purl.org/ontology/myspace#locality> ?loc . '
            '?x <http://purl.org/ontology/myspace#region> ?reg . '
            '?x <http://purl.org/ontology/myspace#country> ?co . \n'
            '} '
        )
        loc = reg = co = None
        for row in graph.query(location_query):
            loc = row[0]
            reg = row[1]
            co = row[2]
        assert loc == 'SAINT LOUIS', 'wrong locality ' + str(loc)
        assert reg == 'Missouri', 'wrong region ' + str(reg)
        assert co == 'US', 'wrong country ' + str(co)

        # -1 is the same sentinel ArtistTest uses for totalFriends.
        totf = -1
        for row in graph.query('SELECT ?totf where { ?x <http://purl.org/ontology/myspace#totalFriends> ?totf } '):
            totf = row[0]
        assert int(totf) > 2000, 'wrong number of friends ' + str(totf)

    def test_get_image_non_artist(self):
        img = self.M.get_image_non_artist()
        # just check it looks vaguely like an image url
        assert img.startswith('http://') and img.endswith(('.jpg', '.gif')), 'does not look like an image url: ' + str(img)

    def test_get_friends(self):
        self.M.get_friends_non_artist()
        self.M.serialize()
        graph = mopy.exportRDFGraph(self.M.mi)
        friends = [
            '%s' % row
            for row in graph.query('''SELECT ?friends WHERE { ?s <http://purl.org/ontology/myspace#topFriend> ?friends } ''')
        ]
        assert len(friends) > 4, 'too few friends found - some kind of problem in .get_friends_non_artist()'
class ArtistTest(unittest.TestCase):
    """Checks of the artist scraping methods against uid 30650288 ('kurtisrandom').

    Values read from SPARQL results are initialised to a sentinel before
    each query loop, so an empty result fails the assertion with its
    message instead of leaving the name unbound.
    """

    def setUp(self):
        """Create ``self.M`` and call get_page, get_uid and is_artist on it."""
        self.short_url_artist = 'kurtisrandom'
        self.uid_artist = '30650288'
        self.M = MyspaceScrape(uid=self.uid_artist)
        self.M.get_page()
        self.M.get_uid()
        self.M.is_artist()

    def test_get_nice_url(self):
        url = self.M.get_nice_url()
        assert url == 'kurtisrandom', 'clean url is wrong: ' + str(url)

    def test_get_image(self):
        img = self.M.get_image()
        # just check it looks vaguely like an image url
        assert img.startswith('http://') and img.endswith('.jpg'), 'does not look like an image url: ' + str(img)

    def test_get_genres(self):
        genres = self.M.get_genres()
        # pop() removes the taken name from each genre's ``name`` collection.
        gnames = [genre.name.pop() for genre in genres]
        gnames.sort()
        assert gnames == ['Experimental', 'Hip Hop', 'R&B'], 'wrong genre names ' + str(gnames)

    def test_get_friends(self):
        self.M.get_friends()
        self.M.serialize()
        graph = self.M.graph
        friends = [
            '%s' % row
            for row in graph.query('''SELECT ?friends WHERE { ?s <http://purl.org/ontology/myspace#topFriend> ?friends } ''')
        ]
        friends.sort()
        expected = [
            u'http://dbtune.org/myspace/7shotscreamers',
            u'http://dbtune.org/myspace/Dawit7',
            u'http://dbtune.org/myspace/dbaad',
            u'http://dbtune.org/myspace/dbirghenthal',
            u'http://dbtune.org/myspace/djthumpasaurus',
            u'http://dbtune.org/myspace/duplicateband',
            u'http://dbtune.org/myspace/flexboogie',
            u'http://dbtune.org/myspace/fliplynch',
            u'http://dbtune.org/myspace/serpientesmsn',
            u'http://dbtune.org/myspace/speedballband',
            u'http://dbtune.org/myspace/thelegendjamesbrown',
            u'http://dbtune.org/myspace/uid/30642042',
        ]
        assert friends == expected, 'wrong friends list ' + str(friends)

    def test_get_stats(self):
        self.M.get_stats()
        self.M.serialize()
        graph = self.M.graph

        # The query text keeps the line break of the original literal.
        location_query = (
            'SELECT ?loc ?co where { '
            '?x <http://purl.org/ontology/myspace#locality> ?loc . '
            '?x <http://purl.org/ontology/myspace#country> ?co . \n'
            '} '
        )
        loc = co = None
        for row in graph.query(location_query):
            loc = row[0]
            co = row[1]
        assert loc == 'London', 'wrong locality ' + str(loc)
        assert co == 'United Kingdom', 'wrong country ' + str(co)

        # check based near
        based = None
        for row in graph.query('''select ?based where { ?x <http://xmlns.com/foaf/0.1/based_near> ?based } '''):
            based = row[0]
        assert str(based) == 'http://sws.geonames.org/2635167/', 'wrong based_near: ' + str(based)

        # total friends count
        totf = -1
        for row in graph.query('SELECT ?totf where { ?x <http://purl.org/ontology/myspace#totalFriends> ?totf } '):
            totf = row[0]
        assert int(totf) > 190, 'wrong number of total friends ' + str(totf)

        # profile views: only checks that some value was found
        views = -1
        for row in graph.query('SELECT ?views WHERE { ?x <http://purl.org/ontology/myspace#profileViews> ?views } '):
            views = row[0]
        assert views != -1, 'faild to get profile views: ' + str(views)
class CommonTest(unittest.TestCase):
    """Checks that apply to both profile kinds: uid lookup and artist detection.

    ``self.A`` is the scraper for the artist uid and ``self.M`` the one
    for the non-artist uid.
    """

    def setUp(self):
        """Create both scrapers and call get_page() on each."""
        self.short_url_artist = 'kurtisrandom'
        self.uid_artist = '30650288'
        self.A = MyspaceScrape(uid=self.uid_artist)
        self.A.get_page()
        self.short_url_non = 'flexboogie'
        self.uid_non = '8840037'
        # basic page getting
        self.M = MyspaceScrape(uid=self.uid_non)
        self.M.get_page()

    def test_get_uid(self):
        # The failure messages previously referenced an undefined name
        # ``M`` (a NameError on failure) and reported the artist uid as
        # the expected value for the non-artist check.
        self.M.get_uid()
        assert self.M.uid == self.uid_non, 'uid mismatch, got ' + str(self.M.uid) + ' expected ' + str(self.uid_non)
        self.A.get_uid()
        assert self.A.uid == self.uid_artist, 'uid mismatch, got ' + str(self.A.uid) + ' expected ' + str(self.uid_artist)

    def test_is_artist(self):
        self.A.get_uid()
        assert self.A.is_artist(), 'should be an artist'
        self.M.get_uid()
        assert not self.M.is_artist(), 'Flex Boogie is not an artist'
        assert self.M.name == '''Flex Boogie (Flex Boogie For Real)''', 'wrong name: ' + str(self.M.name)