コード例 #1
0
    def query_location(self, article):
        """Resolve the country for a Wikipedia article from DBpedia coordinates.

        Builds a SPARQL query that selects every coordinate attached to the
        resource whose ``foaf:isPrimaryTopicOf`` is the article's Wikipedia
        URL, runs it through ``self.sparql``, converts each binding to a
        ``(latitude, longitude)`` pair of floats, and hands the pairs to
        ``majority_voting.vote``.  The winning pair is passed to
        ``country_lookup.get_country``.

        The names ``language`` and ``threshold`` are not parameters; they are
        resolved from the enclosing scope (presumably module-level settings --
        confirm against the rest of the file).

        Args:
            article: Wikipedia article title, appended verbatim to the
                article URL (no escaping is applied here).

        Returns:
            Whatever ``country_lookup.get_country`` returns for the voted
            coordinate, or ``None`` when the query fails or yields no
            bindings.
        """
        article_url = ('<http://' + language + '.wikipedia.org/wiki/'
                       + article + '>')
        location = None

        # geo: and foaf: are declared explicitly so the query does not depend
        # on prefixes that only some endpoints predefine.
        namespaces = """
        PREFIX dbo: <http://dbpedia.org/resource/classes#>
        PREFIX dbp: <http://dbpedia.org/property/>
        PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        """

        # Each UNION branch covers one of the property spellings used for
        # coordinates: decimal values bind ?lat/?long, degree/minute/second
        # values bind ?latDeg.. ?lonSec and optionally ?latDir/?lonDir.
        location_query = """
        {
            SELECT * WHERE {
                ?match dbp:latitude ?lat .
                ?match dbp:longitude ?long
            }
        }
        UNION
        {
            SELECT * WHERE {
                ?match geo:lat ?lat .
                ?match geo:long ?long
            }
        }
        UNION
        {
            SELECT * WHERE {
                ?match dbp:latDeg ?latDeg .
                ?match dbp:latMin ?latMin .
                ?match dbp:latSec ?latSec .
                ?match dbp:lonDeg ?lonDeg .
                ?match dbp:lonMin ?lonMin .
                ?match dbp:lonSec ?lonSec
            }
        }
        UNION
        {
            SELECT * WHERE {
                ?match dbp:latDeg ?latDeg .
                ?match dbp:latMin ?latMin .
                ?match dbp:latSec ?latSec .
                ?match dbp:lonDeg ?lonDeg .
                ?match dbp:lonMin ?lonMin .
                ?match dbp:lonSec ?lonSec .
                ?match dbp:latDir ?latDir .
                ?match dbp:lonDir ?lonDir
            }
        }
        UNION
        {
            SELECT * WHERE {
                ?match dbp:latDegrees ?latDeg .
                ?match dbp:latMinutes ?latMin .
                ?match dbp:latSeconds ?latSec .
                ?match dbp:longDegrees ?lonDeg .
                ?match dbp:longMinutes ?lonMin .
                ?match dbp:longSeconds ?lonSec .
                ?match dbp:latDirection ?latDir .
                ?match dbp:longDirection ?lonDir
            }
        }
        UNION
        {
            SELECT * WHERE {
                ?match dbp:latd ?lat .
                ?match dbp:longd ?long
            }
        }
        """
        query_string = namespaces + """
        SELECT * WHERE {
            ?match foaf:isPrimaryTopicOf """ + article_url + """ .
            """ + location_query + """
        }
        """

        self.sparql.setQuery(query_string)

        # Any failure to obtain results is reported and treated as
        # "no location found" rather than propagated to the caller.
        try:
            results = self.sparql.query().convert()
        except QueryBadFormed:
            print("SPARQL query bad formed: " + query_string)
            return None
        except urllib2.HTTPError:
            print("HTTP Error 502: " + article)
            return None
        except urllib2.URLError:
            print("Network is unreachable while working on: " + article)
            return None
        except socket.timeout:
            print("Query timed out for: " + article)
            return None

        bindings = results["results"]["bindings"]
        if bindings:
            coordinates_array = []
            for result in bindings:
                if "lat" in result:
                    # Decimal branch: values are usable as they are.
                    latitude = result["lat"]["value"]
                    longitude = result["long"]["value"]
                else:
                    # Degree/minute/second branch.  Each axis gets its own
                    # direction; 0 is passed when the branch bound none.
                    lat_dir = 0
                    if "latDir" in result:
                        lat_dir = result["latDir"]["value"]
                    lon_dir = 0
                    if "lonDir" in result:
                        lon_dir = result["lonDir"]["value"]

                    latitude = self.dms2dd(result["latDeg"]["value"],
                                           result["latMin"]["value"],
                                           result["latSec"]["value"],
                                           lat_dir)
                    # The longitude must use the longitude direction, not the
                    # latitude one.
                    longitude = self.dms2dd(result["lonDeg"]["value"],
                                            result["lonMin"]["value"],
                                            result["lonSec"]["value"],
                                            lon_dir)

                coordinates_array.append((float(latitude), float(longitude)))

            majority_vote = majority_voting.vote(coordinates_array, threshold)
            location = country_lookup.get_country(majority_vote[0],
                                                  majority_vote[1])

        return location
コード例 #2
0
            location_tuple = (float(result["lat"]["value"]),
                              float(result["long"]["value"]))
            if url_location_dictionary.has_key(stripped_url):
                url_location_dictionary[stripped_url].append(location_tuple)
            else:
                locations = []
                locations.append(location_tuple)
                url_location_dictionary[stripped_url] = locations

        offset += limit
    else:
        break

# Reduce the locations gathered for each URL to a single majority-voted
# result.  Links to web-archiving services (web.archive.org and
# webcitation.org) are left out of the output.
url_majority_location_dictionary = {}
for url in url_location_dictionary:
    url_text = str(url)
    if "web.archive.org" in url_text or "webcitation.org" in url_text:
        continue
    url_majority_location_dictionary[url] = majority_voting.vote(
        url_location_dictionary[url], absolute_threshold)

# Persist the voted locations as pretty-printed JSON with sorted keys.
with open(outputfile_path, 'w') as f:
    json.dump(url_majority_location_dictionary, f, indent=4, sort_keys=True)
    print("File was stored successfully")
コード例 #3
0
 def query_location(self, article):
     """Resolve the country for a Wikipedia article from DBpedia coordinates.

     Builds a SPARQL query that selects every coordinate attached to the
     resource whose ``foaf:isPrimaryTopicOf`` is the article's Wikipedia
     URL, runs it through ``self.sparql``, converts each binding to a
     ``(latitude, longitude)`` pair of floats, and hands the pairs to
     ``majority_voting.vote``.  The winning pair is passed to
     ``country_lookup.get_country``.

     The names ``language`` and ``threshold`` are not parameters; they are
     resolved from the enclosing scope (presumably module-level settings --
     confirm against the rest of the file).

     Args:
         article: Wikipedia article title, appended verbatim to the
             article URL (no escaping is applied here).

     Returns:
         Whatever ``country_lookup.get_country`` returns for the voted
         coordinate, or ``None`` when the query fails or yields no
         bindings.
     """
     article_url = ('<http://' + language + '.wikipedia.org/wiki/'
                    + article + '>')
     location = None

     # geo: and foaf: are declared explicitly so the query does not depend
     # on prefixes that only some endpoints predefine.
     namespaces = """
     PREFIX dbo: <http://dbpedia.org/resource/classes#>
     PREFIX dbp: <http://dbpedia.org/property/>
     PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
     PREFIX foaf: <http://xmlns.com/foaf/0.1/>
     """

     # Each UNION branch covers one of the property spellings used for
     # coordinates: decimal values bind ?lat/?long, degree/minute/second
     # values bind ?latDeg.. ?lonSec and optionally ?latDir/?lonDir.
     location_query = """
     {
         SELECT * WHERE {
             ?match dbp:latitude ?lat .
             ?match dbp:longitude ?long
         }
     }
     UNION
     {
         SELECT * WHERE {
             ?match geo:lat ?lat .
             ?match geo:long ?long
         }
     }
     UNION
     {
         SELECT * WHERE {
             ?match dbp:latDeg ?latDeg .
             ?match dbp:latMin ?latMin .
             ?match dbp:latSec ?latSec .
             ?match dbp:lonDeg ?lonDeg .
             ?match dbp:lonMin ?lonMin .
             ?match dbp:lonSec ?lonSec
         }
     }
     UNION
     {
         SELECT * WHERE {
             ?match dbp:latDeg ?latDeg .
             ?match dbp:latMin ?latMin .
             ?match dbp:latSec ?latSec .
             ?match dbp:lonDeg ?lonDeg .
             ?match dbp:lonMin ?lonMin .
             ?match dbp:lonSec ?lonSec .
             ?match dbp:latDir ?latDir .
             ?match dbp:lonDir ?lonDir
         }
     }
     UNION
     {
         SELECT * WHERE {
             ?match dbp:latDegrees ?latDeg .
             ?match dbp:latMinutes ?latMin .
             ?match dbp:latSeconds ?latSec .
             ?match dbp:longDegrees ?lonDeg .
             ?match dbp:longMinutes ?lonMin .
             ?match dbp:longSeconds ?lonSec .
             ?match dbp:latDirection ?latDir .
             ?match dbp:longDirection ?lonDir
         }
     }
     UNION
     {
         SELECT * WHERE {
             ?match dbp:latd ?lat .
             ?match dbp:longd ?long
         }
     }
     """
     query_string = namespaces + """
     SELECT * WHERE {
         ?match foaf:isPrimaryTopicOf """ + article_url + """ .
         """ + location_query + """
     }
     """

     self.sparql.setQuery(query_string)

     # Any failure to obtain results is reported and treated as
     # "no location found" rather than propagated to the caller.
     try:
         results = self.sparql.query().convert()
     except QueryBadFormed:
         print("SPARQL query bad formed: " + query_string)
         return None
     except urllib2.HTTPError:
         print("HTTP Error 502: " + article)
         return None
     except urllib2.URLError:
         print("Network is unreachable while working on: " + article)
         return None
     except socket.timeout:
         print("Query timed out for: " + article)
         return None

     bindings = results["results"]["bindings"]
     if bindings:
         coordinates_array = []
         for result in bindings:
             if "lat" in result:
                 # Decimal branch: values are usable as they are.
                 latitude = result["lat"]["value"]
                 longitude = result["long"]["value"]
             else:
                 # Degree/minute/second branch.  Each axis gets its own
                 # direction; 0 is passed when the branch bound none.
                 lat_dir = 0
                 if "latDir" in result:
                     lat_dir = result["latDir"]["value"]
                 lon_dir = 0
                 if "lonDir" in result:
                     lon_dir = result["lonDir"]["value"]

                 latitude = self.dms2dd(result["latDeg"]["value"],
                                        result["latMin"]["value"],
                                        result["latSec"]["value"],
                                        lat_dir)
                 # The longitude must use the longitude direction, not the
                 # latitude one.
                 longitude = self.dms2dd(result["lonDeg"]["value"],
                                         result["lonMin"]["value"],
                                         result["lonSec"]["value"],
                                         lon_dir)

             coordinates_array.append((float(latitude), float(longitude)))

         majority_vote = majority_voting.vote(coordinates_array, threshold)
         location = country_lookup.get_country(majority_vote[0],
                                               majority_vote[1])

     return location
コード例 #4
0
            stripped_url = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_url)

            location_tuple=(float(result["lat"]["value"]),float(result["long"]["value"]))
            if url_location_dictionary.has_key(stripped_url) :
                url_location_dictionary[stripped_url].append(location_tuple)
            else:
                locations = []
                locations.append(location_tuple)
                url_location_dictionary[stripped_url] = locations
                
        offset += limit
    else:
        break
    

# Reduce the locations gathered for each URL to a single majority-voted
# result.  Links to web-archiving services (web.archive.org and
# webcitation.org) are left out of the output.
url_majority_location_dictionary = {}
for url in url_location_dictionary:
    url_text = str(url)
    if "web.archive.org" in url_text or "webcitation.org" in url_text:
        continue
    url_majority_location_dictionary[url] = majority_voting.vote(
        url_location_dictionary[url], absolute_threshold)

# Persist the voted locations as pretty-printed JSON with sorted keys.
with open(outputfile_path, 'w') as f:
    json.dump(url_majority_location_dictionary, f, indent=4, sort_keys=True)
    print("File was stored successfully")