Example #1
0
def main():
    """Print every event received from the stream given on the command line.

    Expects exactly one command-line argument, the stream URL.  With any
    other argument count a usage hint is printed and the function returns
    without connecting.  Otherwise events are received and printed, each
    followed by a blank line, until the client reports the stream finished.
    """
    arguments = sys.argv
    if len(arguments) != 2:
        print('One parameter expected: stream URL')
        return

    # Event bodies are left unparsed (parse_event_body=False); they are
    # only printed here.
    stream_client = SynchronousClient(arguments[1], parse_event_body=False)
    while not stream_client.stream_finished:
        for received in stream_client.receive_events():
            print(str(received))
            print()
Example #2
0
def _query_error(queryType, queryValue):
    """Return the usage hint for a malformed query, or None when it is valid.

    queryType is one of "id", "country" or "position".  queryValue is the
    parsed -q argument (presumably a sequence of strings produced by
    queryParser -- confirm against that function).
    """
    hints = {
        "id":
        'The GeoNames identifier in id queries should be an integer number, e.g. 6544487, change -q argument',
        "country":
        'Use ISO-3166 2 digit country codes in country queries, e.g. US, change -q argument',
        "position":
        'The position query should provide three arguments, longitude, latitude and radius, e.g. "{-3.764647,40.332020,10}", change the -q argument',
    }
    try:
        if queryType == "id":
            # Unpacking into a 1-tuple rejects any other number of elements
            (geonameId, ) = queryValue
            int(geonameId)
        elif queryType == "country":
            (countryCode, ) = queryValue
            if not re.match("^[A-Z]{2}$", countryCode):
                return hints[queryType]
        elif queryType == "position":
            (lon, lat, radius) = queryValue
            float(lon)
            float(lat)
            float(radius)
    except (TypeError, ValueError):
        # Wrong element count, non-iterable value or non-numeric text
        return hints[queryType]
    return None


def main():
    """Annotate geolocated tweet events with GeoNames data and republish them.

    Events are read from the input stream (-i).  For each one the GeoNames
    place nearest to the tweet coordinates is looked up and, when found, its
    geonameId, toponymName and countryCode are added to the event body.  The
    event is then published to the output stream (-o).  With -w only events
    whose place id is in the watched id list, or whose country is the
    watched country, are published.

    The function returns early, after printing a hint, when the -q value
    does not fit the -t query type.  Otherwise it loops forever.
    """

    # Some useful namespace declarations
    GEONAMES = Namespace("http://www.geonames.org/ontology#")
    GEO = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
    DC = Namespace("http://purl.org/dc/elements/1.1/")

    # Parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-u",
        "--user",
        dest="user",
        required=True,
        help="the identifier of a registered GeoNames user (e.g. demo)")
    parser.add_argument(
        "-w",
        "--watched",
        dest="watched",
        action='store_true',
        help=
        "a boolean flag indicating whether all tweets, or only those whose location is in the watched list should be published"
    )
    parser.add_argument(
        "-t",
        "--type",
        dest="type",
        required=True,
        choices=["id", "country", "position"],
        help=
        "query type to be used to select the relevant tweets (country|id|position)"
    )
    parser.add_argument(
        "-q",
        "--query",
        dest="query",
        required=True,
        type=queryParser,
        help=
        "query to be used to select the relevant tweets (e.g. US, 6545086, '{-3.764647,40.332020,10}')"
    )
    parser.add_argument(
        "-i",
        "--input",
        dest="input",
        required=True,
        help=
        "URL for input stream where events are read (e.g. http://localhost:9001/events/long-polling)"
    )
    parser.add_argument(
        "-o",
        "--output",
        dest="output",
        required=True,
        help=
        "URL for output stream where events are published (e.g. http://localhost:9002/events/publish)"
    )
    options = parser.parse_args()

    user = options.user
    inputUrl = options.input
    outputUrl = options.output
    queryType = options.type
    queryValue = options.query
    onlyWatched = options.watched

    # Check query format before any client is created
    queryProblem = _query_error(queryType, queryValue)
    if queryProblem is not None:
        print(queryProblem)
        return

    # Client to listen to geolocated tweet stream
    clnt = SynchronousClient(inputUrl)

    # Access the geonames API
    geo = GeonamesClient(username=user)

    watchedIds = []
    watchedCountry = []
    if queryType == "id":
        # Case 1: query by list of geonamesIds
        # NOTE: the id passed with -q is validated but not used yet.  The
        # list below is a mockup for the Madrid area, previously obtained by
        # calling children(3117735, godown=1).
        watchedIds = [
            6545086, 6544487, 6545080, 6947399, 6545095, 6544493, 3125239,
            6544492, 6545081, 6544494, 124964, 6545089, 6545079, 6545097,
            3123115, 3120635, 3119589, 6545077, 6545082, 3118903, 6545078,
            6545084, 6544099, 6545085, 3116156, 6545090, 6545088, 3113943,
            6324376, 3119198, 3112772, 3112737, 6544495, 6544491, 6545087,
            6545083, 3108118, 6544490, 3106970
        ]
        print("List of geonamesIds to be watched: {}".format(watchedIds))
    elif queryType == "position":
        # Case 2: query by position and radius
        # Find geonamesId near the position
        (lon, lat, radius) = tuple(queryValue)
        watchedIds = geo.findNearbyPlaceNames(lon, lat, radius, maxResults=30)
        print("List of geonamesIds to be watched: {}".format(watchedIds))
    elif queryType == "country":
        # Case 3: query by country code
        watchedCountry.append(queryValue[0])
        print("Country code to be watched {}".format(watchedCountry))

    # Publisher to push the generated events
    publisher = client.SynchronousEventPublisher(outputUrl)

    counter = 0

    while True:
        events = clnt.receive_events()
        for event in events:
            # Bind the GEONAMES namespace in the graph
            event.body.bind("geo", "http://www.geonames.org/ontology#")

            # Find the tweet id (subject of the first dc:date statement)
            (tweet_id, _) = list(event.body.subject_objects(DC["date"]))[0]

            # Find the longitude and latitude
            lon = list(event.body.objects(tweet_id, GEO["long"]))[0]
            lat = list(event.body.objects(tweet_id, GEO["lat"]))[0]

            # Find the Geonames information associated to the coordinates.
            # Both values are reset for every event so that a tweet without
            # a known place never reuses the previous tweet's place.
            geoId = None
            country = None
            place = geo.findNearbyPlaceName(lon, lat)
            if place is not None:
                (geoId, toponym, country) = place
                event.body.add(
                    (tweet_id, GEONAMES["geonameId"], Literal(str(geoId))))
                event.body.add(
                    (tweet_id, GEONAMES["toponymName"], Literal(toponym)))
                event.body.add(
                    (tweet_id, GEONAMES["countryCode"], Literal(str(country))))

            # With -w, drop events located outside the watched places
            if onlyWatched and not ((geoId in watchedIds) or
                                    (country in watchedCountry)):
                continue

            # Forward the modified event
            publisher.publish(event)
            print(event)
            counter += 1
            print("*** {} events published\n".format(counter))
Example #3
0
def _query_error(queryType, queryValue):
    """Return the usage hint for a malformed query, or None when it is valid.

    queryType is one of "id", "country" or "position".  queryValue is the
    parsed -q argument (presumably a sequence of strings produced by
    queryParser -- confirm against that function).
    """
    hints = {
        "id": 'The GeoNames identifier in id queries should be an integer number, e.g. 6544487, change -q argument',
        "country": 'Use ISO-3166 2 digit country codes in country queries, e.g. US, change -q argument',
        "position": 'The position query should provide three arguments, longitude, latitude and radius, e.g. "{-3.764647,40.332020,10}", change the -q argument',
    }
    try:
        if queryType == "id":
            # Unpacking into a 1-tuple rejects any other number of elements
            (geonameId,) = queryValue
            int(geonameId)
        elif queryType == "country":
            (countryCode,) = queryValue
            if not re.match("^[A-Z]{2}$", countryCode):
                return hints[queryType]
        elif queryType == "position":
            (lon, lat, radius) = queryValue
            float(lon)
            float(lat)
            float(radius)
    except (TypeError, ValueError):
        # Wrong element count, non-iterable value or non-numeric text
        return hints[queryType]
    return None


def main():
    """Annotate geolocated tweet events with GeoNames data and republish them.

    Events are read from the input stream (-i).  For each one the GeoNames
    place nearest to the tweet coordinates is looked up and, when found, its
    geonameId, toponymName and countryCode are added to the event body.  The
    event is then published to the output stream (-o).  With -w only events
    whose place id is in the watched id list, or whose country is the
    watched country, are published.

    The function returns early, after printing a hint, when the -q value
    does not fit the -t query type.  Otherwise it loops forever.
    """

    # Some useful namespace declarations
    GEONAMES = Namespace("http://www.geonames.org/ontology#")
    GEO = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
    DC = Namespace("http://purl.org/dc/elements/1.1/")

    # Parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--user", dest="user", required=True,
                  help="the identifier of a registered GeoNames user (e.g. demo)")
    parser.add_argument("-w", "--watched", dest="watched", action='store_true',
                  help="a boolean flag indicating whether all tweets, or only those whose location is in the watched list should be published")
    parser.add_argument("-t", "--type", dest="type", required=True, choices=["id","country","position"],
                  help="query type to be used to select the relevant tweets (country|id|position)")
    parser.add_argument("-q", "--query", dest="query", required=True, type=queryParser,
                  help="query to be used to select the relevant tweets (e.g. US, 6545086, '{-3.764647,40.332020,10}')")
    parser.add_argument("-i", "--input", dest="input", required=True,
                  help="URL for input stream where events are read (e.g. http://localhost:9001/events/long-polling)")
    parser.add_argument("-o", "--output", dest="output", required=True,
                  help="URL for output stream where events are published (e.g. http://localhost:9002/events/publish)")
    options = parser.parse_args()

    user = options.user
    inputUrl = options.input
    outputUrl = options.output
    queryType = options.type
    queryValue = options.query
    onlyWatched = options.watched

    # Check query format before any client is created
    queryProblem = _query_error(queryType, queryValue)
    if queryProblem is not None:
        print(queryProblem)
        return

    # Client to listen to geolocated tweet stream
    clnt = SynchronousClient(inputUrl)

    # Access the geonames API
    geo = GeonamesClient(username=user)

    watchedIds = []
    watchedCountry = []
    if queryType == "id":
        # Case 1: query by list of geonamesIds
        # NOTE: the id passed with -q is validated but not used yet.  The
        # list below is a mockup for the Madrid area, previously obtained by
        # calling children(3117735, godown=1).
        watchedIds = [
            6545086, 6544487, 6545080, 6947399, 6545095, 6544493, 3125239, 6544492, 6545081, 6544494,
            124964, 6545089, 6545079, 6545097, 3123115, 3120635, 3119589, 6545077, 6545082, 3118903,
            6545078, 6545084, 6544099, 6545085, 3116156, 6545090, 6545088, 3113943, 6324376, 3119198,
            3112772, 3112737, 6544495, 6544491, 6545087, 6545083, 3108118, 6544490, 3106970,
        ]
        print("List of geonamesIds to be watched: {}".format(watchedIds))
    elif queryType == "position":
        # Case 2: query by position and radius
        # Find geonamesId near the position
        (lon, lat, radius) = tuple(queryValue)
        watchedIds = geo.findNearbyPlaceNames(lon, lat, radius, maxResults=30)
        print("List of geonamesIds to be watched: {}".format(watchedIds))
    elif queryType == "country":
        # Case 3: query by country code
        watchedCountry.append(queryValue[0])
        print("Country code to be watched {}".format(watchedCountry))

    # Publisher to push the generated events
    publisher = client.SynchronousEventPublisher(outputUrl)

    counter = 0

    while True:
        events = clnt.receive_events()
        for event in events:
            # Bind the GEONAMES namespace in the graph
            event.body.bind("geo", "http://www.geonames.org/ontology#")

            # Find the tweet id (subject of the first dc:date statement)
            (tweet_id, _) = list(event.body.subject_objects(DC["date"]))[0]

            # Find the longitude and latitude
            lon = list(event.body.objects(tweet_id, GEO["long"]))[0]
            lat = list(event.body.objects(tweet_id, GEO["lat"]))[0]

            # Find the Geonames information associated to the coordinates.
            # Both values are reset for every event so that a tweet without
            # a known place never reuses the previous tweet's place.
            geoId = None
            country = None
            place = geo.findNearbyPlaceName(lon, lat)
            if place is not None:
                (geoId, toponym, country) = place
                event.body.add((tweet_id, GEONAMES["geonameId"], Literal(str(geoId))))
                event.body.add((tweet_id, GEONAMES["toponymName"], Literal(toponym)))
                event.body.add((tweet_id, GEONAMES["countryCode"], Literal(str(country))))

            # With -w, drop events located outside the watched places
            if onlyWatched and not ((geoId in watchedIds) or (country in watchedCountry)):
                continue

            # Forward the modified event
            publisher.publish(event)
            print(event)
            counter += 1
            print("*** {} events published\n".format(counter))