def resultsPack(register, user_id_comma):
    """Build the pack of unread trweet results (plain + targets) for one user.

    register -- user id, forwarded to toolbox.packaging for inquiry resolution
    user_id_comma -- ",<user_id>," pattern used in the SQL LIKE / NOT LIKE filters
    Returns a list of item dicts ready for transmission to the user.
    """
    ######### RESULTS PACK CREATION
    results_pack = []
    ########### CONNECTION TO SERGE DATABASE
    database = toolbox.limitedConnection(path.basename(__file__))
    ######### LABEL SETTINGS RECOVERY
    label = ((path.basename(__file__)).split("."))[0]
    label_design = toolbox.stylishLabel(label, database)
    ######### RESULTS FOR TRWEET : TWEETS ATTRIBUTES RECOVERY FOR PLAIN TRWEETS
    query_plain = ("SELECT id, author, tweet, date, likes, retweets, link, inquiry_id FROM results_plain_trweet_serge WHERE (send_status NOT LIKE %s AND read_status NOT LIKE %s AND owners LIKE %s)")
    query_targets = ("SELECT id, author, tweet, date, likes, retweets, link, inquiry_id FROM results_targets_trweet_serge WHERE (send_status NOT LIKE %s AND read_status NOT LIKE %s AND owners LIKE %s)")
    call_trweets = database.cursor()
    call_trweets.execute(query_plain, (user_id_comma, user_id_comma, user_id_comma))
    plain_trweets = [list(elem) for elem in list(call_trweets.fetchall())]
    call_trweets.execute(query_targets, (user_id_comma, user_id_comma, user_id_comma))
    targets_trweets = [list(elem) for elem in list(call_trweets.fetchall())]
    call_trweets.close()
    full_trweets = plain_trweets + targets_trweets
    for trweet in full_trweets:
        ######### SEARCH FOR SOURCE NAME AND COMPLETE REQUEST OF THE USER
        query_inquiry = "SELECT inquiry, applicable_owners_sources FROM inquiries_trweet_serge WHERE id = %s AND applicable_owners_sources LIKE %s AND active > 0"
        item_arguments = {
            "user_id": register,
            "source_id": None,
            "inquiry_id": filter(None, str(trweet[7]).split(",")),
            "query_source": None,
            "query_inquiry": query_inquiry,
            "multisource": False}
        # BUGFIX: 'connection' was an undefined name -- the open handle here is 'database'.
        attributes = toolbox.packaging(item_arguments, database)
        # BUGFIX: date (trweet[3]) is a datetime and likes/retweets (trweet[4]/[5]) are
        # numeric columns -- cast to str before concatenation to avoid a TypeError.
        description = (trweet[1] + "\n" + str(trweet[3]) + ", likes : " + str(trweet[4]) + ", retweets : " + str(trweet[5])).strip().encode('ascii', errors='xmlcharrefreplace')
        ######### ITEM ATTRIBUTES PUT IN A PACK FOR TRANSMISSION TO USER
        item = {
            "id": trweet[0],
            "title": trweet[2].strip().encode('ascii', errors='xmlcharrefreplace').lower().capitalize(),
            "description": description,
            "link": trweet[6].strip().encode('ascii', errors='xmlcharrefreplace'),
            "label": label,
            "source": trweet[1],
            "inquiry": attributes["inquiry"],
            "wiki_link": None}
        item.update(label_design)
        results_pack.append(item)
    return results_pack
def resultsPack(register, user_id_comma):
    """Build the pack of unread calendar (kalendar) results for one user.

    register -- user id, forwarded to toolbox.packaging for inquiry resolution
    user_id_comma -- ",<user_id>," pattern used in the SQL LIKE / NOT LIKE filters
    Returns a list of item dicts ready for transmission to the user.
    """
    # TODO add link to vigiserge calendar page when unavailable
    ######### RESULTS PACK CREATION
    results_pack = []
    ########### CONNECTION TO SERGE DATABASE
    database = toolbox.limitedConnection(path.basename(__file__))
    ######### LABEL SETTINGS RECOVERY
    label = ((path.basename(__file__)).split("."))[0]
    label_design = toolbox.stylishLabel(label, database)
    ######### RESULTS FOR CALENDARS : EVENTS ATTRIBUTES RECOVERY
    query_calendars = ("SELECT id, name, date, location, description, link, source_id, inquiry_id FROM results_kalendar_serge WHERE (send_status NOT LIKE %s AND read_status NOT LIKE %s AND owners LIKE %s)")
    call_calendars = database.cursor()
    call_calendars.execute(query_calendars, (user_id_comma, user_id_comma, user_id_comma))
    rows = [list(elem) for elem in list(call_calendars.fetchall())]
    call_calendars.close()
    for row in rows:
        ######### SEARCH FOR SOURCE NAME AND COMPLETE REQUEST OF THE USER
        query_source = "SELECT name FROM sources_kalendar_serge WHERE id = %s and type <> 'language'"
        query_inquiry = "SELECT inquiry, applicable_owners_sources FROM inquiries_kalendar_serge WHERE id = %s AND applicable_owners_sources LIKE %s AND active > 0"
        item_arguments = {
            "user_id": register,
            "source_id": row[6],
            "inquiry_id": filter(None, str(row[7]).split(",")),
            "query_source": query_source,
            "query_inquiry": query_inquiry,
            "multisource": True}
        # BUGFIX: 'connection' was an undefined name -- the open handle here is 'database'.
        attributes = toolbox.packaging(item_arguments, database)
        # BUGFIX: the date column (row[2]) is stored as a numeric timestamp by
        # kalendarExplorer -- cast to str before concatenation to avoid a TypeError.
        description = (str(row[2]) + ", " + row[3] + "\n" + row[4]).strip().encode('ascii', errors='xmlcharrefreplace')
        ######### ITEM ATTRIBUTES PUT IN A PACK FOR TRANSMISSION TO USER
        item = {
            "id": row[0],
            "title": row[1].strip().encode('ascii', errors='xmlcharrefreplace').lower().capitalize(),
            "description": description,
            "link": row[5].strip().encode('ascii', errors='xmlcharrefreplace'),
            "label": label,
            "source": attributes["source"],
            "inquiry": attributes["inquiry"],
            "wiki_link": None}
        item.update(label_design)
        results_pack.append(item)
    return results_pack
def saveTheDate(query_checking, query_insertion, query_update, item):
    """Insert a calendar event, or extend its inquiries/owners if already stored.

    query_checking -- SELECT returning (inquiry_id, owners) for an existing event
    query_insertion -- INSERT used when the event is not yet in the table
    query_update -- UPDATE used to extend inquiry_id/owners of a stored event
    item -- dict of event attributes as built by kalendarExplorer
    """
    ########### CONNECTION TO SERGE DATABASE
    database = toolbox.limitedConnection(path.basename(__file__))
    ########### ITEM EXTRACTION FOR OPERATIONS
    # BUGFIX: the caller hands a dict, not a sequence -- access by key, not index.
    event = {
        "name": item["name"],
        "date": item["date"],
        "location": item["location"],
        "source_id": item["source_id"],
        "inquiry_id": item["inquiry_id"],
        "owner": item["owners"]}
    ########### DATABASE CHECKING
    call_data_cheking = database.cursor()
    call_data_cheking.execute(query_checking, (event["name"], event["date"], event["location"]))
    checking = call_data_cheking.fetchone()
    call_data_cheking.close()
    if checking is not None:
        dataset = {
            "complete_inquiries_id": checking[0],
            "complete_owners": checking[1],
            "split_owners": filter(None, checking[1].split(","))}
        ########### NEW ATTRIBUTES CREATION (COMPLETE ID & COMPLETE OWNERS)
        # str() because inquiry_id may arrive as an int from the inquiries table.
        if str(event["inquiry_id"]) not in dataset["complete_inquiries_id"]:
            dataset["complete_inquiries_id"] = dataset["complete_inquiries_id"] + str(event["inquiry_id"]).replace(",", "") + ","
        # NOTE(review): stripping every comma from a multi-owner string like ",1,2,"
        # fuses the ids ("12"); looks suspicious but matches the original logic -- confirm.
        if event["owner"] not in dataset["complete_owners"]:
            dataset["complete_owners"] = dataset["complete_owners"] + event["owner"].replace(",", "") + ","
        ########### CREATE A SET IN ORDER TO UPDATE THE DATABASE
        item_update = [dataset["complete_inquiries_id"], dataset["complete_owners"], event["name"]]
        update_data = database.cursor()
        try:
            update_data.execute(query_update, (item_update))
            database.commit()
        # 'except E, v' (py2-only) replaced with 'as', valid on 2.6+ and 3.
        except Exception as except_type:
            database.rollback()
            logger_error.error("ROLLBACK AT UPDATE IN insertOrUpdate FUNCTION")
            logger_error.error(query_update)
            logger_error.error(repr(except_type))
        update_data.close()
    else:
        ########### EVENT NOT YET STORED : INSERTION
        # BUGFIX: query_insertion was received but never executed -- the "insert"
        # half of insertOrUpdate was missing entirely.
        insert_data = database.cursor()
        try:
            # values() order matches item.keys() used by the caller to build the column list.
            insert_data.execute(query_insertion, tuple(item.values()))
            database.commit()
        except Exception as except_type:
            database.rollback()
            logger_error.error("ROLLBACK AT INSERTION IN insertOrUpdate FUNCTION")
            logger_error.error(query_insertion)
            logger_error.error(repr(except_type))
        insert_data.close()
def twitterConnection():
    """Open an authenticated connection to the Twitter API.

    Reads the four OAuth tokens stored in credentials_trweet_serge and
    returns a ready-to-use tweepy.API handle.
    """
    ########### CONNECTION TO SERGE DATABASE
    database = toolbox.limitedConnection(path.basename(__file__))
    ######### TWITTER TOKENS
    cursor = database.cursor()
    cursor.execute("SELECT consumer_key, consumer_secret, access_token, access_token_secret FROM credentials_trweet_serge")
    consumer_key, consumer_secret, access_token, access_token_secret = cursor.fetchone()
    cursor.close()
    ######### OAUTH HANDSHAKE
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return tweepy.API(auth)
def kalendarExplorer(now):
    """Walk every active iCal source, match future events against active
    inquiries, and insert/update the matches in results_kalendar_serge.

    now -- current time as a Unix timestamp; only events strictly after it are kept
    """
    ########### CONNECTION TO SERGE DATABASE
    database = toolbox.limitedConnection(path.basename(__file__))
    ######### VARIABLES NEEDED
    calendars_list = []
    ######### CALL TO TABLE sources_kalendar_serge
    call_calendar = database.cursor()
    call_calendar.execute("SELECT id, link FROM sources_kalendar_serge WHERE active >= 1")
    rows = call_calendar.fetchall()
    call_calendar.close()
    for source in rows:
        # BUGFIX: the original line was missing the opening '{' (syntax error) and
        # concatenated the raw id (likely an int) into strings -- str() it once here.
        source_id = str(source[0])
        calendars_list.append({
            "id": source_id,
            "id_comma": "," + source_id + ",",
            "id_sql": "%," + source_id + ",%",
            "link": source[1]})
    ######### GO TO CALENDAR AND PARSING
    for calendar in calendars_list:
        req_results = sergenet.aLinkToThePast(calendar["link"], 'fullcontent')
        # BUGFIX: the parsed calendar used to overwrite the loop variable 'calendar',
        # breaking every later calendar["link"] / calendar["id_sql"] access.
        ical = Calendar.from_ical(req_results[0])
        event_list = []
        for event in ical.walk('vevent'):
            try:
                date = event.decoded('dtstart')
                date = time.mktime(date.timetuple())
                if date == "" or date is None:
                    date = float(0)
            except Exception:
                logger_error.warning("BEACON ERROR : missing <date> in " + calendar["link"])
                logger_error.warning(traceback.format_exc())
                date = float(0)
            try:
                summary = event.decoded('summary')
                if summary == "" or summary is None:
                    summary = "NO TITLE"
            # BUGFIX: 'except (AttributeError, summary == "")' was not a valid
            # exception clause (it evaluated a bool, and raised NameError itself
            # when the decode failed before 'summary' was bound).
            except Exception:
                logger_error.warning("BEACON ERROR : missing <title> in " + calendar["link"])
                logger_error.warning(traceback.format_exc())
                summary = "NO TITLE"
            try:
                location = event.decoded('location')
                if location == "" or location is None:
                    location = "NO LOCATION"
            except Exception:
                logger_error.warning("BEACON ERROR : missing <location> in " + calendar["link"])
                logger_error.warning(traceback.format_exc())
                location = "NO LOCATION"
            try:
                description = event.decoded('description')
                if description == "" or description is None:
                    description = "NO DESCRIPTION"
            except Exception:
                logger_error.warning("BEACON ERROR : missing <description> in " + calendar["link"])
                logger_error.warning(traceback.format_exc())
                description = "NO DESCRIPTION"
            full_event = {
                "name": toolbox.escaping(summary),
                "date": date,
                "location": location,
                "description": description}
            # BUGFIX: the original or-chain '(summary is not None or summary != "" or
            # summary != "NO TITLE")' was always true; the intent was to skip untitled events.
            if date > now and summary != "NO TITLE":
                event_list.append(full_event)
        ######### INQUIRIES AND OWNERS ATTRIBUTIONS TO CALENDAR
        query_inquiries_calendars = "SELECT id, inquiry, applicable_owners_sources FROM inquiries_kalendar_serge WHERE applicable_owners_sources LIKE %s AND active >= 1"
        call_calendar = database.cursor()
        call_calendar.execute(query_inquiries_calendars, (calendar["id_sql"],))
        inquiry_rows = call_calendar.fetchall()
        call_calendar.close()
        calendars_inquiries = []
        for inquiry_row in inquiry_rows:
            owners_str = ","
            # BUGFIX: the pattern was matched against the undefined 'row[2]';
            # the owners field of the current inquiry is inquiry_row[2].
            owners_list = re.findall(r'\|([0-9]+):[0-9!,]+' + calendar["id"] + ',', inquiry_row[2])
            for owner in owners_list:
                owners_str = owners_str + owner.strip() + ","
            if re.search(r'^(,[0-9]+)+,$', owners_str) is not None:
                calendars_inquiries.append({
                    "id": inquiry_row[0],
                    "inquiry": inquiry_row[1],
                    "owners": owners_str})
        for event in event_list:
            # BUGFIX: the original iterated the undefined 'inquiries_list'.
            for inquiry in calendars_inquiries:
                ########### AGGREGATED INQUIRIES FORMAT SUPPORT
                aggregated_inquiries = toolbox.aggregatesSupport(inquiry["inquiry"])
                fragments_nb = 0
                ######### INQUIRIES RESEARCH IN CALENDAR
                for fragments in aggregated_inquiries:
                    pattern = '[^a-z]' + re.escape(fragments) + r'.{0,3}(\W|$)'
                    # str() on the date: it is a float timestamp, re.search needs text.
                    if (re.search(pattern, event["name"], re.IGNORECASE)
                            or re.search(pattern, str(event["date"]), re.IGNORECASE)
                            or re.search(pattern, event["location"], re.IGNORECASE)):
                        fragments_nb += 1
                if fragments_nb == len(aggregated_inquiries):
                    ########### ITEM BUILDING
                    # BUGFIX: the owners key was read as inquiry["owner"]; it is "owners".
                    item = {
                        "name": event["name"],
                        "date": event["date"],
                        "location": event["location"],
                        "description": event["description"],
                        "source_id": calendar["id"],
                        "inquiry_id": inquiry["id"],
                        "owners": inquiry["owners"]}
                    item_columns = str(tuple(item.keys())).replace("'", "")
                    ########### QUERY FOR DATABASE CHECKING
                    query_checking = ("SELECT inquiry_id, owners FROM results_kalendar_serge WHERE name = %s AND `date` = %s AND location = %s")
                    ########### QUERY FOR DATABASE INSERTION
                    query_insertion = ("INSERT INTO results_kalendar_serge" + item_columns + " VALUES (%s, %s, %s, %s, %s, %s, %s)")
                    ########### QUERIES FOR DATABASE UPDATE
                    query_update = ("UPDATE results_kalendar_serge SET inquiry_id = %s, owners = %s WHERE name = %s")
                    ########### CALL insertOrUpdate FUNCTION
                    saveTheDate(query_checking, query_insertion, query_update, item)
def startingPoint():
    """A kind of main: load active trweet inquiries, sort them by last launch,
    and dispatch each one to the matching fishing function within the
    remaining Twitter API rate limits."""
    ########### CONNECTION TO SERGE DATABASE
    database = toolbox.limitedConnection(path.basename(__file__))
    ######### LOGGER CALL
    logger_info = logging.getLogger("info_log")
    ######### RESEARCH ON TWITTER
    logger_info.info("\n\n######### TWITTER EXTENSION \n\n")
    ######### CALL TO inquiries_trweet_serge
    call_queries = database.cursor()
    call_queries.execute("SELECT id, type, inquiry, applicable_owners_targets, lang, last_launch FROM inquiries_trweet_serge WHERE active >= 1")
    rows = call_queries.fetchall()
    call_queries.close()
    search_list = []
    for row in rows:
        owners = ","
        targets_list = []
        # applicable_owners_targets format: "|owner:target1,target2|..."; a leading
        # '!' marks a deactivated owner or target.
        for applicable_owners_targets in filter(None, row[3].split("|")):
            split_owners_targets = filter(None, applicable_owners_targets.split(":"))
            if "!" not in split_owners_targets[0]:
                owners = owners + split_owners_targets[0] + ","
                # NOTE(review): targets of a deactivated owner are skipped here --
                # the mangled original made the nesting ambiguous; confirm intent.
                for target in filter(None, split_owners_targets[1].split(",")):
                    if "!" not in target:
                        targets_list.append(target)
        # BUGFIX: 'targets_list.sort()' returns None -- the targets key was always
        # None and trweetTorrent could never iterate it. Use sorted() instead.
        inquiry = {
            "id": row[0],
            "type": row[1],
            "inquiry": row[2],
            "applicable_owners_targets": row[3],
            "owners": owners,
            "targets": sorted(targets_list),
            "language": row[4],
            "last_launch": row[5]}
        search_list.append(inquiry)
    if len(search_list) > 0:
        ######### SORT LISTS
        search_list = sorted(search_list, key=lambda item: item["last_launch"])
        ######### REMAINING CALLS
        rate_limit = rateLimit()
        remaining_search = rate_limit[0]
        remaining_timeline = rate_limit[2]
        calls_research_count = 0
        calls_timeline_count = 0
        ######### RESEARCH PATH
        for inquiry in search_list:
            if inquiry["type"] == "plain":
                if calls_research_count <= remaining_search:
                    trweetFishing(inquiry)
                else:
                    logger_info.info("RATE LIMIT OF RESEARCH METHOD REACHED\n\n")
                # BUGFIX: the search-API counter was only bumped on the geo branch,
                # so plain inquiries never counted against the rate limit.
                calls_research_count = calls_research_count + 1
            elif inquiry["type"] == "geo":
                if calls_research_count <= remaining_search:
                    lakesOfTrweets(inquiry)
                else:
                    logger_info.info("RATE LIMIT OF RESEARCH METHOD REACHED\n\n")
                calls_research_count = calls_research_count + 1
            elif inquiry["type"] == "target":
                if calls_timeline_count <= remaining_timeline:
                    trweetTorrent(inquiry)
                else:
                    logger_info.info("RATE LIMIT OF TIMELINE METHOD REACHED\n\n")
                calls_timeline_count = calls_timeline_count + 1
def trweetTorrent(inquiry):
    """Catch entire timelines or specific tweets in timelines (TARGETS TYPE TRWEETS).

    inquiry -- dict built by startingPoint (id, inquiry, targets, owners,
    applicable_owners_targets, ...).
    """
    ########### CONNECTION TO TWITTER API
    api = twitterConnection()
    ########### CONNECTION TO SERGE DATABASE
    database = toolbox.limitedConnection(path.basename(__file__))
    ########### USEFUL VARIABLES
    # int() for consistency with trweetFishing / lakesOfTrweets (was a bare float).
    fishing_time = int(time.time())
    inquiry_id_comma2 = "," + str(inquiry["id"]) + ","
    ########### RESEARCH TARGETS TIMELINES
    for target in inquiry["targets"]:
        target_owners_str = ","
        raw_target_owners = re.findall('[^!@A-Za-z0-9_]' + '[0-9]*' + ":" + '[@A-Za-z0-9_!,]*' + "," + target + ",", inquiry["applicable_owners_targets"])
        for target_owner in raw_target_owners:
            target_owner = (target_owner.replace("|", "").strip().split(":"))[0]
            target_owners_str = target_owners_str + target_owner + ","
        timeline = api.user_timeline(id=target, count=50)
        for trweet in timeline:
            author = trweet.author.name.encode("utf8")
            date = trweet.created_at
            tweet = trweet.text.encode("utf8")
            retweets = trweet.retweet_count
            likes = trweet.favorite_count
            trweet_id = trweet.id
            pseudo = trweet.author.screen_name.encode("utf8")
            link = "https://twitter.com/" + str(pseudo) + "/status/" + str(trweet_id) + "/"
            ########### HASH TWEET ID
            salt = "blackSalt"
            trweet_id = hashlib.sha256(salt + ":" + str(trweet_id)).hexdigest()
            ########### AGGREGATED INQUIRIES FORMAT SUPPORT
            aggregated_inquiries = toolbox.aggregatesSupport(inquiry["inquiry"])
            # BUGFIX: fragments_nb was never initialised (NameError on first '+=').
            fragments_nb = 0
            ######### INQUIRY RESEARCH IN THE TWEET
            for fragments in aggregated_inquiries:
                # BUGFIX: re.escape was applied to the whole aggregated_inquiries list
                # instead of the current fragment, and the owners check referenced the
                # undefined name 'owners_str' (computed above as target_owners_str).
                if (re.search('[^a-z]' + re.escape(fragments) + r'.{0,3}(\W|$)', tweet, re.IGNORECASE) or re.search('^' + re.escape(':all') + '$', inquiry["inquiry"], re.IGNORECASE)) and re.search('^([,]{1}[A-Za-z0-9@_]+)*[,]{1}$', target_owners_str) is not None:
                    fragments_nb += 1
            if fragments_nb == len(aggregated_inquiries):
                ########### ITEM BUILDING
                item = {
                    "type": "targets",
                    "author": author,
                    "tweet": tweet,
                    "date": date,
                    "likes": likes,
                    "retweets": retweets,
                    "latitude": None,
                    "longitude": None,
                    "country": None,
                    "link": link,
                    "trweet_id": trweet_id,
                    "inquiry_id": inquiry_id_comma2,
                    "owners": inquiry["owners"]}
                item_columns = str(tuple(item.keys())).replace("'", "")
                ########### SEARCH TRWEET QUERIES
                query_checking = ("SELECT inquiry_id, owners FROM results_trweet_serge WHERE trweet_id = %s")
                query_update = ("UPDATE results_trweet_serge SET inquiry_id = %s, owners = %s WHERE trweet_id = %s and type = %s")
                query_insertion = ("INSERT INTO results_trweet_serge " + item_columns + " VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
                query_fishing_time = ("UPDATE inquiries_trweet_serge SET last_launch = %s WHERE id = %s")
                ########### CALL trweetBucket FUNCTION
                # BUGFIX: 'inquiry_id' was undefined -- the id lives in inquiry["id"].
                # NOTE(review): geo_species is not defined in this function; presumably
                # a module-level value -- confirm before relying on it.
                trweetBucket(item, inquiry["id"], inquiry_id_comma2, geo_species, fishing_time, query_checking, query_update, query_insertion, query_fishing_time, database)
def lakesOfTrweets(inquiry):
    """Catch geolocalisation data in tweets that contain the stored inquiry
    (GEO TYPE TRWEETS).

    inquiry -- dict built by startingPoint (id, inquiry, owners, language, ...).
    """
    ########### CONNECTION TO TWITTER API
    api = twitterConnection()
    ########### CONNECTION TO SERGE DATABASE
    database = toolbox.limitedConnection(path.basename(__file__))
    ########### USEFUL VARIABLES
    fishing_time = int(time.time())
    inquiry_id_comma2 = "," + str(inquiry["id"]) + ","
    ########### RESEARCH GEOLOCALIZED TWEETS
    if inquiry["language"] is None:
        shoal = api.search(q=inquiry["inquiry"], count=100, show_user=False)
    else:
        shoal = api.search(q=inquiry["inquiry"], lang=inquiry["language"], count=100, show_user=False)
    for trweet in shoal:
        place = trweet.place
        ########### DATE PROCESSING (truncate to the minute, keep a Unix timestamp)
        date = trweet.created_at
        date = date.timetuple()
        date = datetime.datetime(date.tm_year, date.tm_mon, date.tm_mday, date.tm_hour, date.tm_min)
        date = time.mktime(date.timetuple())
        ########### HASH TWEET ID
        salt = "blackSalt"
        trweet_id = hashlib.sha256(salt + ":" + str(trweet.id)).hexdigest()
        ########### SET COORDINATES
        # BUGFIX: 'country' was undefined when the tweet carried no place, which made
        # the item building below raise NameError. Default all geo fields to None.
        center_latitude = None
        center_longitude = None
        country = None
        if place is not None:
            country = trweet.place.country_code
            coordinates = trweet.place.bounding_box.coordinates[0]
            latitudes_list = [point[1] for point in coordinates]
            longitudes_list = [point[0] for point in coordinates]
            if latitudes_list:
                # Generalisation: average over the actual number of bounding-box
                # corners instead of a hard-coded 4.
                center_latitude = sum(latitudes_list) / len(latitudes_list)
                center_longitude = sum(longitudes_list) / len(longitudes_list)
        ########### ITEM BUILDING
        item = {
            "type": "geo",
            "author": None,
            "tweet": None,
            "date": date,
            "likes": None,
            "retweets": None,
            "latitude": center_latitude,
            "longitude": center_longitude,
            "country": country,
            "link": None,
            "trweet_id": trweet_id,
            "inquiry_id": inquiry_id_comma2,
            "owners": inquiry["owners"]}
        item_columns = str(tuple(item.keys())).replace("'", "")
        ########### SEARCH TRWEET QUERIES
        query_checking = ("SELECT inquiry_id, owners FROM results_trweet_serge WHERE trweet_id = %s")
        query_update = ("UPDATE results_trweet_serge SET inquiry_id = %s, owners = %s WHERE trweet_id = %s and type = 'plain'")
        query_insertion = ("INSERT INTO results_trweet_serge " + item_columns + " VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
        query_fishing_time = ("UPDATE inquiries_trweet_serge SET last_launch = %s WHERE id = %s")
        ########### CALL trweetBucket FUNCTION
        # BUGFIX: 'inquiry_id' was undefined -- the id lives in inquiry["id"].
        # NOTE(review): geo_species is not defined in this function; presumably
        # a module-level value -- confirm before relying on it.
        trweetBucket(item, inquiry["id"], inquiry_id_comma2, geo_species, fishing_time, query_checking, query_update, query_insertion, query_fishing_time, database)
def trweetFishing(inquiry):
    """Catch tweets that contain the inquiry saved in the database
    (PLAIN TYPE TRWEETS).

    inquiry -- dict built by startingPoint (id, inquiry, owners, language, ...).
    """
    ########### CONNECTION TO TWITTER API
    api = twitterConnection()
    ########### CONNECTION TO SERGE DATABASE
    database = toolbox.limitedConnection(path.basename(__file__))
    ########### USEFUL VARIABLES
    fishing_time = int(time.time())
    inquiry_id_comma2 = "," + str(inquiry["id"]) + ","
    ########### RESEARCH PLAIN TWEETS
    if inquiry["language"] is None:
        chirp_list = api.search(q=inquiry["inquiry"], count=100, show_user=True)
    else:
        chirp_list = api.search(q=inquiry["inquiry"], lang=inquiry["language"], count=100, show_user=True)
    for trweet in chirp_list:
        author = trweet.author.name.encode("utf8")
        date = trweet.created_at
        tweet = trweet.text.encode("utf8")
        retweets = trweet.retweet_count
        likes = trweet.favorite_count
        trweet_id = trweet.id
        pseudo = trweet.author.screen_name.encode("utf8")
        link = "https://twitter.com/" + str(pseudo) + "/status/" + str(trweet_id) + "/"
        ########### HASH TWEET ID
        salt = "blackSalt"
        trweet_id = hashlib.sha256(salt + ":" + str(trweet_id)).hexdigest()
        ########### ITEM BUILDING
        item = {
            "type": "plain",
            "author": author,
            "tweet": tweet,
            "date": date,
            "likes": likes,
            "retweets": retweets,
            "latitude": None,
            "longitude": None,
            "country": None,
            "link": link,
            "trweet_id": trweet_id,
            "inquiry_id": inquiry_id_comma2,
            "owners": inquiry["owners"]}
        item_columns = str(tuple(item.keys())).replace("'", "")
        ########### SEARCH TRWEET QUERIES
        query_checking = ("SELECT inquiry_id, owners FROM results_trweet_serge WHERE trweet_id = %s")
        query_update = ("UPDATE results_trweet_serge SET inquiry_id = %s, owners = %s WHERE trweet_id = %s and type = 'plain'")
        query_insertion = ("INSERT INTO results_trweet_serge " + item_columns + " VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
        query_fishing_time = ("UPDATE inquiries_trweet_serge SET last_launch = %s WHERE id = %s")
        ########### CALL trweetBucket FUNCTION
        # BUGFIX: 'inquiry_id' was undefined -- the id lives in inquiry["id"].
        # CONSISTENCY FIX: the sibling callers pass 'database' as the tenth
        # argument; this call was missing it.
        # NOTE(review): geo_species is not defined in this function; presumably
        # a module-level value -- confirm before relying on it.
        trweetBucket(item, inquiry["id"], inquiry_id_comma2, geo_species, fishing_time, query_checking, query_update, query_insertion, query_fishing_time, database)