def hashtag_stats():
    check_valid_arguments(request.args, ["to", "from", "count"])
    time_from, time_to = validate_and_set_interval(request.args)
    query = TweetWish\
        .query\
        .filter(func.unix_timestamp(TweetWish.created_at) < time_to)\
        .filter(func.unix_timestamp(TweetWish.created_at) >= time_from)\
        .join(tweet_contains_hashtag)\
        .join(Hashtag)\
        .with_entities(Hashtag.hashtag)\
        .add_columns(func.count(Hashtag.hashtag))\
        .group_by(Hashtag.hashtag)\
        .order_by(desc(func.count(Hashtag.hashtag)))
    if 'count' in request.args:
        validate_count(request.args['count'])
        query = query.limit(request.args['count'])
    hashtags = query.all()
    return jsonify(
        popular_hashtags=[{'hashtag': key, 'count': value}
                          for key, value in hashtags])
def mention_stats():
    check_valid_arguments(request.args, ["to", "from", "count"])
    time_from, time_to = validate_and_set_interval(request.args)
    query = User\
        .query\
        .join(tweet_mentions_user)\
        .join(TweetWish)\
        .filter(func.unix_timestamp(TweetWish.created_at) < time_to)\
        .filter(func.unix_timestamp(TweetWish.created_at) >= time_from)\
        .add_columns(func.count(User.id))\
        .group_by(User.id)\
        .order_by(desc(func.count(User.id)))
    if 'count' in request.args:
        validate_count(request.args['count'])
        query = query.limit(request.args['count'])
    mentions = query.all()
    serialized = []
    for result in mentions:
        serialized.append({'user': result[0].json_dump(),
                           'mention_count': result[1]})
    return jsonify(popular_users=serialized)
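# Hedged sketch: hashtag_stats() and mention_stats() above both apply an
# optional LIMIT on top of an otherwise identical query, so the optional part
# can be factored out. `apply_optional_limit` is a hypothetical helper, not
# part of the original code; it assumes the same Flask request.args and
# validate_count() used above.
def apply_optional_limit(query, args):
    # Only add LIMIT when the client supplied a validated "count" argument.
    if 'count' in args:
        validate_count(args['count'])
        query = query.limit(args['count'])
    return query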
def get_billing_data_per_year_per_center_days(year, project_ids):
    billing_data = db_session.query(func.unix_timestamp(Usage.usage_date),
                                    func.sum(Usage.cost),
                                    Usage.usage_value,
                                    Usage.measurement_unit). \
        filter(func.extract('year', Usage.usage_date) == year,
               Usage.project_id.in_(project_ids)). \
        group_by(func.unix_timestamp(Usage.usage_date))
    return billing_data
def get_billing_data_per_year(year, output_type):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Usage.usage_date),
                                        func.sum(Usage.cost),
                                        Usage.usage_value,
                                        Usage.measurement_unit). \
            filter(func.extract('year', Usage.usage_date) == year). \
            group_by(func.unix_timestamp(Usage.usage_date))
    else:
        billing_data = db_session.query(func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year). \
            group_by(func.extract(output_type, Usage.usage_date))
    return billing_data
def memberday_count(self):
    row = \
        select([(func.unix_timestamp(self.timestamp) -
                 func.unix_timestamp(UserData.last_date)).label('seconds')]).\
        where(UserData.friend_id == self.friend_id).\
        order_by(UserData.last_date).\
        limit(1).execute().first()
    seconds = row.seconds if row is not None else 0
    return seconds / 86400.0
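# Hedged sketch: dividing the UNIX_TIMESTAMP() difference above by 86400.0
# turns seconds into fractional days. A pure-Python equivalent for two
# datetimes (function and argument names are illustrative only):
from datetime import datetime


def seconds_to_days(later, earlier):
    # timedelta.total_seconds() mirrors the SQL epoch subtraction above.
    return (later - earlier).total_seconds() / 86400.0


# seconds_to_days(datetime(2020, 1, 2), datetime(2020, 1, 1)) -> 1.0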
def event_json(eventid):
    event = models.Event.query\
        .filter(models.Event.EventID == eventid)\
        .first()
    videos = models.Video.query\
        .filter(models.Video.Lat > float(event.LatStart),
                models.Video.Lat < float(event.LatEnd),
                models.Video.Lng > float(event.LngStart),
                models.Video.Lng < float(event.LngEnd),
                func.unix_timestamp(models.Video.Time) >
                (event.TimeStart - datetime(1970, 1, 1)).total_seconds(),
                func.unix_timestamp(models.Video.Time) <
                (event.TimeEnd - datetime(1970, 1, 1)).total_seconds())\
        .order_by(models.Video.Time)
    return jsonify({'event': event.dictionary(),
                    'videos': [video.dictionary() for video in videos]})
def get_billing_data_per_resource_all_project_per_day_week(year, value_to_match,
                                                           project_ids, resource,
                                                           output_type):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Usage.usage_date),
                                        func.sum(Usage.cost),
                                        Usage.usage_value,
                                        Usage.measurement_unit). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id.in_(project_ids),
                   Usage.resource_type == resource,
                   func.extract('week', Usage.usage_date) == value_to_match). \
            group_by(func.unix_timestamp(Usage.usage_date))
    elif output_type == 'week':
        billing_data = db_session.query(Usage.project_id,
                                        func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id.in_(project_ids),
                   Usage.resource_type == resource,
                   func.extract('week', Usage.usage_date) == value_to_match). \
            group_by(func.extract(output_type, Usage.usage_date))
    return billing_data
def wishes():
    check_valid_arguments(request.args, ["to", "from", "count"])
    time_from, time_to = validate_and_set_interval(request.args)
    wishes = TweetWish.query\
        .filter(func.unix_timestamp(TweetWish.created_at) < time_to)\
        .filter(func.unix_timestamp(TweetWish.created_at) >= time_from)\
        .order_by(TweetWish.created_at.desc())
    if 'count' in request.args:
        validate_count(request.args['count'])
        wishes = wishes.limit(request.args['count'])
    return jsonify(wishes=[item.json_dump() for item in wishes])
def get_billing_data_per_resource_per_project_per_week(year, value_to_match,
                                                       project_id, resource,
                                                       output_type):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Billing.usage_date),
                                        func.sum(Billing.cost),
                                        Billing.usage_value,
                                        Billing.measurement_unit). \
            filter(func.extract('year', Billing.usage_date) == year,
                   Billing.project_id == project_id,
                   Billing.resource_type == resource,
                   func.extract('week', Billing.usage_date) == value_to_match). \
            group_by(func.unix_timestamp(Billing.usage_date))
    elif output_type == 'week':
        billing_data = db_session.query(Billing.project_id,
                                        func.extract('week', Billing.usage_date),
                                        func.sum(Billing.cost)). \
            filter(func.extract('year', Billing.usage_date) == year,
                   Billing.project_id == project_id,
                   Billing.resource_type == resource,
                   func.extract('week', Billing.usage_date) == value_to_match). \
            group_by(func.extract('week', Billing.usage_date))
    return billing_data
def get_billing_data_per_project_year(year, project_id, output_type):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Billing.usage_date),
                                        func.sum(Billing.cost),
                                        Billing.usage_value,
                                        Billing.measurement_unit). \
            filter(func.extract('year', Billing.usage_date) == year,
                   Billing.project_id == project_id). \
            group_by(func.unix_timestamp(Billing.usage_date))
    elif output_type == 'week':
        billing_data = db_session.query(Billing.project_id,
                                        func.extract(output_type, Billing.usage_date),
                                        func.sum(Billing.cost)). \
            filter(func.extract('year', Billing.usage_date) == year,
                   Billing.project_id == project_id). \
            group_by(func.extract(output_type, Billing.usage_date))
    else:
        billing_data = db_session.query(func.extract(output_type, Billing.usage_date),
                                        func.sum(Billing.cost)). \
            filter(func.extract('year', Billing.usage_date) == year,
                   Billing.project_id == project_id). \
            group_by(func.extract(output_type, Billing.usage_date))
    return billing_data
def event_viewer(eventid):
    event = models.Event.query\
        .filter(models.Event.EventID == eventid)\
        .first()
    videos = models.Video.query\
        .filter(models.Video.Lat > float(event.LatStart),
                models.Video.Lat < float(event.LatEnd),
                models.Video.Lng > float(event.LngStart),
                models.Video.Lng < float(event.LngEnd),
                func.unix_timestamp(models.Video.Time) >
                (event.TimeStart - datetime(1970, 1, 1)).total_seconds(),
                func.unix_timestamp(models.Video.Time) <
                (event.TimeEnd - datetime(1970, 1, 1)).total_seconds())\
        .order_by(models.Video.Time)
    if videos.count() > 0:
        return render_template('viewer.html',
                               videos=[video.dictionary() for video in videos],
                               name=event.Name)
    else:
        return "no videos in this event"
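# Hedged sketch: event_json() and event_viewer() both use the
# (t - datetime(1970, 1, 1)).total_seconds() idiom to turn a naive datetime
# into epoch seconds comparable with UNIX_TIMESTAMP(). A small helper makes
# the intent explicit (the helper name is an assumption, not original code):
from datetime import datetime

EPOCH = datetime(1970, 1, 1)


def to_epoch_seconds(dt):
    # Matches the SQL side only if dt is naive and in the database server's
    # time zone -- the same implicit assumption the queries above make.
    return (dt - EPOCH).total_seconds()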
def get_billing_data_per_resource_all_project_per_day_quarter(year, value_to_match,
                                                              project_ids, resource,
                                                              output_type):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Usage.usage_date),
                                        func.sum(Usage.cost),
                                        Usage.usage_value,
                                        Usage.measurement_unit). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id.in_(project_ids),
                   Usage.resource_type == resource,
                   func.extract('quarter', Usage.usage_date) == value_to_match). \
            group_by(func.unix_timestamp(Usage.usage_date))
    elif output_type == 'week':
        billing_data = db_session.query(Usage.project_id,
                                        func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id.in_(project_ids),
                   Usage.resource_type == resource,
                   func.extract('quarter', Usage.usage_date) == value_to_match). \
            group_by(func.extract(output_type, Usage.usage_date))
    elif output_type == 'month':
        billing_data = db_session.query(func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id.in_(project_ids),
                   Usage.resource_type == resource,
                   func.extract('quarter', Usage.usage_date) == value_to_match). \
            group_by(func.extract(output_type, Usage.usage_date))
    return billing_data
def get_billing_data_per_resource_per_project(year, project_id, resource, output_type):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Usage.usage_date),
                                        func.sum(Usage.cost),
                                        Usage.usage_value,
                                        Usage.measurement_unit). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id == project_id,
                   Usage.resource_type == resource). \
            group_by(func.unix_timestamp(Usage.usage_date))
    elif output_type == 'week':
        billing_data = db_session.query(Usage.project_id,
                                        func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id == project_id,
                   Usage.resource_type == resource). \
            group_by(func.extract(output_type, Usage.usage_date))
    else:
        billing_data = db_session.query(func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id == project_id,
                   Usage.resource_type == resource). \
            group_by(func.extract(output_type, Usage.usage_date))
    return billing_data
def stats_history():
    check_valid_arguments(request.args, ["to", "from", "density"])
    time_from, time_to = validate_and_set_interval(request.args)
    if 'density' in request.args:
        if request.args['density'] in ["40s", "20m", "1d"]:
            time_density = request.args['density']
        else:
            raise ProcessingException(
                description='Invalid value for "density" parameter', code=400)
    else:
        time_density = "40s"
    if time_density == "40s":
        stats = Stats40s.query\
            .filter(func.unix_timestamp(Stats40s.datetime) < time_to)\
            .filter(func.unix_timestamp(Stats40s.datetime) >= time_from)
    elif time_density == "20m":
        stats = Stats20m.query\
            .filter(func.unix_timestamp(Stats20m.datetime) < time_to)\
            .filter(func.unix_timestamp(Stats20m.datetime) >= time_from)
    else:
        stats = Stats1d.query\
            .filter(func.unix_timestamp(Stats1d.datetime) < time_to)\
            .filter(func.unix_timestamp(Stats1d.datetime) >= time_from)
    serialized = [item.json_dump() for item in stats]
    return jsonify(stats=serialized)
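# Hedged sketch: the three density branches in stats_history() differ only in
# the model class, so a lookup table keeps them in sync. This helper is
# hypothetical; Stats40s/Stats20m/Stats1d are the models used above.
DENSITY_MODELS = {"40s": Stats40s, "20m": Stats20m, "1d": Stats1d}


def stats_query_for(density, time_from, time_to):
    model = DENSITY_MODELS[density]
    return model.query\
        .filter(func.unix_timestamp(model.datetime) < time_to)\
        .filter(func.unix_timestamp(model.datetime) >= time_from)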
def tune_history(self, tunes, desc=False, dates=None):
    order = PlayData.date
    if desc:
        order = sqlalchemy.desc(order)
    query = object_session(self).\
        query(PlayData.score.label('value'),
              PlayData.date,
              func.unix_timestamp(PlayData.date).label('unixtime')).\
        filter(PlayData.friend_id == self.friend_id).\
        filter(util.tune_conds(tunes, PlayData)).\
        order_by(order)
    if dates is not None:
        query = util.cutoff_dates(query, PlayData.date, dates)
    return query
def stat_history_by_query(self, stat, desc=False, dates=None):
    order = UserData.date
    if desc:
        order = sqlalchemy.desc(order)
    value_column = getattr(UserData, stat).label('value')
    query = object_session(self).\
        query(value_column,
              UserData.date,
              func.unix_timestamp(UserData.last_date).label('unixtime')).\
        filter(UserData.friend_id == self.friend_id).\
        order_by(order)
    if dates is not None:
        query = util.cutoff_dates(query, UserData.last_date, dates)
    return query
def tune_history_ext(self, tune, desc=False, dates=None):
    order = PlayData.Detail.date
    if desc:
        order = sqlalchemy.desc(order)
    query = object_session(self).\
        query(PlayData.Detail,
              func.unix_timestamp(PlayData.Detail.date).label('unixtime')).\
        filter(PlayData.Detail.friend_id == self.friend_id).\
        filter(PlayData.Detail.music_id == tune[0]).\
        filter(PlayData.Detail.dif_id == tune[1]).\
        order_by(order)
    if dates is not None:
        query = util.cutoff_dates(query, PlayData.Detail.date, dates)
    return query
def get_billing_data_per_resource_month_week_day_center(year, value_to_match,
                                                        project_id, output_type):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Usage.usage_date),
                                        func.sum(Usage.cost),
                                        Usage.usage_value,
                                        Usage.measurement_unit). \
            filter(func.extract('year', Usage.usage_date) == year,
                   func.extract('month', Usage.usage_date) == value_to_match,
                   Usage.project_id == project_id). \
            group_by(func.extract(output_type, Usage.usage_date))
    else:
        billing_data = db_session.query(Usage.resource_type,
                                        func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id == project_id,
                   func.extract('month', Usage.usage_date) == value_to_match). \
            group_by(func.extract(output_type, Usage.usage_date),
                     Usage.resource_type)
    return billing_data
def get_billing_data_per_resource_week_day_center(year, value_to_match,
                                                  project_id, output_type):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Usage.usage_date),
                                        func.sum(Usage.cost),
                                        Usage.usage_value,
                                        Usage.measurement_unit). \
            filter(func.extract('year', Usage.usage_date) == year,
                   func.extract('week', Usage.usage_date) == value_to_match,
                   Usage.project_id == project_id). \
            group_by(func.extract(output_type, Usage.usage_date))
    else:
        billing_data = db_session.query(Usage.resource_type,
                                        func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id == project_id,
                   func.extract('week', Usage.usage_date) == value_to_match). \
            group_by(func.extract(output_type, Usage.usage_date),
                     Usage.resource_type)
    return billing_data
def get_billing_data_per_year_week_day(year, value_to_match, output_type, project_ids):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Billing.usage_date),
                                        func.sum(Billing.cost),
                                        Billing.usage_value,
                                        Billing.measurement_unit). \
            filter(func.extract('year', Billing.usage_date) == year,
                   Billing.project_id.in_(project_ids),
                   func.extract('week', Billing.usage_date) == value_to_match). \
            group_by(func.extract(output_type, Billing.usage_date))
    elif output_type == 'week':
        billing_data = db_session.query(Billing.project_id,
                                        func.extract(output_type, Billing.usage_date),
                                        func.sum(Billing.cost)). \
            filter(func.extract('year', Billing.usage_date) == year,
                   func.extract('week', Billing.usage_date) == value_to_match). \
            group_by(func.extract(output_type, Billing.usage_date),
                     Billing.project_id)
    return billing_data
def get_billing_data_per_year_month_week_day(year, value_to_match, output_type,
                                             project_ids):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Usage.usage_date),
                                        func.sum(Usage.cost),
                                        Usage.usage_value,
                                        Usage.measurement_unit). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id.in_(project_ids),
                   func.extract('month', Usage.usage_date) == value_to_match). \
            group_by(func.extract(output_type, Usage.usage_date))
    else:
        billing_data = db_session.query(Usage.project_id,
                                        func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   func.extract('month', Usage.usage_date) == value_to_match). \
            group_by(func.extract(output_type, Usage.usage_date),
                     Usage.project_id)
    return billing_data
def stat_history_by_property(self, stat, desc=False, dates=None):
    order = UserData.date
    if desc:
        order = sqlalchemy.desc(order)
    query = object_session(self).\
        query(UserData,
              func.unix_timestamp(UserData.last_date).label('unixtime')).\
        filter(UserData.friend_id == self.friend_id).\
        order_by(order)
    if dates is not None:
        query = util.cutoff_dates(query, UserData.last_date, dates)
    HistItem = collections.namedtuple('HistItem', ['value', 'date', 'unixtime'])
    for row in query:
        yield HistItem(
            value=getattr(row.UserData, stat),
            date=row.UserData.last_date,
            unixtime=row.unixtime)
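# Hedged usage sketch: stat_history_by_property() yields HistItem namedtuples,
# so a caller can read value/date/unixtime by name. `user` and 'score' are
# illustrative stand-ins, not names taken from the original code.
def dump_stat_history(user, stat='score'):
    for item in user.stat_history_by_property(stat):
        print('{} {} (epoch {})'.format(item.date, item.value, item.unixtime))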
def get_billing_data_per_year_per_center_quarter(year, project_ids, quarter,
                                                 output_type):
    if output_type == 'day':
        billing_data = db_session.query(func.unix_timestamp(Usage.usage_date),
                                        func.sum(Usage.cost),
                                        Usage.usage_value,
                                        Usage.measurement_unit). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id.in_(project_ids),
                   func.extract('quarter', Usage.usage_date) == quarter). \
            group_by(func.extract(output_type, Usage.usage_date))
    elif output_type == 'week':
        billing_data = db_session.query(Usage.project_id,
                                        func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id.in_(project_ids),
                   func.extract('quarter', Usage.usage_date) == quarter). \
            group_by(func.extract(output_type, Usage.usage_date))
    else:
        billing_data = db_session.query(func.extract(output_type, Usage.usage_date),
                                        func.sum(Usage.cost)). \
            filter(func.extract('year', Usage.usage_date) == year,
                   Usage.project_id.in_(project_ids),
                   func.extract('quarter', Usage.usage_date) == quarter). \
            group_by(func.extract(output_type, Usage.usage_date))
    return billing_data
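# Hedged sketch: the get_billing_data_* helpers above all repeat one pattern --
# a per-day series keyed on unix_timestamp(usage_date) versus an extract()-based
# rollup. A hypothetical generic builder over the same Usage model; callers
# would pass in the filter expressions used above.
def billing_query(output_type, *filters):
    if output_type == 'day':
        return db_session.query(func.unix_timestamp(Usage.usage_date),
                                func.sum(Usage.cost),
                                Usage.usage_value,
                                Usage.measurement_unit). \
            filter(*filters). \
            group_by(func.unix_timestamp(Usage.usage_date))
    return db_session.query(func.extract(output_type, Usage.usage_date),
                            func.sum(Usage.cost)). \
        filter(*filters). \
        group_by(func.extract(output_type, Usage.usage_date))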
CELL_HEADER_DICT['psc'] = 'unit'

# The list of cell columns we actually need for the export
CELL_COLUMN_NAMES = [
    'created', 'modified', 'lat', 'lon',
    'radio', 'mcc', 'mnc', 'lac', 'cid', 'psc',
    'range', 'total_measures']

CELL_COLUMN_NAME_INDICES = dict(
    [(e, i) for (i, e) in enumerate(CELL_COLUMN_NAMES)]
)

CELL_COLUMNS = []
for name in CELL_COLUMN_NAMES:
    if name in ('created', 'modified'):
        CELL_COLUMNS.append(
            func.unix_timestamp(getattr(Cell.__table__.c, name)))
    else:
        CELL_COLUMNS.append(getattr(Cell.__table__.c, name))


@contextmanager
def selfdestruct_tempdir():
    base_path = tempfile.mkdtemp()
    try:
        yield base_path
    finally:
        shutil.rmtree(base_path)


# Python 2.6 GzipFile doesn't have __exit__
class GzipFile(gzip.GzipFile):
CELL_HEADER_DICT['cid'] = 'cell'
CELL_HEADER_DICT['psc'] = 'unit'

# The list of cell columns we actually need for the export
CELL_COLUMN_NAMES = [
    'created', 'modified', 'lat', 'lon',
    'radio', 'mcc', 'mnc', 'lac', 'cid', 'psc',
    'range', 'total_measures']

CELL_COLUMN_NAME_INDICES = dict(
    [(e, i) for (i, e) in enumerate(CELL_COLUMN_NAMES)]
)

CELL_COLUMNS = []
for name in CELL_COLUMN_NAMES:
    if name in ('created', 'modified'):
        CELL_COLUMNS.append(func.unix_timestamp(getattr(cell_table.c, name)))
    else:
        CELL_COLUMNS.append(getattr(cell_table.c, name))

CELL_EXPORT_RADIO_NAMES = dict(
    [(k, v.upper()) for k, v in RADIO_TYPE_INVERSE.items()])


@contextmanager
def selfdestruct_tempdir():
    base_path = tempfile.mkdtemp()
    try:
        yield base_path
    finally:
        shutil.rmtree(base_path)
def get_drupal_sessions(start_time, end_time):
    """
    Extracts request sessions from the accesslog table.

    :param start_time: time to extract requests from
    :param end_time: time to extract requests to
    :return: dictionary with session info, keyed by company.
    """
    logger.info("Started sessions extraction")
    access_hosts = session.query(
        Accesslog.timestamp, Accesslog.hostname, Accesslog.path,
        Accesslog.url, Accesslog.title
    ).filter(
        between(Accesslog.timestamp,
                func.unix_timestamp(start_time),
                func.unix_timestamp(end_time)),
        Accesslog.title != 'Generate image style',
        Accesslog.hostname.notin_(settings.IPS_BLACKLIST)
    ).order_by(Accesslog.hostname, Accesslog.timestamp)
    accesslog = [Access(*res) for res in access_hosts]
    blacklist = {
        tup[0].lower()
        for tup in session.query(Company.name).filter(
            Company.type_main.in_(['Blacklist', 'Spam', 'Provider']))
    }
    ips_info = {
        tup[0]: tup[1:]
        for tup in session.query(DbIpDatabase.ip_ip, DbIpDatabase.ip_country,
                                 DbIpDatabase.ip_name, DbIpDatabase.ip_name_2,
                                 DbIpDatabase.ip_address)
    }
    res = {}
    drupal_session = DrupalSession()
    session_length = 0
    len_accesslog = len(accesslog[:-1]) - 1
    for index, request in enumerate(accesslog[:-1]):
        host = ip_digits(request.hostname)
        access_datetime = datetime.fromtimestamp(int(request.timestamp))
        next_request = accesslog[index + 1]
        next_request_host = ip_digits(next_request.hostname)
        next_request_access_datetime = datetime.fromtimestamp(
            int(next_request.timestamp))
        difference = next_request_access_datetime - access_datetime
        is_continue = False
        if host == next_request_host and \
                difference.seconds < settings.MAXIMUM_DIFFERENCE_BETWEEN_SESSIONS.seconds:
            session_length += difference.seconds
            is_continue = True
        elif host == next_request_host:
            session_length += settings.LONG_SESSION_DEFAULT
            is_continue = True
        elif host != next_request_host:
            session_length += settings.LONG_SESSION_DEFAULT
        if index and host == ip_digits(accesslog[index - 1].hostname) \
                and host != next_request_host:
            drupal_session.append(request)
        elif host == next_request_host:
            drupal_session.append(request)
            is_continue = True
        if is_continue and index != len_accesslog:
            continue
        if host in ips_info:
            country, company_name, address_result, full_address_result = \
                ips_info[host]
        else:
            country = company_name = address_result = full_address_result = ''
            try:
                country, company_name, address_result, full_address_result = \
                    get_whois(host)
            except Exception as e:
                logger.error(
                    'get_whois function (RIPE) got an error for host: {}\n'
                    'Error: {}'.format(host, str(e)))
                continue
            finally:
                address_result = address_result[:250]
                logger.debug(address_result)
                full_address_result = full_address_result[:350]
            new_entry = DbIpDatabase(ip_ip=host, ip_country=country,
                                     ip_name=company_name,
                                     ip_name_2=address_result,
                                     ip_address=full_address_result,
                                     ip_host=host, ip_timestamp=func.now())
            session.add(new_entry)
            ips_info[host] = (country, company_name, address_result,
                              full_address_result)
        company_name = company_name.lower()
        if company_name and country in settings.RELEVANT_COUNTRIES \
                and company_name not in settings.PROVIDERS_BLACKLIST \
                and company_name not in blacklist \
                and not any(word in company_name
                            for word in settings.COMPANIES_BLACKLIST) \
                and not any(re.search(regexp, company_name)
                            for regexp in settings.PROVIDERS_BLACKLIST_REGEXPS) \
                and not any(re.search(regexp, company_name)
                            for regexp in settings.COMPANIES_BLACKLIST_REGEXPS):
            if company_name not in res:
                res[company_name] = CompanyEntry(*ips_info[host], sessions=[])
            res[company_name].sessions.append(drupal_session)
            res[company_name].session_length = timedelta(seconds=session_length)
        drupal_session = DrupalSession()
        session_length = 0
    session.commit()
    logger.info('Sessions extraction has been finished successfully.')
    return res
def _build_entry_companies(self):
    end_time = datetime.now()
    start_time = end_time - timedelta(days=settings.REPORT_PERIOD_IN_DAYS)
    accesslog = [
        Access(*res) for res in alchemy_session.query(
            Accesslog.timestamp, Accesslog.hostname, Accesslog.path,
            Accesslog.url, Accesslog.title).filter(
                between(Accesslog.timestamp,
                        func.unix_timestamp(start_time),
                        func.unix_timestamp(end_time)),
                Accesslog.title != 'Generate image style',
                Accesslog.hostname.notin_(settings.IPS_BLACKLIST)).
        order_by(Accesslog.hostname, Accesslog.timestamp)
    ]
    blacklist = {
        tup[0].lower()
        for tup in alchemy_session.query(Company.name).filter(
            Company.type_main.in_(['Blacklist', 'Spam', 'Provider']))
    }
    ips_info = {
        tup[0]: tup[1:]
        for tup in alchemy_session.query(
            DbIpDatabase.ip_ip, DbIpDatabase.ip_country,
            DbIpDatabase.ip_name, DbIpDatabase.ip_name_2,
            DbIpDatabase.ip_address)
    }
    res = {}
    drupal_session = DrupalSession()
    session_length = 0
    len_accesslog = len(accesslog[:-1]) - 1
    for index, request in enumerate(accesslog[:-1]):
        host = ip_digits(request.hostname)
        access_datetime = datetime.fromtimestamp(int(request.timestamp))
        next_request = accesslog[index + 1]
        next_request_host = ip_digits(next_request.hostname)
        next_request_access_datetime = datetime.fromtimestamp(
            int(next_request.timestamp))
        difference = next_request_access_datetime - access_datetime
        is_continue = False
        if host == next_request_host and \
                difference.seconds < settings.MAXIMUM_DIFFERENCE_BETWEEN_SESSIONS.seconds:
            session_length += difference.seconds
            is_continue = True
        elif host == next_request_host:
            session_length += settings.LONG_SESSION_DEFAULT
            is_continue = True
        elif host != next_request_host:
            session_length += settings.LONG_SESSION_DEFAULT
        if index and host == ip_digits(accesslog[index - 1].hostname) \
                and host != next_request_host:
            drupal_session.append(request)
        elif host == next_request_host:
            drupal_session.append(request)
            is_continue = True
        if is_continue and index != len_accesslog:
            continue
        if host in ips_info:
            country, company_name, address_result, full_address_result = \
                ips_info[host]
            company_name = company_name.lower()
            if company_name and country in settings.RELEVANT_COUNTRIES \
                    and company_name not in settings.PROVIDERS_BLACKLIST \
                    and company_name not in blacklist \
                    and not any(word in company_name
                                for word in settings.COMPANIES_BLACKLIST) \
                    and not any(re.search(regexp, company_name)
                                for regexp in settings.PROVIDERS_BLACKLIST_REGEXPS) \
                    and not any(re.search(regexp, company_name)
                                for regexp in settings.COMPANIES_BLACKLIST_REGEXPS):
                if company_name not in res:
                    res[company_name] = CompanyEntry(*ips_info[host],
                                                     sessions=[])
                res[company_name].sessions.append(drupal_session)
                res[company_name].session_length = timedelta(
                    seconds=session_length)
        session_length = 0
        drupal_session = DrupalSession()
    return res
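# Hedged sketch: both session extractors filter Accesslog.timestamp (stored as
# epoch integers) with BETWEEN UNIX_TIMESTAMP(start) AND UNIX_TIMESTAMP(end),
# so the datetime-to-epoch conversion happens inside MySQL. Isolated, the
# filter is (the helper name is an assumption):
from sqlalchemy import between, func


def accesslog_window(start_time, end_time):
    return between(Accesslog.timestamp,
                   func.unix_timestamp(start_time),
                   func.unix_timestamp(end_time))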
def insert_migrate(src, dst, day, limit):
    def use():
        print 'Command get_migrating usage'
        print 'get_migrating -s/--source [x,x,x]/x,x,x -d/--dest [y,y,y]/y,y,y -da/--day n -l/--limit z'
        print 'Example: '
        print '1. get_migrating -s [1,2,3] -d [4,5,6] -da 1 -l 10'
        print '2. get_migrating -s 1,2,3 -d 4,5,6 -da 1 -l 10'
        print '3. get_migrating -s 1 -d 2 -da 1 -l 10'

    if not src or not dst or not limit or not day:
        use()
        return
    patt = re.compile(r'\[(\d+,?)+\]|\d+,?|\d+')
    if not patt.match(src) or not patt.match(dst):
        use()
        return
    try:
        limit = int(limit)
        day = int(day)
    except (TypeError, ValueError):
        use()
        return
    src = re.findall(r'\d+', src)
    dst = re.findall(r'\d+', dst)
    ost_fid = {}
    count = 0
    src_files = {}
    sysFile = sql.session.query(SystemFile.id, SystemFile.storage_id).join(
        UserFile, SystemFile.id == UserFile.pid)
    for ost in src:
        ost = int(ost)
        src_files[ost] = sysFile.filter(
            SystemFile.storage_id == ost,
            SystemFile.count > 0,
            UserFile.lastdownload < func.unix_timestamp(
                datetime.datetime.now() - datetime.timedelta(days=day))) \
            .order_by(UserFile.size.desc()).limit(limit).all()
    while limit != 0:
        for ost in src_files.keys():
            if ost not in ost_fid.keys():
                ost_fid[ost] = 0
            if len(src_files[ost]) == 0:
                del src_files[ost]
                del ost_fid[ost]
                continue
            try:
                mongo.Migration.collection.insert({
                    '_id': src_files[ost][ost_fid[ost]][0],
                    'src_id': ost,
                    'dst_id': int(dst[count]),
                    'stat': 'QUEUE',
                })
            except PyMongoError as e:
                ost_fid[ost] += 1
                if len(src_files[ost]) == ost_fid[ost]:
                    del ost_fid[ost]
                    del src_files[ost]
                continue
            ost_fid[ost] += 1
            if len(src_files[ost]) == ost_fid[ost]:
                del ost_fid[ost]
                del src_files[ost]
            count += 1
            if len(dst) == count:
                count = 0
        limit = limit - 1
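# Hedged sketch of the age cutoff used in insert_migrate(): keep only files
# whose lastdownload epoch is older than `day` days. UNIX_TIMESTAMP(cutoff)
# is evaluated by MySQL, so the comparison stays in epoch seconds on the
# database side (the helper name is an assumption, not original code):
import datetime

from sqlalchemy import func


def stale_cutoff_filter(day):
    cutoff = datetime.datetime.now() - datetime.timedelta(days=day)
    return UserFile.lastdownload < func.unix_timestamp(cutoff)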
Column('id', INTEGER(unsigned=True), primary_key=True),
Column('email', VARCHAR(128), unique=True, nullable=False),
Column('username', VARCHAR(32), unique=True, nullable=False),
Column('password', VARCHAR(64), nullable=False),
Column('secret', VARCHAR(32), nullable=False),           # salt
Column('actived', BOOLEAN, nullable=False, default=0),   # user active
Column('active_token', VARCHAR(32), index=True),         # user active link
Column('login_sequence', VARCHAR(32), index=True),       # login_sequence
Column('login_token', VARCHAR(32), index=True),          # login_token
Column('admin', BOOLEAN, nullable=False, default=0),
Column('root', BOOLEAN, nullable=False, default=0),
Column('_login_addr', INTEGER(unsigned=True), nullable=False),
Column('_last_login', INTEGER(unsigned=True), nullable=False,
       default=func.unix_timestamp()),
Column('_created', INTEGER(unsigned=True), nullable=False,
       default=func.unix_timestamp()),
Column('_modified', INTEGER(unsigned=True), nullable=False,
       default=func.unix_timestamp(), onupdate=func.unix_timestamp()),
mysql_engine='InnoDB',
mysql_charset='utf8',
)

user_profile = Table(
    'user_profile',
CELL_HEADER_DICT['cid'] = 'cell'
CELL_HEADER_DICT['psc'] = 'unit'

# The list of cell columns we actually need for the export
CELL_COLUMN_NAMES = [
    'created', 'modified', 'lat', 'lon',
    'radio', 'mcc', 'mnc', 'lac', 'cid', 'psc',
    'range', 'total_measures'
]

CELL_COLUMN_NAME_INDICES = dict([(e, i)
                                 for (i, e) in enumerate(CELL_COLUMN_NAMES)])

CELL_COLUMNS = []
for name in CELL_COLUMN_NAMES:
    if name in ('created', 'modified'):
        CELL_COLUMNS.append(
            func.unix_timestamp(getattr(Cell.__table__.c, name)))
    else:
        CELL_COLUMNS.append(getattr(Cell.__table__.c, name))

CELL_EXPORT_RADIO_NAMES = dict([(k, v.upper())
                                for k, v in RADIO_TYPE_INVERSE.items()])


@contextmanager
def selfdestruct_tempdir():
    base_path = tempfile.mkdtemp()
    try:
        yield base_path
    finally:
        shutil.rmtree(base_path)
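# Hedged sketch: CELL_COLUMNS is shaped for a raw export SELECT, with the
# created/modified datetimes already converted to epoch seconds by
# UNIX_TIMESTAMP(). A reader could stream rows in batches like this
# (`session` and the batch size are assumptions, not part of the original
# export code):
def iter_cell_rows(session, batch=10000):
    for row in session.query(*CELL_COLUMNS).yield_per(batch):
        yield row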
Column('id', INTEGER(unsigned=True), primary_key=True),
Column('email', VARCHAR(128), unique=True, nullable=False),
Column('username', VARCHAR(32), unique=True, nullable=False),
Column('password', VARCHAR(64), nullable=False),
Column('secret', VARCHAR(32), nullable=False),           # salt
Column('actived', BOOLEAN, nullable=False, default=0),   # user active
Column('active_token', VARCHAR(32), index=True),         # user active link
Column('login_sequence', VARCHAR(32), index=True),       # login_sequence
Column('login_token', VARCHAR(32), index=True),          # login_token
Column('admin', BOOLEAN, nullable=False, default=0),
Column('root', BOOLEAN, nullable=False, default=0),
Column('_login_addr', INTEGER(unsigned=True), nullable=False),
Column('_last_login', INTEGER(unsigned=True), nullable=False,
       default=func.unix_timestamp()),
Column('_created', INTEGER(unsigned=True), nullable=False,
       default=func.unix_timestamp()),
Column('_modified', INTEGER(unsigned=True), nullable=False,
       default=func.unix_timestamp(), onupdate=func.unix_timestamp()),
mysql_engine='InnoDB',
mysql_charset='utf8',
)

user_profile = Table(
    'user_profile', metadata,
    Column('id', INTEGER(unsigned=True), primary_key=True),
    Column('user_id', INTEGER(unsigned=True), ForeignKey('user_base.id'),
           nullable=False),
    Column('thumb_id', INTEGER(unsigned=True), ForeignKey('user_image.id')),
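# Hedged usage sketch: default=func.unix_timestamp() makes MySQL compute the
# epoch at INSERT time, and onupdate refreshes _modified on every UPDATE, so
# no client-side timestamps are needed. The table name user_base comes from
# the ForeignKey above; `conn` is an assumed bound connection.
def create_user(conn, user_base, **fields):
    result = conn.execute(user_base.insert().values(**fields))
    # _created, _last_login and _modified are filled server-side via
    # UNIX_TIMESTAMP().
    return result.inserted_primary_key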