def get_messages_per_day(self, conversation_id):
    """Count messages per day of week for each sender in a conversation.

    Returns the standard API payload: a list of series, one per sender,
    each with a ``color``, a ``title`` (the sender name) and ``data``
    points of the form ``{"x": day-of-week label, "y": message count}``.
    """
    # Parameterized query -- never interpolate conversation_id into SQL.
    messages_per_day = db.execute_sql(
        """
        SELECT DAYOFWEEK(sent_at) AS d, sender, COUNT(*)
        FROM message
        WHERE conversation_id = %s
        GROUP BY d, sender;
        """, (conversation_id,)).fetchall()

    # Group rows by sender: {sender: {"color": ..., "data": [...]}}
    messages_per_day_per_user = {}
    for day, sender, count in messages_per_day:
        if sender not in messages_per_day_per_user:
            messages_per_day_per_user[sender] = {
                # One color per sender, assigned in first-seen order.
                "color": get_conf("colors")[len(messages_per_day_per_user)],
                "data": []
            }
        messages_per_day_per_user[sender]["data"].append({
            # MySQL DAYOFWEEK(): 1 = Sunday ... 7 = Saturday.
            "x": get_conf("days_of_week")[int(day)],
            "y": count
        })

    # Build output
    output = []
    for name, value in messages_per_day_per_user.items():
        # Rotate so Sunday (DAYOFWEEK = 1, first row) ends up last.
        # NOTE(review): this assumes every sender has at least one Sunday
        # message and rows arrive ordered by day -- confirm against the
        # GROUP BY output before relying on the ordering.
        value["data"] = value["data"][1:] + [value["data"][0]]
        value.update({"title": name})
        output.append(value)
    return messages.message(output, namespace=self.get_namespace(request))
def get_language(self, conversation_id):
    """Detect the languages used in a conversation.

    Keeps only messages with more than 20 characters and at least 5
    words, classifies each one with langid, and returns the 4 most used
    languages plus an "others" bucket summing the rest.
    """
    message_per_lang = defaultdict(int)
    # Parameterized query -- never interpolate conversation_id into SQL.
    messages_ = db.execute_sql(
        """
        SELECT content
        FROM message
        WHERE conversation_id = %s
        AND content IS NOT NULL
        AND content <> ""
        AND CHARACTER_LENGTH(content) > 20
        """, (conversation_id,)).fetchall()
    log.info("{} messages found".format(len(messages_)))

    # Keep messages with at least 5 space-separated words.
    messages_ = [x[0] for x in messages_ if len(x[0].split(' ')) >= 5]
    log.info("{} messages with words".format(len(messages_)))

    for content in messages_:
        try:
            # Exports are often UTF-8 bytes mis-decoded as latin-1;
            # round-trip to recover the original text.
            content = content.encode('latin1', 'ignore').decode('utf8')
        except UnicodeDecodeError:
            # Not mojibake after all -- skip rather than misclassify
            # (preserves the original best-effort behavior).
            continue
        lang = langid.classify(content)[0]
        message_per_lang[lang] += 1

    # Build output
    output = []
    for lang, nb in message_per_lang.items():
        # pycountry may not know the code langid produced; fall back to
        # the raw code instead of crashing on None.name.
        language = pycountry.languages.get(alpha_2=lang)
        output.append({
            "lang": lang,
            "language_pretty": language.name if language else lang,
            "nb_messages": nb,
            "flag": get_conf("flags").get(lang, get_conf("flags").get("other"))
        })
    output = sorted(output, key=lambda i: i['nb_messages'], reverse=True)

    # Merge everything past the top 4 into a single "others" row (the row
    # is always appended, even when the sum is 0).
    others = sum(x["nb_messages"] for x in output[4:])
    output = output[:4]
    output.append({
        "lang": "others",
        "language_pretty": "Others",
        "nb_messages": others,
        "flag": get_conf("flags")["other"]
    })
    return messages.message(output, namespace=self.get_namespace(request))
def __init__(self, conf=None): if not conf: conf = get_conf() self.conf = conf['pgsql'] self.host = self.conf.get('host', '127.0.0.1') self.port = self.conf.get('port', 5432) self.user = self.conf.get('user', 'postgres') self.password = self.conf.get('password', 'boshi') self.database = self.conf.get('database', 'boshi')
def add_file_handler():
    """Return the "etl" logger, attaching a timestamped file handler in PROD.

    The log file is ``<log_dir>/etl_<Y_m_d_H_M>.log``; the level comes
    from the ``LOG_LEVEL`` env var (default INFO). Outside PROD the
    logger is returned untouched.
    """
    # Create a custom logger
    log = logging.getLogger("etl")
    if os.environ.get("ENV", "LOCAL") == "PROD":
        level = getattr(logging, os.environ.get("LOG_LEVEL", "INFO").upper())
        # Without this the logger inherits the root WARNING level and
        # drops INFO records before they ever reach the file handler.
        log.setLevel(level)
        # Guard against stacking a duplicate handler on repeated calls.
        if not any(isinstance(h, logging.FileHandler) for h in log.handlers):
            filename = "{}/etl_{}.log".format(
                get_conf("log_dir"),
                datetime.now().strftime('%Y_%m_%d_%H_%M'))
            f_handler = logging.FileHandler(filename)
            f_handler.setLevel(level)
            f_format = logging.Formatter(
                '%(asctime)s - %(name)s - %(filename)s - %(funcName)s - %(levelname)s - %(message)s'
            )
            f_handler.setFormatter(f_format)
            log.addHandler(f_handler)
    return log
# -*- coding: utf-8 -*- import waitress from web.app import api as app from log.log_init import init_log from utils.utils import get_conf def init_(): # 日志初始化 init_log() if __name__ == '__main__': conf = get_conf() # init_() sv = conf.get('server', {}) host = sv.get('host', '0.0.0.0') port = sv.get('port', 8888) # print(host, port) waitress.serve(app, host=host, port=port, _quiet=True)
def get_conversation_info(self, conversation_id):
    """Aggregate conversation statistics, overall and per sender.

    Returns a dict with totals (messages, words, first/last message,
    messages per day) and a per-user breakdown sorted by message count.
    """
    # One row per sender: (count, last_sent, first_sent, sender,
    # is_still_participant, title, thread_type, word_count).
    # Parameterized query -- never interpolate conversation_id into SQL.
    conversations = db.execute_sql(
        """
        SELECT COUNT(*), MAX(sent_at), MIN(sent_at), sender,
               ANY_VALUE(is_still_participant), ANY_VALUE(title),
               ANY_VALUE(thread_type),
               SUM(LENGTH(content) - LENGTH(REPLACE(content, ' ', '')) + 1)
        FROM message
        WHERE conversation_id = %s
        GROUP BY sender;
        """, (conversation_id,)).fetchall()

    first_message = min(x[2] for x in conversations)
    last_message = max(x[1] for x in conversations)
    output = {
        "nb_messages_per_user": [],
        "nb_messages": sum(x[0] for x in conversations),
        "title": conversations[0][5],
        "is_group_conversation": conversations[0][6] == "RegularGroup",
        "is_still_participant": bool(conversations[0][4]),
        "first_message": first_message.strftime("%b %d %Y %H:%M:%S"),
        "last_message": last_message.strftime("%b %d %Y %H:%M:%S"),
        # SUM(...) is NULL when a sender only has NULL content -> count 0.
        "nb_words": sum(int(x[7] or 0) for x in conversations),
    }
    output["words_per_message"] = round(
        output["nb_words"] / output["nb_messages"], 2)

    # Average messages per day over the conversation lifetime; a span
    # shorter than one full day yields 0.0 (original behavior).
    try:
        output["message_per_day"] = round(
            output["nb_messages"] / (last_message - first_message).days, 2)
    except ZeroDivisionError:
        output["message_per_day"] = 0.0

    # Add participants, nb messages/participants, sort list
    for i, row in enumerate(conversations):
        words = int(row[7] or 0)
        output["nb_messages_per_user"].append({
            "user": row[3],
            "nb_message": row[0],
            "label": row[3],
            "color": get_conf("colors")[i],
            "rate": round(row[0] * 100 / output["nb_messages"], 2),
            "words": words,
            # NOTE(review): 1.4 presumably approximates seconds per word
            # typed -- confirm the unit expected by format_duration.
            "time_spent": format_duration(words * 1.4)
        })
    output["nb_messages_per_user"] = sorted(output["nb_messages_per_user"],
                                            key=lambda i: i['nb_message'],
                                            reverse=True)
    return messages.message(output, namespace=self.get_namespace(request))
# -*- coding: utf-8 -*- # @Author: caixin # @Date: 2017-12-05 17:00:27 # @Last Modified by: [email protected] # @Last Modified time: 2017-12-06 10:40:34 from utils.utils import get_conf CONF = get_conf(r'config.toml') APRISO_URL = 'http://%s/Apriso/BusinessWebServices/JobExecutor.asmx?WSDL' DATAXMLS = r"""<?xml version="1.0" encoding="utf-8"?> <OperationInterpretationParameters xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema"> <OperationID>0</OperationID> <OperationCode>%s</OperationCode> <OperationResolutionMethod>ByOperationCode</OperationResolutionMethod> <Inputs> <PropertyBagItem Key="Json_str" name = "Json_str"> <Value xsi:type="xsd:string">%s</Value> </PropertyBagItem> </Inputs> <Outputs/> <InputsType/> <OutputsType/> <SystemVariables/> <ExecuteRemote>false</ExecuteRemote> <EmployeeID>-1</EmployeeID> <TestRun>false</TestRun> </OperationInterpretationParameters>"""
import peewee
from backoff import expo, on_exception
from utils.utils import get_conf

# MySQL credentials come from config; the "host" entry is a mapping keyed
# by deployment environment, so pick the host for the current env.
creds = get_conf("mysql_creds")
creds["host"] = creds["host"][get_conf("env")]
db = peewee.MySQLDatabase(**creds)


@on_exception(expo, peewee.OperationalError, max_tries=8)
def create_connection():
    """Ensure a usable DB connection, retrying with exponential backoff."""
    try:
        # Probe the current connection; raises OperationalError when there
        # is no open connection to return.
        db.connection()
    except peewee.OperationalError:
        db.connect(reuse_if_open=True)


@on_exception(expo, peewee.OperationalError, max_tries=8)
def destroy_connection(exc):
    """Close the DB connection if open (exc is the teardown exception, unused)."""
    if not db.is_closed():
        db.close()


def init(app):
    """Hook connection setup/teardown into the Flask-style request cycle."""
    app.before_request(create_connection)
    app.teardown_request(destroy_connection)