Exemple #1
0
    def get_messages_per_day(self, conversation_id):
        """Return per-sender message counts for each day of the week.

        Args:
            conversation_id: identifier of the conversation to aggregate.

        Returns:
            A namespaced API message wrapping a list of dicts, one per
            sender: {"color": ..., "title": sender,
            "data": [{"x": day name, "y": count}, ...]}
            with the first day rotated to the end of the list.
        """
        # Parameterized query — the original interpolated conversation_id
        # with str.format, which is SQL-injectable.
        rows = db.execute_sql("""
        SELECT
            DAYOFWEEK(sent_at) AS d,
            sender,
            COUNT(*)
        FROM message
        WHERE conversation_id=%s
        GROUP BY d, sender;
        """, (conversation_id,)).fetchall()

        # Group rows per sender; each new sender gets the next configured color.
        messages_per_day_per_user = {}
        for day, sender, count in rows:
            if sender not in messages_per_day_per_user:
                messages_per_day_per_user[sender] = {
                    "color":
                    get_conf("colors")[len(messages_per_day_per_user)],
                    "data": []
                }
            messages_per_day_per_user[sender]["data"].append({
                # MySQL DAYOFWEEK: 1 = Sunday ... 7 = Saturday; mapped to a
                # display name via the "days_of_week" config table.
                "x": get_conf("days_of_week")[int(day)],
                "y": count
            })

        # Build output
        output = []
        for name, value in messages_per_day_per_user.items():
            # Put sunday at the end of the list.
            # NOTE(review): this assumes rows arrive ordered by day with
            # Sunday first — GROUP BY does not guarantee ordering; consider
            # adding ORDER BY d to the query.
            value["data"] = value["data"][1:] + [value["data"][0]]
            value.update({"title": name})
            output.append(value)
        return messages.message(output, namespace=self.get_namespace(request))
Exemple #2
0
    def get_language(self, conversation_id):
        """Detect the languages used in a conversation.

        Keeps only messages with more than 20 characters and at least 5
        words, classifies each message with langid, then returns the 4
        most-used languages plus an "Others" bucket summing the rest.

        Args:
            conversation_id: identifier of the conversation to analyze.

        Returns:
            A namespaced API message wrapping a list of
            {"lang", "language_pretty", "nb_messages", "flag"} dicts.
        """
        message_per_lang = defaultdict(int)
        # Parameterized query — the original interpolated conversation_id
        # with str.format, which is SQL-injectable.
        rows = db.execute_sql("""
            SELECT content
            FROM message
            WHERE
                conversation_id=%s AND
                content IS NOT NULL AND
                content <> "" AND
                CHARACTER_LENGTH(content) > 20
            """, (conversation_id,)).fetchall()
        log.info("{} messages found".format(len(rows)))
        contents = [x[0] for x in rows if len(x[0].split(' ')) >= 5]
        log.info("{} messages with words".format(len(contents)))

        for content in contents:
            # The export is presumably mojibake (UTF-8 bytes decoded as
            # latin1); undo that before classifying. Was a bare `except:`,
            # which hid every error — only the decode step can fail here.
            try:
                content = content.encode('latin1', 'ignore').decode('utf8')
            except UnicodeDecodeError:
                continue
            lang = langid.classify(content)[0]
            message_per_lang[lang] += 1

        # Build output
        output = []
        for lang, nb in message_per_lang.items():
            # pycountry may not know the code; fall back to the raw code
            # instead of crashing on None.name.
            language = pycountry.languages.get(alpha_2=lang)
            output.append({
                "lang": lang,
                "language_pretty": language.name if language else lang,
                "nb_messages": nb,
                "flag": get_conf("flags").get(lang,
                                              get_conf("flags").get("other"))
            })
        output = sorted(output, key=lambda i: i['nb_messages'], reverse=True)
        # Merge everything beyond the top 4 into a single "Others" entry.
        others = sum(x["nb_messages"] for x in output[4:])
        # Keep top 4 + "Others" (5 entries total)
        output = output[:4]
        output.append({
            "lang": "others",
            "language_pretty": "Others",
            "nb_messages": others,
            "flag": get_conf("flags")["other"]
        })
        return messages.message(output, namespace=self.get_namespace(request))
Exemple #3
0
 def __init__(self, conf=None):
     """Load PostgreSQL connection settings.

     Args:
         conf: optional configuration mapping with a 'pgsql' section;
             when falsy, the project configuration is loaded instead.
     """
     conf = conf or get_conf()
     pgsql = conf['pgsql']
     self.conf = pgsql
     # Fall back to local-development defaults for any missing key.
     for attr, fallback in (('host', '127.0.0.1'),
                            ('port', 5432),
                            ('user', 'postgres'),
                            ('password', 'boshi'),
                            ('database', 'boshi')):
         setattr(self, attr, pgsql.get(attr, fallback))
def add_file_handler():
    """Return the "etl" logger, attaching a timestamped file handler in PROD.

    Outside PROD (ENV unset or anything else) the logger is returned
    unchanged; the log file and its level are only configured in PROD.
    """
    etl_logger = logging.getLogger("etl")
    if os.environ.get("ENV", "LOCAL") == "PROD":
        # One log file per run, named with the startup timestamp.
        timestamp = datetime.now().strftime('%Y_%m_%d_%H_%M')
        log_path = "{}/etl_{}.log".format(get_conf("log_dir"), timestamp)
        handler = logging.FileHandler(log_path)
        # Level comes from LOG_LEVEL (default INFO), resolved to the
        # matching logging constant.
        level_name = os.environ.get("LOG_LEVEL", "INFO").upper()
        handler.setLevel(getattr(logging, level_name))
        handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(filename)s - %(funcName)s - %(levelname)s - %(message)s'
        ))
        etl_logger.addHandler(handler)
    return etl_logger
Exemple #5
0
# -*- coding: utf-8 -*-
import waitress

from web.app import api as app
from log.log_init import init_log
from utils.utils import get_conf


def init_():
    """Initialize application logging via the project's init_log()."""
    # Initialize logging (original comment: 日志初始化)
    init_log()


if __name__ == '__main__':
    # Load project configuration and serve the WSGI app with waitress.
    settings = get_conf()
    # init_()
    server_settings = settings.get('server', {})
    bind_host = server_settings.get('host', '0.0.0.0')
    bind_port = server_settings.get('port', 8888)
    # print(host, port)
    waitress.serve(app, host=bind_host, port=bind_port, _quiet=True)

    def get_conversation_info(self, conversation_id):
        """Aggregate per-conversation statistics (counts, dates, words).

        Args:
            conversation_id: identifier of the conversation to aggregate.

        Returns:
            A namespaced API message wrapping a dict of conversation-level
            stats plus a "nb_messages_per_user" list sorted by message
            count (descending).

        Row layout (one row per sender): 0=count, 1=max(sent_at),
        2=min(sent_at), 3=sender, 4=is_still_participant, 5=title,
        6=thread_type, 7=word count (space-separated approximation).
        """
        # Parameterized query — the original interpolated conversation_id
        # with str.format, which is SQL-injectable.
        conversations = db.execute_sql("""
            SELECT
                COUNT(*),
                MAX(sent_at),
                MIN(sent_at),
                sender,
                ANY_VALUE(is_still_participant),
                ANY_VALUE(title),
                ANY_VALUE(thread_type),
                SUM(LENGTH(content) - LENGTH(REPLACE(content, ' ', '')) + 1)
            FROM message
            WHERE conversation_id=%s
            GROUP BY sender;
            """, (conversation_id,)).fetchall()

        # Build output.
        # NOTE(review): an unknown/empty conversation yields no rows and
        # this raises IndexError — callers should pass a valid id.
        output = {
            "nb_messages_per_user": [],
            "nb_messages": sum(x[0] for x in conversations),
            "title": conversations[0][5],
            # thread_type "RegularGroup" marks group conversations.
            "is_group_conversation": conversations[0][6] == "RegularGroup",
            "is_still_participant": bool(conversations[0][4]),
            "first_message":
            min(x[2] for x in conversations).strftime("%b %d %Y %H:%M:%S"),
            "last_message":
            max(x[1] for x in conversations).strftime("%b %d %Y %H:%M:%S"),
            "nb_words": sum(int(x[7]) for x in conversations),
        }
        output["words_per_message"] = round(
            output["nb_words"] / output["nb_messages"], 2)
        # Average messages per day; a conversation spanning less than one
        # day has .days == 0, hence the ZeroDivisionError fallback.
        try:
            output["message_per_day"] = round(
                output["nb_messages"] / (parse(output["last_message"]) -
                                         parse(output["first_message"])).days,
                2)
        except ZeroDivisionError:
            output["message_per_day"] = 0.0

        # Add participants, nb messages/participants, sort list
        for i, conversation in enumerate(conversations):
            output["nb_messages_per_user"].append({
                "user": conversation[3],
                "nb_message": conversation[0],
                "label": conversation[3],
                "color": get_conf("colors")[i],
                "rate":
                round(conversation[0] * 100 / output["nb_messages"], 2),
                "words": int(conversation[7]),
                # Rough reading-time estimate: 1.4 seconds per word.
                "time_spent": format_duration(int(conversation[7]) * 1.4)
            })
        output["nb_messages_per_user"].sort(key=lambda u: u['nb_message'],
                                            reverse=True)
        return messages.message(output, namespace=self.get_namespace(request))
Exemple #7
0
# -*- coding: utf-8 -*-
# @Author: caixin
# @Date:   2017-12-05 17:00:27
# @Last Modified by:   [email protected]
# @Last Modified time: 2017-12-06 10:40:34
from utils.utils import get_conf

# Project-wide configuration parsed from config.toml.
CONF = get_conf(r'config.toml')

# Apriso JobExecutor SOAP endpoint template; '%s' is filled with the
# server host (and port, if any).
APRISO_URL = 'http://%s/Apriso/BusinessWebServices/JobExecutor.asmx?WSDL'

# SOAP request body template for operation execution. Two '%s'
# placeholders: the operation code, and a JSON string passed as the
# "Json_str" input property.
DATAXMLS = r"""<?xml version="1.0" encoding="utf-8"?>
<OperationInterpretationParameters
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema">
    <OperationID>0</OperationID>
    <OperationCode>%s</OperationCode>
    <OperationResolutionMethod>ByOperationCode</OperationResolutionMethod>
    <Inputs>
            <PropertyBagItem Key="Json_str" name = "Json_str">
                <Value xsi:type="xsd:string">%s</Value>
            </PropertyBagItem>
    </Inputs>
    <Outputs/>
    <InputsType/>
    <OutputsType/>
    <SystemVariables/>
    <ExecuteRemote>false</ExecuteRemote>
    <EmployeeID>-1</EmployeeID>
    <TestRun>false</TestRun>
</OperationInterpretationParameters>"""
Exemple #8
0
import peewee
from backoff import expo, on_exception

from utils.utils import get_conf

# MySQL credentials come from project config; the "host" entry is a
# per-environment mapping, narrowed here to the current env's host.
creds = get_conf("mysql_creds")
creds["host"] = creds["host"][get_conf("env")]
# Shared module-level database handle used by the request hooks below.
db = peewee.MySQLDatabase(**creds)


@on_exception(expo, peewee.OperationalError, max_tries=8)
def create_connection():
    """Ensure an open DB connection, retrying with exponential backoff.

    The decorator retries up to 8 times when peewee.OperationalError
    escapes this function (i.e. when the reconnect itself fails).
    """
    try:
        # NOTE(review): presumably db.connection() raises OperationalError
        # when no connection is open — confirm against the peewee version
        # in use before changing this flow.
        db.connection()
    except peewee.OperationalError:
        db.connect(reuse_if_open=True)


@on_exception(expo, peewee.OperationalError, max_tries=8)
def destroy_connection(exc):
    """Close the shared DB connection at request teardown.

    Args:
        exc: exception from the request, if any; not used here.
    """
    # Guard clause: nothing to do when the connection is already closed.
    if db.is_closed():
        return
    db.close()


def init(app):
    """Register DB connection lifecycle hooks on the web application."""
    # Open a connection before each request, release it afterwards.
    hooks = (
        (app.before_request, create_connection),
        (app.teardown_request, destroy_connection),
    )
    for register, handler in hooks:
        register(handler)