예제 #1
0
 def entityData(self, td):
     """Return a two-item row ``[date, "key:val, key:val, ..."]`` for ``td``.

     ``td.data`` is decoded with ``tools.getJson`` and every key/value pair
     is rendered as ``key:val``; ``td.date`` is formatted with
     ``tools.iso_date``.
     """
     decoded = tools.getJson(td.data)
     pairs = ["%s:%s" % (name, value) for name, value in decoded.items()]
     return [tools.iso_date(td.date), ', '.join(pairs)]
예제 #2
0
    def testValidJson(self):
        """Run ``tools.getJson`` over a table of raw inputs and expected results.

        Actual and expected values are both serialised with ``json.dumps``
        before being compared.
        """
        cases = [
            {'json': "{}", 'to_return': {}},
            {'json': '{"v":"1"}', 'to_return': {"v": "1"}},
            {'json': '{"v":"1"\r\n}', 'to_return': {"v": "1"}},
            {'json': '{"v":1}', 'to_return': {"v": 1}},
            {'json': '"{}"', 'to_return': {}},
            {'json': "invalid", 'to_return': None},
            {'json': '[{"1":"one"}]', 'to_return': [{1: "one"}]},
        ]

        for case in cases:
            actual = tools.getJson(case['json'])
            actual_dump = json.dumps(actual)
            expected_dump = json.dumps(case['to_return'])
            self.assertEqual(actual_dump, expected_dump)
예제 #3
0
    def submit(self, d):
        '''
        Submit today's journal (yesterday if 00:00 - 04:00)

        Request parameters read here: 'date' (optional ISO date), 'tasks'
        (JSON), plus 'lat', 'lon', 'tags_from_text', 'data' (JSON) and
        'tags' (list) via tools.gets.

        A journal is only created when 'data' is present. The response
        always carries a 'journal' key, which is None when nothing was
        submitted.
        '''
        # Bind up front: the response below reads jrnl even when no journal
        # data was posted, which previously raised UnboundLocalError.
        jrnl = None
        date = None
        _date = self.request.get('date')
        if _date:
            date = tools.fromISODate(_date)
        task_json = tools.getJson(self.request.get('tasks'))  # JSON
        params = tools.gets(self,
                            strings=['lat', 'lon', 'tags_from_text'],
                            json=['data'],
                            lists=['tags'])
        logging.debug(params)
        if params.get('data'):
            if not params.get('tags'):
                params['tags'] = []
            jrnl = MiniJournal.Create(self.user, date)
            jrnl.Update(**params)
            jrnl.parse_tags()
            jrnl.put()

            if task_json:
                # Save new tasks for tomorrow (falsy entries are skipped)
                tasks = [Task.Create(self.user, t) for t in task_json if t]
                ndb.put_multi(tasks)
            self.success = True
            self.message = "Journal submitted!"

        self.set_response({'journal': jrnl.json() if jrnl else None})
예제 #4
0
    def apiai_request(self, d):
        '''
        Handle an API.AI webhook call and write the JSON reply.

        The body is only processed when the 'Auth-Key' request header
        equals secrets.API_AI_AUTH_KEY. If no speech was produced (including
        the unauthenticated case) a generic error sentence is sent instead.
        '''
        reply = None
        conversation_over = False
        payload = {}
        authorized = self.request.headers.get('Auth-Key') == secrets.API_AI_AUTH_KEY
        if authorized:
            body = tools.getJson(self.request.body)
            logging.debug(body)
            agent_type = self._get_agent_type(body)
            # The first element of the triple is not used here.
            _id, action, parameters = self._get_action_and_params(body)
            self._get_user(body)
            if action != 'input.disconnect':
                from services.agent import ConversationAgent
                agent = ConversationAgent(type=agent_type, user=self.user)
                reply, payload, conversation_over = agent.respond_to_action(
                    action, parameters=parameters)
            else:
                reply = "Alright, you've disconnected your Flow account"
                conversation_over = True
                self.signout()  # Clear session

        if not reply:
            reply = "Uh oh, something weird happened"
        payload['google'] = {'expect_user_response': not conversation_over}
        self.json_out({
            'source': 'Flow',
            'speech': reply,
            'displayText': reply,
            'data': payload,
            'contextOut': [],
        }, debug=True)
예제 #5
0
    def __init__(self,
                 path,
                 pct_size=0.03,
                 max_grps=5,
                 chiq_pv=0.05,
                 ifmono=True,
                 keepnan=True,
                 methods='tree'):
        """Store the binning parameters and load the processing summary.

        ``path`` is either the summary dict itself or something that
        ``tools.getJson`` can load it from. The summary must contain the
        keys 'undo', 'fill', 'cap', 'var2char', 'onehot' and 'woeCal'.
        """
        self.params = {
            'pct_size': pct_size,
            'max_grps': max_grps,
            'chiq_pv': chiq_pv,
            'ifmono': ifmono,
            'keepnan': keepnan,
            'methods': methods,
        }

        summary = path if isinstance(path, dict) else tools.getJson(path)

        # (attribute name, summary key); 'var2char' should be a dictionary.
        attr_to_key = (
            ('undo_list', 'undo'),
            ('fill_list', 'fill'),
            ('cap_list', 'cap'),
            ('var2char_list', 'var2char'),
            ('onehot_list', 'onehot'),
            ('woe_list', 'woeCal'),
        )
        for attr, key in attr_to_key:
            setattr(self, attr, summary[key])
예제 #6
0
 def json(self):
     """Return this entity as a plain dict.

     'scopes' is included only when the stored credentials decode (via
     ``tools.getJson``) to something truthy.
     """
     payload = dict(
         id=self.key.id(),
         level=self.level,
         level_name=self.print_level(),
         name=self.name,
         email=self.email,
         phone=self.phone,
         location_text=self.location_text,
         ts_created=tools.unixtime(self.dt_created),
         services_enabled=self.services_enabled,
         service_settings=tools.getJson(self.service_settings),
     )
     creds = tools.getJson(self.credentials)
     if not creds:
         return payload
     payload['scopes'] = creds.get('scopes')
     return payload
예제 #7
0
 def handle_error(self, response):
     """Log an error response and inspect its 'error' code / subcode.

     No action is currently taken for any code; the 190/460 case is only
     recognised.
     """
     raw = response.content
     logging.warning(raw)
     err = tools.getJson(raw).get('error', {})
     code, subcode = err.get('code'), err.get('error_subcode')
     token_invalidated = (code == 190 and subcode == 460)
     if token_invalidated:
         # Error validating access token: The session has been invalidated because the user
         # changed their password or Facebook has changed the session for security reasons.
         pass
예제 #8
0
 def AllWoeCollects(self, woe_vrs_info):
     """
     Collect calculated WOE information into ``self.woeDetail``.

     woe_vrs_info: either the WOE detail mapping itself (any ``dict``,
         including subclasses such as ``OrderedDict``), or a version
         string used to build the path
         ``<self.path>/feature_process_methods/IVstat/woeDetail_<version>.json``,
         which is then loaded with ``tools.getJson``.
     """
     # isinstance (not ``type(...) is dict``) so dict subclasses are used
     # directly instead of falling into the string-concatenation branch.
     if isinstance(woe_vrs_info, dict):
         self.woeDetail = woe_vrs_info
     else:
         self.woeDetail = tools.getJson(
             self.path + '/feature_process_methods/IVstat/woeDetail_' +
             woe_vrs_info + '.json')
예제 #9
0
 def entityData(self, td):
     """Build the export row for ``td``: its ISO date and a "key:val" summary.

     The summary joins every pair of the dict decoded from ``td.data``
     (via ``tools.getJson``) with ", ".
     """
     fields = tools.getJson(td.data)
     rendered = ["%s:%s" % pair for pair in fields.items()]
     iso_day = tools.iso_date(td.date)
     summary = ', '.join(rendered)
     return [iso_day, summary]
예제 #10
0
 def generate(self, d):
     """Create a report for ``d['enterprise']`` and queue its background run.

     Reads 'type' (default REPORT.SENSOR_DATA_REPORT), 'ftype' (default
     REPORT.CSV), 'target' and 'specs_json' from the request.
     """
     request = self.request
     report_type = request.get_range('type', default=REPORT.SENSOR_DATA_REPORT)
     file_type = request.get_range('ftype', default=REPORT.CSV)
     target = request.get('target')
     specs = tools.getJson(request.get('specs_json'))
     report = Report.Create(
         d['enterprise'], type=report_type, specs=specs, ftype=file_type)
     report.put()
     tools.safe_add_task(
         backgroundReportRun,
         str(report.key()),
         target=target,
         _queue="worker-queue")
     self.json_out(success=True, message="%s generating..." % report.title)
예제 #11
0
 def __init__(self, request, type=AGENT_FBOOK_MESSENGER, user=None):
     """Initialise the agent from an incoming request and process it at once.

     Args:
         request: Incoming request; its ``body`` is decoded with
             ``tools.getJson`` and stored on ``self.body``.
         type: Agent type, forwarded to the parent constructor.
         user: Optional user, forwarded to the parent constructor. When
             falsy, ``_get_fbook_user()`` is called (presumably to resolve
             the user from the request body -- confirm in that method).
     """
     super(FacebookAgent, self).__init__(type=type, user=user)
     self.message_data = {}
     self.reply = None
     self.md = {}  # To populate with entry.messaging[0]
     self.request_type = None
     self.body = tools.getJson(request.body)
     # The body must be set before the helper calls below are made.
     if not user:
         self._get_fbook_user()
     self._get_request_type()
     logging.debug("Authenticated user: %s. Type: %s" % (self.user, self.request_type))
     logging.debug(self.body)
     # Construction has side effects: the request is handled here.
     self._process_request()
예제 #12
0
    def testValidJson(self):
        """Table-driven check of ``tools.getJson``.

        Each case pairs a raw string with the value expected back; both
        sides are compared after serialising with ``json.dumps``.
        """
        volley = [
            dict(json="{}", to_return={}),
            dict(json='{"v":"1"}', to_return={"v": "1"}),
            dict(json='{"v":"1"\r\n}', to_return={"v": "1"}),
            dict(json='{"v":1}', to_return={"v": 1}),
            dict(json='"{}"', to_return={}),
            dict(json="invalid", to_return=None),
            dict(json='[{"1":"one"}]', to_return=[{1: "one"}]),
        ]

        for entry in volley:
            raw, expected = entry['json'], entry['to_return']
            got = tools.getJson(raw)
            self.assertEqual(json.dumps(got), json.dumps(expected))
예제 #13
0
 def update(self, d):
     '''
     Update a single TrackingDay() object with properties
     defined via JSON key(str) -> value(str)

     Reads the optional 'date' (ISO) and 'data' (JSON) request parameters.
     The tracking day is only stored when 'data' decodes to something
     truthy; the response is sent either way.
     '''
     raw_date = self.request.get('date')
     day = tools.fromISODate(raw_date) if raw_date else None
     properties = tools.getJson(self.request.get('data'))  # JSON
     tracking_day = TrackingDay.Create(self.user, day)  # Get or create
     if properties:
         tracking_day.set_properties(properties)
         tracking_day.put()
     self.success = True
     if tracking_day:
         td_json = tracking_day.json()
     else:
         td_json = None
     self.set_response({'tracking_day': td_json})
예제 #14
0
 def generate(self, d):
     """Create a report for the current user and queue its background run.

     Raises APIError when the request carries no (or a falsy) 'type'.
     'ftype' defaults to REPORT.CSV; 'specs_json' is decoded with
     ``tools.getJson``.
     """
     from constants import REPORT
     from handlers import APIError
     from tasks import backgroundReportRun
     report_type = self.request.get_range('type')
     if not report_type:
         raise APIError("No type in report request")
     file_type = self.request.get_range('ftype', default=REPORT.CSV)
     specs = tools.getJson(self.request.get('specs_json'))
     report = Report.Create(
         self.user, type=report_type, specs=specs, ftype=file_type)
     report.put()
     tools.safe_add_task(
         backgroundReportRun, report.key.urlsafe(), _queue="report-queue")
     status = "%s generating..." % report.title
     report_json = report.json() if report else None
     self.set_response(
         success=True, message=status, data={'report': report_json})
    def featureSelection_AvgScore(self, top=None, ftr_c=0.65):
        """
        Evaluate features from the performance of models that were trained
        on repeatedly, randomly sampled feature subsets.

        Only records listed in ``<path>/feat_imps/all_auc.json`` whose
        train AUC is >= ``ftr_c`` are used. When ``top`` is given, only the
        ``top`` highest-valued features of each record are kept.

        Returns a DataFrame with columns 'avg' (row mean over records),
        'cnt' (number of records containing the feature) and
        'score' = avg * log(2 + cnt).

        Raises ValueError when no record passes the AUC threshold.
        """
        auc_table = pd.read_table(self.path + '/feat_imps/all_auc.json',
                                  sep=' ',
                                  names=['files', 'train_auc', 'test_auc'])
        qualified = auc_table[auc_table['train_auc'] >= ftr_c]
        record_names = list(qualified['files'])
        if not record_names:
            raise ValueError('Not Enough Model')

        merged = None
        for name in record_names:
            loaded = tools.getJson(self.path + '/feat_imps/' + name + '.json')
            importances = pd.DataFrame(loaded)
            importances.columns = [name]
            importances = importances.sort_values(name, ascending=False)
            if top is not None:
                importances = importances.iloc[:top]

            if merged is None:
                merged = importances
                continue
            # Outer join on the index so features missing from some records survive.
            merged = pd.merge(left=merged,
                              right=importances,
                              left_index=True,
                              right_index=True,
                              how='outer')

        result = pd.DataFrame(merged.mean(axis=1, skipna=True), columns=['avg'])
        result['cnt'] = merged.count(axis=1, numeric_only=True)
        weight = result['cnt'].apply(lambda x: np.log(2 + x))
        result['score'] = result['avg'] * weight

        return result
예제 #16
0
 def _journal(self, message=""):
     """Advance the journal conversation by one user message.

     The conversation moves through the modes 'questions' -> 'tasks' ->
     'end', tracked in ``self.cs`` (the conversation state object).

     Args:
         message: Text received from the user for this turn.

     Returns:
         A ``(reply, end_convo)`` tuple: the text to send back and whether
         the conversation is finished.
     """
     DONE_MESSAGES = ["done", "that's all", "exit", "finished", "no"]
     MODES = ['questions', 'tasks', 'end']
     settings = tools.getJson(self.user.settings, {})
     questions = settings.get('journals', {}).get('questions', [])
     end_convo = False
     if questions:
         jrnl = MiniJournal.Get(self.user)
         if jrnl:
             # A journal already exists for this user: end immediately.
             return (JOURNAL.ALREADY_SUBMITTED_REPLY, True)
         else:
             if not self.cs:
                 # First turn: start a fresh conversation in 'questions' mode.
                 self.cs = self._create_conversation_state()
                 self.cs.set_state('mode', 'questions')
             mode = self.cs.state.get('mode')
             mode_finished = False
             save_response = True
             last_question = None
             # Receive user message
             if mode == 'tasks':
                 # A "done"-style message ends task entry and is not stored.
                 is_done = message.lower().strip() in DONE_MESSAGES
                 mode_finished = is_done
                 save_response = not is_done
             elif mode == 'questions':
                 # last_q_index is the index of the question asked previously
                 # (-1 when none has been asked yet).
                 last_q_index = self.cs.state.get('last_q_index', -1)
                 last_question = last_q_index == len(questions) - 1
                 mode_finished = last_question
                 save_response = True
             if save_response:
                 successful_add = self.cs.add_message_from_user(message)
                 if not successful_add:
                     # Message rejected: reply with an error and stay in place.
                     reply = self.cs.invalid_reply(
                     ) if mode == 'questions' else JOURNAL.INVALID_TASK
                     return (reply, False)
             mode_index = MODES.index(mode)
             if mode_finished:
                 # Move on to the next mode in MODES.
                 mode = MODES[mode_index + 1]
                 self.cs.set_state('mode', mode)
             reply = None
             # Generate next reply
             if mode == 'questions':
                 # Only reachable when mode was 'questions' above, so
                 # last_q_index is bound here.
                 next_q_index = last_q_index + 1
                 q = questions[next_q_index]
                 reply = q.get('text')
                 name = q.get('name')
                 response_type = q.get('response_type')
                 pattern = JOURNAL.PATTERNS.get(response_type)
                 store_number = response_type in JOURNAL.NUMERIC_RESPONSES
                 self.cs.expect_reply(
                     pattern, name,
                     store_number=store_number)  # Store as name
                 self.cs.set_state('last_q_index', next_q_index)
             elif mode == 'tasks':
                 # Ask to add tasks
                 tasks = self.cs.response_data.get('tasks', [])
                 additional = len(tasks) > 0
                 reply = JOURNAL.TOP_TASK_PROMPT_ADDTL if additional else JOURNAL.TOP_TASK_PROMPT
                 self.cs.expect_reply(JOURNAL.PTN_TEXT_RESPONSE,
                                      'tasks',
                                      store_array=True)  # Store as name
             elif mode == 'end':
                 # Finish and submit
                 task_names = []
                 if 'tasks' in self.cs.response_data:
                     # Remove tasks so they are not passed to jrnl.Update below.
                     task_names = self.cs.response_data.pop('tasks')
                 jrnl = MiniJournal.Create(self.user)
                 jrnl.Update(data=self.cs.response_data)
                 jrnl.parse_tags()
                 jrnl.put()
                 tasks = []
                 if task_names:
                     for tn in task_names:
                         task = Task.Create(self.user, tn)
                         tasks.append(task)
                 ndb.put_multi(tasks)
                 reply = "Report submitted!"
                 end_convo = True
             if reply:
                 self.cs.set_message_to_user(reply)
             if end_convo:
                 self._expire_conversation()
             else:
                 self._set_conversation_state()
             return (reply, end_convo)
     else:
         # No questions configured for this user.
         return ("Please visit flowdash.co to set up journal questions",
                 True)
Created on Mon Jan 13 21:11:19 2020

@author: zhuchang
"""

import pandas as pd
import numpy as np
import os
import json
from tqdm import tqdm

import tools
from WoeMethods import AllWoeFuncs, WoeFuncs
import FeatureStatTools
from FeatureProcess import putNaMethods

# Data directory and the raw input file inside it.
path = 'gt_big'
raw_data_file_name = 'raw_data.csv'

# Mapping: column name -> value handed to putNaMethods as that column's NA marker.
na_smy = tools.getJson('%s/%s' % (path, 'na_value_info.json'))
#na_smy = {
#          '':''}

raw = pd.read_csv('%s/%s' % (path, raw_data_file_name), header=0)
for i in na_smy:
    # Fit and apply the NA handling on this single column, then write it back.
    spurs = putNaMethods(na_list=[na_smy[i]])
    fitted = spurs.fit(raw[[i]])
    tmp = fitted.transform(raw[[i]])
    raw = raw.assign(**{i: tmp[i]})

raw.to_csv('%s/modify_data.csv' % path, index=False)
예제 #18
0
 def get_svc_settings(self, svc_key):
     """Return the settings stored under ``svc_key``, or ``{}`` if absent.

     ``self.service_settings`` is decoded with ``tools.getJson`` first.
     """
     return tools.getJson(self.service_settings).get(svc_key, {})
예제 #19
0
                                                size_c=1000)

if smy_creation:
    raw_data = pd.read_csv(path + '/' + raw_data_file_name, header=0)
    to_drop_list = ['num']
    smy = {
        'label': 'label',
        'dayno': 'back_time',
        'int_col': [],
        'float_col': [],
        'str_col': [],
        'toDrop': []
    }
    total_data = len(raw_data)
    try:
        js = tools.getJson(path + '/' + 'type_info.json')
    except:
        js = tools.getJson(path + '/' + 'type_info_sample.json')
    print('generating summary')
    for k, v in tqdm(js.items()):
        if k == smy['label'] or k == smy['dayno']:
            continue
        elif k in to_drop_list:
            smy['toDrop'].append({k: 'no feature'})
        elif js[k]['dist'] <= 1:
            smy['toDrop'].append({k: 'unique_value'})
        elif js[k]['type'] == 'str' and js[k]['dist'] > 30:
            smy['toDrop'].append({k: 'too much chars'})
        elif raw_data[k].isnull().sum() / total_data > 0.98:
            smy['toDrop'].append({k: 'too much missing'})
        elif js[k]['type'] == 'str':
예제 #20
0
                                                size_c=1000)

if smy_creation:
    raw_data = pd.read_csv(path + '/' + raw_data_file_name, header=0)
    to_drop_list = ['num']
    smy = {
        'label': 'label',
        'dayno': 'back_time',
        'int_col': [],
        'float_col': [],
        'str_col': [],
        'toDrop': []
    }
    total_data = len(raw_data)
    try:
        js = tools.getJson(path + '/' + 'type_info.json')
    except:
        js = tools.getJson(path + '/' + 'type_info_sample.json')
    print('generating summary')
    for k, v in tqdm(js.items()):
        if k == smy['label'] or k == smy['dayno']:
            continue
        elif k in to_drop_list:
            smy['toDrop'].append({k: 'no feature'})
        elif js[k]['dist'] <= 1:
            smy['toDrop'].append({k: 'unique_value'})
        elif js[k]['type'] == 'str' and js[k]['dist'] > 30:
            smy['toDrop'].append({k: 'too much chars'})
        elif raw_data[k].isnull().sum() / total_data > 0.98:
            smy['toDrop'].append({k: 'too much missing'})
        elif js[k]['type'] == 'str':
예제 #21
0
 def _maybe_get_journal_questions(self):
     """Load ``self.journal_questions`` from the user's settings once.

     Does nothing when the attribute is already set (anything other than
     None). Falls back to an empty list when the settings contain no
     journals/questions entry.
     """
     if self.journal_questions is not None:
         return
     settings = tools.getJson(self.user.settings, {})
     journals = settings.get('journals', {})
     self.journal_questions = journals.get('questions', [])
예제 #22
0
 def _maybe_get_journal_questions(self):
     """Populate ``self.journal_questions`` on first use.

     The questions are read from ``settings['journals']['questions']`` of
     the user's settings (decoded via ``tools.getJson``), defaulting to
     ``[]``. An already-populated attribute is left untouched.
     """
     already_loaded = self.journal_questions is not None
     if already_loaded:
         return
     user_settings = tools.getJson(self.user.settings, {})
     self.journal_questions = user_settings.get('journals', {}).get('questions', [])
예제 #23
0
# Basic path / switch variables.
# Features are selected via the training-sample WOE and a logistic
# regression is built at the end.
if_gnrt_smy = True
ifselect = True
ifandriod = True
keep_emb = True
path = 'gt_big'
version = 'level1_trainWoeCode'
raw_data_file_name = 'raw_data.csv'
#path = '../function_test/raw_data'
rnd_seed = 21
preSelect = True
fittingpart = True
oottest = True

type_check = tools.getJson(path + '/type_info.json')
#raw_data = pd.read_csv(path+'/'+raw_data_file_name, sep = ',', header = 0, dtype = {i:type_check[i]['type'] for i in type_check.keys() if type_check[i]['type'] == 'str'})
raw_data = pd.read_csv(path + '/' + raw_data_file_name, sep=',', header=0)

# Decide whether this run uses Android data: when it does, rows whose
# phone brand is 'Apple' are dropped. The version tag records the choice.
if ifandriod:
    raw_data = raw_data[raw_data['ft_dev_phone_brand'] != 'Apple']
version += '_Andr' if ifandriod else '_Appl'

# Read the feature-related statistics.
# summary.json is required; it captures the modeler's prior knowledge about the features.
print("Reading related information...")
예제 #24
0
from FeatureProcess import AllFtrProcess
from FeatureSelection import ModelBasedMethods

# Basic path / switch variables: exactly one level flag is enabled.
level1 = level2 = level3 = level5 = False
level4 = True
woe_vrs = 'vrs1'
raw_data_file_name = 'data.csv'
path = '../function_test/raw_data'
rnd_seed = 21

type_check = tools.getJson(path + '/type_info.json')
# Force columns declared as 'str' in type_info.json to be read as strings.
raw_data = pd.read_csv(
    path + '/' + raw_data_file_name,
    sep=r'[\t,|]',
    header=0,
    dtype={
        col: info['type']
        for col, info in type_check.items()
        if info['type'] == 'str'
    })

# Load the previously computed IV values in preparation for later evaluation.
ivs = pd.DataFrame(tools.getJson(path + '/feature_stat/ivStat.json')).T
ivs['avg'] = ivs.mean(axis=1)
"""