コード例 #1
0
ファイル: my_apis.py プロジェクト: floram01/datajournalism
def query_apis():
    """Iterate over the configured APIs and dispatch each one to its handler."""
    from data_params import APIs

    for api_params in APIs:
        api_name = api_params['api']
        logger.info('requesting api: ' + api_name)
        if api_name == 'wb':
            api_wb(api_params)
コード例 #2
0
ファイル: mt_app_handler.py プロジェクト: happy5035/seiral
def mt_app_handler(msg):
    """Dispatch an application-layer message to its handler by command id.

    The first payload byte selects the handler; unknown ids and empty
    payloads are logged and otherwise ignored.
    """
    logger.debug('app handler')
    if not len(msg.data):
        logger.warning('app msg data empty')
        return
    # Index the command byte once; the original re-read msg.data[0] per
    # branch and had a dead `pass` before the unknown-cmd warning.
    cmd = msg.data[0]
    if cmd == TEMP_HUM_DATA:
        logger.debug('temp hum data')
        temp_hum_handler(msg)
    elif cmd == COOR_START:
        logger.warning('coor start')
        coor_start_handler()
    elif cmd == END_REPORT_STATUS_CMD:
        logger.warning('end report status')
        end_report_status_handler(msg)
    elif cmd == ROUTER_STATUS_CMD:
        logger.warning('router report status')
        router_report_status_handler(msg)
    elif cmd == MASTER_GET_ADDR_COUNT_CMD:
        logger.warning("addr info")
        addr_info_handler(msg)
    elif cmd == COOR_REPORT_NV_PARAMS_CMD:
        logger.warning("coor report nv params")
        coor_report_nv_params_handler(msg)
    elif cmd == SUCCESS:
        logger.info('cmd success')
    else:
        logger.warning('unknown cmd id ')
コード例 #3
0
def LoadConfig(json_file):
    """Load JSON settings from *json_file* into the global g_config and return them."""
    global g_config
    logger.info("config file is " + json_file)
    with open(json_file, 'r') as handle:
        g_config = json.load(handle)
    return g_config
コード例 #4
0
def prepare_data_params(PROJECT_NAME):
    """Create the project data directory from the boiler-plate and rename it.

    Exits the process if a project of the same name already exists.
    """
    try:
        shutil.copytree('data/.boiler_plate', 'data/' + PROJECT_NAME)
    except FileExistsError:
        # Only a pre-existing target means a duplicate project; other
        # OSErrors (permissions, missing boiler-plate) now propagate
        # instead of being misreported as a duplicate name.
        logger.info('There already is a project named ' + PROJECT_NAME + ', please choose another name')
        sys.exit()
    path = 'data/' + PROJECT_NAME + '/data_params.py'
    rename_project(path, PROJECT_NAME)
コード例 #5
0
def register_func(data_func):
    """Register a handler function under its command code and persist the map."""
    name, code = data_func['name'], data_func['code']
    logger.info('register func name:%s , code :%s' % (name, code))
    handler_funcs[code] = {'func': data_func['func'], 'name': name}
    # NOTE(review): `pick` is presumably a pickle-like module imported
    # elsewhere in the file -- confirm.
    pick.dump(handler_funcs, open(handler_funcs_filename, 'wb'))
コード例 #6
0
def pars_img():
    """Fetch the schedule image from the page soup and save it locally.

    (Docstring translated from Russian: "Takes the picture from the site".)
    """
    try:
        image = soup.find(
            class_="page_raspis_block_img").find("img").get("src")
        # Bug fix: log the start of the download before performing it;
        # the original logged "starting download" after it had finished.
        logger.info("Начинаю скачивание файла")
        urlretrieve(image, filename="schedule/schedule.png")
    except NotSchedule:
        logger.warning("NOT schedule")
コード例 #7
0
def prepare_template(PROJECT_NAME):
    """Copy the boiler-plate HTML template for a new project and rename it.

    Exits the process if a template of the same name already exists.
    """
    try:
        shutil.copyfile('app/templates/.boiler_plate.html', 'app/templates/' + PROJECT_NAME + '.html')
    except FileExistsError:
        # Narrowed from OSError: other I/O failures should propagate
        # rather than be reported as a duplicate project name.
        logger.info('There already is a project named ' + PROJECT_NAME + ', please choose another name')
        sys.exit()

    template_path = 'app/templates/' + PROJECT_NAME + '.html'
    rename_project(template_path, PROJECT_NAME)
コード例 #8
0
ファイル: DeepFM.py プロジェクト: JNUpython/tensorflow-DeepFM
    def __init__(self,
                 feature_size,
                 field_size,
                 embedding_size=8,
                 dropout_fm=None,
                 deep_layers=None,
                 dropout_deep=None,
                 deep_layers_activation=tf.nn.relu,
                 epoch=10,
                 batch_size=256,
                 learning_rate=0.001,
                 optimizer_type="adam",
                 batch_norm=0,
                 batch_norm_decay=0.995,
                 verbose=False,
                 random_seed=2016,
                 use_fm=True,
                 use_deep=True,
                 loss_type="logloss",
                 eval_metric=roc_auc_score,
                 l2_reg=0.0,
                 greater_is_better=True):
        """Configure the DeepFM model hyper-parameters and build its graph.

        feature_size / field_size are the sizes of the feature dictionary
        (M) and the feature fields (F); embedding_size (K) is the
        per-feature embedding width.
        """
        # Bug fix: the list defaults were mutable module-level objects
        # shared across every instance; build fresh per-call lists instead.
        if dropout_fm is None:
            dropout_fm = [1.0, 1.0]
        if deep_layers is None:
            deep_layers = [32, 32]
        if dropout_deep is None:
            dropout_deep = [0.5, 0.5, 0.5]

        assert (use_fm or use_deep)
        assert loss_type in ["logloss", "mse"], \
            "loss_type can be either 'logloss' for classification task or 'mse' for regression task"

        self.feature_size = feature_size  # denote as M=259, size of the feature dictionary
        self.field_size = field_size  # denote as F=39, size of the feature fields
        logger.info("{} / {}".format(feature_size, field_size))
        self.embedding_size = embedding_size  # denote as K, size of the feature embedding

        self.dropout_fm = dropout_fm
        self.deep_layers = deep_layers
        self.dropout_deep = dropout_deep
        self.deep_layers_activation = deep_layers_activation
        self.use_fm = use_fm
        self.use_deep = use_deep
        self.l2_reg = l2_reg

        self.epoch = epoch
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type

        self.batch_norm = batch_norm
        self.batch_norm_decay = batch_norm_decay

        self.verbose = verbose
        self.random_seed = random_seed
        self.loss_type = loss_type
        self.eval_metric = eval_metric
        self.greater_is_better = greater_is_better
        self.train_result, self.valid_result = [], []

        self._init_graph()
コード例 #9
0
ファイル: news_check.py プロジェクト: ChantWei/room_price
def LoadCheckPoint():
    """Read the persisted check point and cache its url in g_cur_point.

    The record file holds one line, "<name>,<short_url>"; it is opened in
    "a+" mode so it is created when missing.
    """
    global g_file_name
    global g_cur_point
    # Use a context manager so the handle is closed even if parsing fails
    # (the original closed it manually and could leak on an exception).
    with open(g_file_name, "a+") as fd:
        fd.seek(0, 0)
        ct = fd.readline()
    if ct != "":
        g_cur_point = ct.split(',')[1].strip("\n")
    logger.info("get record check point from " + g_file_name + ", url: " + ct)
コード例 #10
0
ファイル: building_price.py プロジェクト: ChantWei/room_price
def GetPageCnt(base_url):
    """Return the total page count parsed from the pagination bar of *base_url*."""
    logger.info("get page count, url: " + base_url)
    soup = BeautifulSoup(request_common.GetHtml(base_url))
    pager = soup.find(name="div", attrs={"class": "spagenext"})
    # e.g. "页数 1/14\t总数:189套(含限制房产)" -> we want the "14"
    text = pager.span.text
    logger.info(text)
    return text.split("/")[1].split("\t")[0]
コード例 #11
0
def prepare_static(PROJECT_NAME):
    """Copy the static-assets boiler-plate into a new project folder and rename it.

    Exits the process if the target folder already exists; any other
    OSError propagates.
    """
    try:
        shutil.copytree('app/static/viz/.boiler_plate', 'app/static/viz/' + PROJECT_NAME)
    except FileExistsError:
        # Matching e.strerror == 'File exists' is locale/platform fragile;
        # FileExistsError expresses the exact condition and keeps the
        # re-raise behaviour for every other OSError.
        logger.info('There already is a project named ' + PROJECT_NAME + ', please choose another name')
        sys.exit()

    path = 'app/static/viz/' + PROJECT_NAME + '/parameters.js'
    rename_project(path, PROJECT_NAME)
コード例 #12
0
ファイル: trade_interface.py プロジェクト: dugbang/kiwoom_if
    def __handler_login(self, err_code):
        """Log the login result, optionally pop the account window, and cache accounts."""
        logger.info(f"login result; {err_code}")
        if self.__login_dialog is True:
            self.dynamicCall("KOA_Functions(QString, QString)",
                             "ShowAccountWindow", "")
        self.__receive_event = True

        # logger.debug(f"account count: {self.GetLoginInfo('ACCOUNT_CNT')}")
        accounts = self.GetLoginInfo('ACCNO').split(';')
        # The raw string ends with ';', so the split leaves a trailing empty entry.
        self.__accounts = accounts[:-1]
        logger.debug(f"전체 계좌 리스트: {self.__accounts}")
コード例 #13
0
def add_data_db(user_id, first_name, last_name, user_name):
    """Insert the user into the users table unless user_id is already present.

    (Translated: adds the user to the database, skipping duplicates.)
    """
    cursor.execute("SELECT * FROM users WHERE user_id = ?", (user_id, ))
    if cursor.fetchall():
        # duplicate -- leave the table untouched
        logger.info("Пользователь уже зарегистрован в базе данных")
    else:
        cursor.executemany('INSERT INTO users VALUES(?,?,?,?)',
                           [(user_id, first_name, last_name, user_name)])
    conn.commit()
コード例 #14
0
def my_cleaning(PROJECT_NAME):
    # Run every cleaning method declared by the project against its
    # MongoDB collections: pull -> clean -> push back (erase=True
    # overwrites the target collection).
    #
    # NOTE(review): the star import below is what brings TO_CLEAN (and the
    # per-collection cleaning methods) into scope -- confirm against the
    # project layout before refactoring.
    sys.path.insert(0,'../' + PROJECT_NAME + '/')
    from data_params import DATABASE

    sys.path.insert(0,'../' + PROJECT_NAME + '/2.cleaning/')
    from my_cleaning_methods import *

    for collection in TO_CLEAN:
        df = mongo_to_dataframe(DATABASE, collection['collection_name'])
        # each TO_CLEAN entry maps a collection name to its cleaning callable
        df = collection['method'](df)
        logger.info('cleaning ' + collection['collection_name'] + ', with shape: ' + str(df.shape))
        dataframe_to_mongo(df, DATABASE, collection['collection_name'], erase=True)
        logger.info('cleaning of ' + collection['collection_name'] + ' successful, output shape: ' + str(df.shape))
コード例 #15
0
ファイル: dbhelper.py プロジェクト: happy5035/seiral
def update_net_param(net_param: NetParams):
    """Set remote_uart_addr on the NetParams row with id 1 and commit.

    Rolls back and logs on any failure; the session is always closed.

    NOTE(review): the hard-coded 1234 and the unused *net_param* argument
    look like debugging leftovers -- confirm the intended update payload.
    """
    sess = DBSession()
    try:
        query = sess.query(NetParams)
        rst = query.filter_by(net_param_id=1).first()
        rst.remote_uart_addr = 1234
        logger.info(rst)
        sess.commit()
    except Exception as e:
        logger.error(e)
        sess.rollback()
        logger.warning('update router_device failed %s' % net_param)
    finally:
        # Close even if the rollback/logging path raised, so the
        # connection is always returned to the pool.
        sess.close()
コード例 #16
0
ファイル: building_price.py プロジェクト: ChantWei/room_price
def GetAndSavePriceInfo(bdinfo, files2mail):
    """Scrape every listing page of a building and write the formatted output.

    Returns False when the page count cannot be determined, True otherwise.
    (Python 2 code: note the `unicode` builtin.)
    """
    buildings = {}
    title = bdinfo.name
    build_info_url = request_common.GetHostUrl() + bdinfo.short_url
    presell_id = GetFirstPreSellId(build_info_url)
    url_param = "?isopen=&presellid=" + presell_id + "&buildingid=&area=&allprice=&housestate=&housetype=&page="
    base_url = request_common.GetHostUrl() + bdinfo.short_url + url_param
    strcnt = GetPageCnt(base_url)
    if not unicode(strcnt).isnumeric():
        logger.info("get total page count failed")
        return False

    cnt = int(strcnt)
    logger.info("total page count: " + str(cnt))
    all_page = [base_url + str(num) for num in range(1, cnt + 1)]

    for i, url in enumerate(all_page, 1):
        time.sleep(1)  # be polite to the remote server
        logger.info("start parse, page: " + str(i))
        html = request_common.GetHtml(url)
        ParseBuildingDetail(buildings, html)
        logger.info("parse end, page: " + str(i))

    if (format_out.IsCanGoodFormat(buildings)):
        format_out.GoodOut(buildings, title, files2mail)
    else:
        format_out.SimpleOut(buildings, title, files2mail)

    return True
コード例 #17
0
ファイル: mt_sys_handler.py プロジェクト: happy5035/seiral
def mt_sys_handler(msg: Msg):
    """Dispatch a system-layer message to its registered handler function."""
    logger.debug('sys handler')
    if not len(msg.data):
        return
    if msg.data[0] == SUCCESS:
        logger.info('success')
    handler_info = get_handler_func(msg.cmd_state1, msg.cmd_state2)
    if not handler_info:
        logger.warning('handler func not found')
        return
    logger.info('handler func %s' % handler_info['name'])
    handler_info['func'](msg)
コード例 #18
0
ファイル: mt_app_handler.py プロジェクト: happy5035/seiral
def addr_info_handler(msg):
    """Log the address count and the list of 16-bit addresses in the payload."""
    data = msg.data
    # byte 0 is the command id; byte 1 is the address count
    count = bytes_to_int(data[1:2])
    logger.info('addr count %d' % count)
    # addresses follow as consecutive 2-byte fields starting at offset 2
    addrs = ['0X%04X' % bytes_to_int(data[2 + 2 * i:4 + 2 * i])
             for i in range(count)]
    logger.warning('addr info %s' % addrs)
コード例 #19
0
def ParseBuildingInfo(tr, infos):
    """Append a PreSellInfo for the row when the building is new; return success."""
    first_cell = tr.td
    name = first_cell.a.text.strip()
    url = ''
    for sibling in first_cell.next_siblings:
        # Only Tag siblings match the td's type (NavigableString separators do not).
        if type(sibling) == type(first_cell):
            url = sibling.a.attrs['href']
            break

    if not IsNewBuildingsAlias(url):
        logger.info("Had Save This Building Info: " + name)
        return False

    infos.append(PreSellInfo(name, url))
    logger.info("Need To Save This Building Info: " + name)
    return True
コード例 #20
0
ファイル: main.py プロジェクト: happy5035/seiral
def fcs_state(ser):
    # Final state of the serial frame parser: read the frame-check byte,
    # compare it with the computed FCS, enqueue the message on success,
    # then unconditionally reset the state machine back to SOP_STATE.
    global state
    ch = ser.read()
    if len(ch) == 0:
        # No byte available (serial timeout); remain in FCS state.
        return
    ch = int().from_bytes(ch, 'big')
    fcs_token = cal_fcs()
    if fcs_token == ch:
        # NOTE(review): `msg` is presumably the module-level buffer filled
        # by the earlier parser states -- confirm it is complete here.
        mt_msg = Msg(msg)
        msg_queue.put(mt_msg)
        logger.debug(mt_msg)
        logger.debug('fcs success')
    else:
        logger.info('fcs failed')
    state = SOP_STATE
    init_msg()
    timer.cancel()
コード例 #21
0
ファイル: my_apis.py プロジェクト: floram01/datajournalism
def api_wb(params):
    """Download World Bank indicator data described by *params* and store it in MongoDB."""
    from data_params import DATABASE
    country_codes = mongo_to_dataframe('utilities', 'country_code')

    raw = wb.download(indicator=params['indicator'],
                      country=params['country'],
                      start=params['start'],
                      end=params['end'])
    raw = raw.reset_index()
    raw.rename(columns=params['col_rename'], inplace=True)
    # join ISO3 codes / population onto the indicator rows
    merged = pd.merge(raw, country_codes, left_on='country', right_on='country_name')
    merged = merged[['GDP_cst_dollars', 'ISO3', 'population', 'year']]

    logger.info('inserting df with shape: ' + str(merged.shape))
    dataframe_to_mongo(merged, DATABASE, params['collection_name'], erase=True)
    logger.info('insertion sucessful in db' + DATABASE + ' of collection: ' + params['collection_name'])
コード例 #22
0
ファイル: main.py プロジェクト: happy5035/seiral
 def handle(self):
     """Parse a JSON request of NV items and queue the serial update messages."""
     payload = json.loads(str(self.request.recv(1024), 'utf-8'))
     pv = payload['pv']
     items = []
     for entry in payload['param']:
         param = find_params_by_name(entry['name'])
         items.append(NvItem(int(param['item_id']), int(param['item_len']),
                             int(entry['value'])))
     logger.info(items)
     # queue the item update, then the net-param refresh command
     serial_out_msg_queue.put({'data': mt_sys.app_msg_req(pv, items)})
     serial_out_msg_queue.put({'data': mt_sys.app_msg_net_param_update()})
コード例 #23
0
def update_archive(PROJECT_NAME):
    """Append PROJECT_NAME to app/archive.csv and link it to the previous project.

    Exits if a project with the same name is already archived.  The last
    archived row gets its ``next_url`` pointed at the new project.
    """
    df = pd.read_csv('app/archive.csv')
    if PROJECT_NAME in df.project_name.unique():
        logger.info('There already is a project named ' + PROJECT_NAME + ', please choose another name')
        sys.exit()
    last_id = df.index[-1]
    previous_url = df.loc[:, 'project_name'][last_id]

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported equivalent.
    df = pd.concat(
        [df, pd.DataFrame([{
            'project_name': PROJECT_NAME,
            'template_name': PROJECT_NAME + '.html',
            'previous_url': previous_url,
        }])],
        ignore_index=True,
    )

    df.loc[df.index == last_id, 'next_url'] = PROJECT_NAME
    df.to_csv('app/archive.csv', index=False)
コード例 #24
0
        def wrapper(**kwargs):
            """Try to open the sqlite database up to ``tries`` times.

            Expects ``tries``, ``delay`` and ``db_name`` in kwargs.
            Returns the connection on success, or None when the database
            file is missing or every attempt failed.
            """
            tries = kwargs['tries']
            delay = kwargs['delay']
            db_name = kwargs['db_name']

            if not os.path.isfile(db_name):
                logger.error(f"No database {db_name}")
                return None

            for i in range(int(tries)):
                logger.info(f"It is my {i} try to connect")
                try:
                    conn = sqlite3.connect(db_name)
                except Exception:
                    # Bug fix: the original executed `return conn` after the
                    # first iteration regardless of outcome (returning None
                    # on failure), so the retry loop never actually retried.
                    logger.error(f"The {i} attempt failed", exc_info=True)
                    time.sleep(delay)
                else:
                    logger.info("Connection established")
                    return conn
            return None
コード例 #25
0
 def _create_examples(self, lines, set_type):
     """Create InputExamples from TSV rows, skipping the header row."""
     examples = []
     print("length of lines:", len(lines))
     for idx, row in enumerate(lines):
         if idx == 0:
             # Row 0 is skipped (header?). TODO: confirm why the first row is dropped.
             logger.info(row)
             continue
         guid = "%s-%s" % (set_type, idx)
         try:
             label = tokenization.convert_to_unicode(row[0])
             text_a = tokenization.convert_to_unicode(row[1])
             text_b = tokenization.convert_to_unicode(row[2])
             examples.append(
                 InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
         except Exception:
             # malformed row: report it and keep going
             print('###error.i:', idx, row)
     return examples
コード例 #26
0
ファイル: main.py プロジェクト: happy5035/seiral
 def run(self):
     """Forever forward queued serial messages and log each reply status."""
     while True:
         try:
             data = serial_out_msg_queue.get()['data']
             self.client.send_data(data)
             try:
                 rep = serial_rep_msg_queue.get(timeout=2)
             except Empty:
                 logger.warning('rep timeout,data =  %s' % data)
             else:
                 if rep == SUCCESS:
                     logger.info('rep success,data = %s' % data)
                 if rep == FAILED:
                     logger.warning('rep failed,data =  %s' % data)
         except Exception:
             # back off briefly, log, and keep the worker alive
             time.sleep(5)
             logger.error(traceback.format_exc())
コード例 #27
0
def GetHtml(url):
    # Fetch *url* with the configured cookie, retrying on HTTP errors
    # until a response body is read.  Python 2 code: `urllib2` and the
    # "except X, e" syntax do not exist in Python 3.
    config = my_config.GetConfig()
    cookie = config['remote_server']['cookie']
    html = ""
    request = urllib2.Request(
        url,
        headers={
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0 Win64 x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36'
        })
    request.add_header("Cookie", cookie)
    while (True):
        try:
            response = urllib2.urlopen(request, timeout=60)
            html = response.read()
            break
        except urllib2.HTTPError, e:
            # NOTE(review): HTTP errors retry immediately with no backoff
            # and no attempt limit; other exceptions propagate.  As shown,
            # `html` is never returned -- the function falls off the end,
            # so a trailing `return html` may have been lost; confirm
            # against the full source.
            traceback.print_stack()
            logger.info(e.code)
            logger.error(e.reason)
            logger.error(traceback)
コード例 #28
0
def insert_scrapped_in_mongo():
    """Insert every scraped JSON file declared in SCRAPPED into MongoDB."""
    from data_params import PROJECT, SCRAPPED, DATABASE
    # path to the project's scraped data
    common_path = "../" + PROJECT + "/1.getting the data/1.scrapping/" + PROJECT + '/'
    # insert each scraped file in mongodb
    for file in SCRAPPED:
        logger.info('inserting scraped data')
        try:
            df = pd.read_json(open(common_path + file['file']), orient=file['orient'])
            logger.info('inserting df with shape: ' + str(df.shape))
            dataframe_to_mongo(df, DATABASE, file['collection_name'], erase=True)
            logger.info('insertion sucessful in db' + DATABASE + ' of collection: ' + file['collection_name'])
        except Exception:
            # Narrowed from a bare except: (which also swallowed
            # SystemExit/KeyboardInterrupt) while keeping the
            # best-effort, skip-on-failure behaviour.
            logger.info('No file ' + file['file'] + 'of type' + file['type'] + 'in project scrapped folder')
コード例 #29
0
ファイル: email_op.py プロジェクト: ChantWei/room_price
def SendEMail(files2mail):
    """Send the configured report email with *files2mail* attached.

    SMTP host, credentials, recipients, subject and body all come from
    the ``email`` section of the loaded config.
    """
    config = my_config.GetConfig()
    # third-party SMTP service
    mail_host = "smtp.qq.com"
    sender = config['email']['sender']
    mail_user = config['email']['mail_user']  # account name
    mail_pass = config['email']['mail_pass']  # password / app token

    receivers = [recv['qq'] for recv in config['email']['receivers']]

    msg = MIMEMultipart()
    msg['From'] = formataddr([u'Chant', sender])
    # Bug fix: formataddr expects a single (name, address) pair, not a
    # list of addresses; join the recipients into a standard To header.
    msg['To'] = ', '.join(receivers)
    msg['Subject'] = Header(config['email']['topic'], 'utf-8').encode()
    msg.attach(MIMEText(config['email']['text'], 'plain', 'utf-8'))

    for file in files2mail:
        # Attach each file; Header-encode the filename so non-ASCII
        # names are not mangled.  `with` closes the handle (the original
        # leaked one open file per attachment).
        with open(file, 'rb') as fh:
            att = MIMEText(fh.read(), 'base64', 'utf-8')
        att["Content-Type"] = 'application/octet-stream'
        att.add_header('Content-Disposition',
                       'attachment',
                       filename=Header(file, 'utf-8').encode())
        msg.attach(att)

    try:
        smtpObj = smtplib.SMTP()
        smtpObj.connect(mail_host, 25)  # 25 = SMTP port
        smtpObj.login(mail_user, mail_pass)
        smtpObj.set_debuglevel(1)
        smtpObj.sendmail(sender, receivers, msg.as_string())
        smtpObj.quit()
        logger.info("Send EMail OK")
    except smtplib.SMTPException as e:
        # Bug fix: "str" + exception raised TypeError, masking the real error.
        logger.error("Send EMail Error: " + str(e))
コード例 #30
0
ファイル: main.py プロジェクト: JNUpython/tensorflow-DeepFM
def _run_base_model_dfm(dfTrain, dfTest, folds, dfm_params):
    """K-fold train/evaluate DeepFM and assemble out-of-fold predictions.

    Returns (y_train_meta, y_test_meta): out-of-fold predictions for the
    training rows and the fold-averaged test predictions.  Side effects:
    writes a submission csv and a train/valid gini plot.
    """
    # Build the feature dictionary over train+test columns
    fd = FeatureDictionary(
        dfTrain=dfTrain, dfTest=dfTest, numeric_cols=config.NUMERIC_COLS, ignore_cols=config.IGNORE_COLS)
    logger.info(fd.feat_dim)
    logger.info(fd.feat_dict)

    # Data preprocessing: per-row feature indices (Xi) and values (Xv)
    data_parser = DataParser(feat_dict=fd)
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)
    dfm_params["feature_size"] = fd.feat_dim
    dfm_params["field_size"] = len(Xi_train[0])
    logger.info("特征的数量/2值化处理后特征数量:{}/{}".format(dfm_params["field_size"], dfm_params["feature_size"]))

    y_train_meta = np.zeros((dfTrain.shape[0], 1), dtype=float)
    y_test_meta = np.zeros((dfTest.shape[0], 1), dtype=float)
    # slice a list by an index list
    _get = lambda x, l: [x[i] for i in l]
    gini_results_cv = np.zeros(len(folds), dtype=float)
    gini_results_epoch_train = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    gini_results_epoch_valid = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    for i, (train_idx, valid_idx) in enumerate(folds):
        # k fold: split the parsed rows into this fold's train/valid parts
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)
        pprint(dfm_params)
        dfm = DeepFM(**dfm_params)
        dfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)

        # out-of-fold predictions; accumulate test predictions for averaging
        y_train_meta[valid_idx, 0] = dfm.predict(Xi_valid_, Xv_valid_)
        y_test_meta[:, 0] += dfm.predict(Xi_test, Xv_test)

        gini_results_cv[i] = gini_norm(y_valid_, y_train_meta[valid_idx])
        gini_results_epoch_train[i] = dfm.train_result
        gini_results_epoch_valid[i] = dfm.valid_result

    y_test_meta /= float(len(folds))

    # save result: label the run by which sub-models were enabled
    if dfm_params["use_fm"] and dfm_params["use_deep"]:
        clf_str = "DeepFM"
    elif dfm_params["use_fm"]:
        clf_str = "FM"
    elif dfm_params["use_deep"]:
        clf_str = "DNN"
    print("%s: %.5f (%.5f)" % (clf_str, gini_results_cv.mean(), gini_results_cv.std()))
    filename = "%s_Mean%.5f_Std%.5f.csv" % (clf_str, gini_results_cv.mean(), gini_results_cv.std())
    _make_submission(ids_test, y_test_meta, filename)

    _plot_fig(gini_results_epoch_train, gini_results_epoch_valid, clf_str)

    return y_train_meta, y_test_meta
コード例 #31
0
def insert_file_in_mongo(file, DATABASE='utilities', collection_name='country_code'):
    """Load a csv/excel file description into a DataFrame and push it to MongoDB.

    ``file`` is a dict with at least ``name`` and ``type`` ('csv' or
    'excel'); excel entries additionally use ``sheetname`` and ``skiprows``.

    Raises:
        ValueError: for an unsupported ``file['type']`` (previously this
            fell through to a confusing NameError on ``df``).
    """
    logger.info('inserting' + file['name'])
    if file['type'] == 'csv':
        df = pd.read_csv(file['name'])
    elif file['type'] == 'excel':
        df = pd.read_excel(
                        file['name'],
                        sheetname=file['sheetname'],
                        skiprows=file['skiprows']
                        )
    else:
        raise ValueError('unsupported file type: ' + str(file['type']))
    logger.info('inserting df with shape: ' + str(df.shape))
    dataframe_to_mongo(df, DATABASE, collection_name, erase=True)
    logger.info('insertion sucessful in db ' + DATABASE + ' of collection ' + collection_name)
コード例 #32
0
ファイル: news_check.py プロジェクト: ChantWei/room_price
def UpdateCheckPoint(update_info):
    """Persist "<name>,<short_url>" as the new check point when the url changed."""
    global g_cur_point
    new_point = update_info.name + "," + update_info.short_url
    if g_cur_point == update_info.short_url:
        logger.info("old record url, no need update: " + new_point)
        return
    logger.info("record check point is : " + g_cur_point +
                " , will update to : " + new_point)

    global g_file_name
    # Bug fix: the file handle was never closed, so the write could stay
    # unflushed.  Mode "w" already truncates, making the seek redundant.
    with open(g_file_name, "w") as fd:
        fd.write(new_point + "\n")

    g_cur_point = update_info.short_url
    logger.info(g_file_name + "record check point has update to: " +
                g_cur_point + " for " + update_info.name)
コード例 #33
0
ファイル: main.py プロジェクト: JNUpython/tensorflow-DeepFM
def _load_data():
    """Load and preprocess the train/test CSVs; return frames, arrays and metadata."""
    dfTrain = preprocess(pd.read_csv(config.TRAIN_FILE))
    dfTest = preprocess(pd.read_csv(config.TEST_FILE))

    # feature columns: everything except id/target and the ignore list
    cols = [c for c in dfTrain.columns
            if c not in ["id", "target"] and c not in config.IGNORE_COLS]

    X_train = dfTrain[cols].values
    y_train = dfTrain["target"].values
    X_test = dfTest[cols].values
    ids_test = dfTest["id"].values
    cat_features_indices = [i for i, c in enumerate(cols)
                            if c in config.CATEGORICAL_COLS]
    assert len(X_train[0]) == len(X_test[0])
    logger.info("test/train: {}/{}".format(len(X_test), len(X_train)))
    logger.info("features: {}".format(cols))
    logger.info(cat_features_indices)
    return dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices
コード例 #34
0
ファイル: building_price.py プロジェクト: ChantWei/room_price
def GetFirstPreSellId(build_info_url):
    """Return the numeric id of the first presell tab on a building page.

    Tab elements carry ids of the form "<prefix>_<number>"; only the
    number part is returned.
    """
    logger.info("get first presell id, url: " + build_info_url)
    html = request_common.GetHtml(build_info_url)
    soup = BeautifulSoup(html)
    # The original also looked up the "lptypebar"/"building_dd" div first
    # but immediately overwrote the result; only "lptypebarin" is used.
    datanow = soup.find(name="div", attrs={"class": "lptypebarin"})
    presell_id = ""
    tab = datanow.a
    i = 1
    for sib in tab.next_siblings:
        # first Tag sibling of the anchor is the first presell tab
        if (type(sib) == type(tab)):
            presell_id = sib.attrs["id"]
            logger.info(str(i) + "  " + str(presell_id))
            break

    # avoid shadowing the builtin `id`
    sell_id = presell_id.split('_')[1]
    logger.info("presell id: " + presell_id)
    return sell_id
コード例 #35
0
ファイル: main.py プロジェクト: agniecha95/Projekt-zaliczenie
    # --- change detection between two directory snapshots ----------------
    # NOTE(review): fragment of a polling loop; `before`, `after` and `db`
    # are defined outside this view -- confirm against the full file.
    # Collect newly appeared .csv files.
    added = []
    for name in after:
        if not name in before:
            ext = os.path.splitext(name)[-1].lower()
            if ext == ".csv":
                added.append(name)

    # Files that disappeared since the previous snapshot.
    removed = []
    for name in before:
        if not name in after:
            removed.append(name)

    if added:
        for added_file in added:  # for each newly added file:
            # Dispatch known data files to their analysis routines.
            if 'coronavirus' in added_file:
                print("".join([str(name) for name in added_file]))
                db.coronavirus_analysis(r'E:\Moje\Python szkolenie\Projekt-zaliczenie\files\coronavirus_cases.csv')
            if 'vacinnes' in added_file:
                db.vacinnes_analysis(r'E:\Moje\Python szkolenie\Projekt-zaliczenie\files\vacinnes.csv')
    else:
        logger.info("Nothing was added")

    if removed:
        logger.info(f'Removed {removed}')
    else:
        logger.info('Nothing was removed')

    # Roll the snapshot forward and poll again after a pause.
    before = after
    db.close_conn()
    time.sleep(10)
コード例 #36
0
def GoodOut(buildings, title, files2mail):
    """Render the per-building room/price grid into an .xls file and queue it for mail.

    NOTE(review): Python 2 code (dict.iteritems, u'' literals, integer
    division).  Several logger.info calls pass multiple positional args,
    which the logging API treats as %-format args -- they will not print
    as intended.  Confirm before porting.
    """
    logger.info("----------- GoodOut: " + title)
    for rd_list in buildings.values():
        rd_list.sort(key=price_common.takeSecond, reverse=True)

        mosthighlevl = 1
        maxroomonelevel = 2
        # walk the building map
        # NOTE(review): fmtbuildings is re-created on every outer iteration
        # but read after the loop; it also raises NameError when
        # `buildings` is empty -- confirm this is intended.
        fmtbuildings = {}
        for k, rd_list in buildings.items():
            # building and unit parsed from the key
            tmpbuilding, tmpUnit = price_common.getBuilding(k)

            # the building's unit map
            if tmpbuilding not in fmtbuildings:
                fmtbuildings[tmpbuilding] = {}
            tmpUnitMap = fmtbuildings.get(tmpbuilding)

            if tmpUnit not in tmpUnitMap:
                tmpUnitMap[tmpUnit] = {}
            levelMap = tmpUnitMap.get(tmpUnit)

            # every room of this building/unit, grouped by floor
            for rd in rd_list:
                # floor number: room number / 100 (integer division in py2)
                level = int(price_common.getRoomNum(rd.room)) / 100
                if level > mosthighlevl:
                    mosthighlevl = level
                if level not in levelMap:
                    levelMap[level] = []
                roomlist = levelMap.get(level)
                roomlist.append(rd)
                if len(roomlist) > maxroomonelevel:
                    maxroomonelevel = len(roomlist)

    # (row -> list of (col, text, height, width) cell records)
    writebuildings = {}
    startrow = 1
    startqueue = 2
    bdst = sorted(fmtbuildings.iteritems(), key=price_common.bd_sort_key)
    for bditem in bdst:
        bd = bditem[0]
        utmp = bditem[1]
        startqueue = startqueue + 2
        currow = startrow
        curqueue = startqueue
        startqueue = startqueue + 2 + (len(utmp) * maxroomonelevel)

        # building label centered over its column span
        bdqueue = curqueue + (len(utmp) * maxroomonelevel) / 2
        logger.info(currow, bdqueue, bd)
        insertrecord(writebuildings, currow, bdqueue, bd)
        unitrow = currow + 1
        unitidx = 1
        leftlevelqueue = curqueue - 1

        rightlevelqueue = curqueue + (len(utmp) * maxroomonelevel)
        roomstartqueue = curqueue
        needlevelqueue = True

        utst = sorted(utmp.iteritems(), key=price_common.ut_sort_key)
        for utitem in utst:
            ut = utitem[0]
            lvmp = utitem[1]
            # unit header cell
            unitqueue = curqueue + maxroomonelevel * (unitidx - 1)
            logger.info(unitrow, unitqueue, ut)
            insertrecord(writebuildings, unitrow, unitqueue, ut)
            levelrow = unitrow + 2
            roomstartqueue = unitqueue

            # only the first unit gets the left/right floor-number columns
            realneedlevelqueue = True
            if needlevelqueue:
                realneedlevelqueue = True
                needlevelqueue = False
            else:
                realneedlevelqueue = False

            lvst = sorted(lvmp.iteritems(), reverse=True)

            # room-number header cells for this unit
            for roommark in range(0, maxroomonelevel):
                insertrecord(writebuildings, unitrow + 1,
                             roomstartqueue + roommark,
                             str(roommark + 1) + u'号室', 0, 5000)

            for lvitem in lvst:
                lv = lvitem[0]
                rl = lvitem[1]
                if realneedlevelqueue:
                    logger.info(levelrow, leftlevelqueue, lv)
                    logger.info(levelrow, rightlevelqueue, lv)
                    insertrecord(writebuildings, levelrow, leftlevelqueue,
                                 str(lv) + u'楼')
                    insertrecord(writebuildings, levelrow, rightlevelqueue,
                                 str(lv) + u'楼')
                curroomqueue = roomstartqueue
                curroomrow = levelrow
                for rmdt in rl:
                    logger.info(curroomrow, curroomqueue, rmdt.room)
                    # cell text: area / recorded price / total price
                    detail = rmdt.area + "\n" + rmdt.record_price + "\n" + rmdt.total_price
                    insertrecord(writebuildings, curroomrow, curroomqueue,
                                 detail, 3000, 0)
                    curroomqueue = curroomqueue + 1

                levelrow = levelrow + 1
            unitidx = unitidx + 1

    # logger.info('-------------')

    # write the accumulated cell records into an .xls workbook
    xlfile = xlwt.Workbook(encoding='utf-8')
    xltable = xlfile.add_sheet(title)
    for row, ql in writebuildings.items():
        for queuerecord in ql:
            logger.info(str(row) + str(queuerecord[0]) + str(queuerecord[1]))
            xltable.write(row, queuerecord[0], queuerecord[1])
            if queuerecord[2] != 0:
                xltable.row(row).height = queuerecord[2]

            if queuerecord[3] != 0:
                xltable.col(queuerecord[0]).width = queuerecord[3]
    postfix = time.strftime("_%Y%m%d_%H%M%S.xls", time.localtime())
    filename = "./" + title + postfix
    xlfile.save(filename)
    files2mail.append(filename)
    return True
コード例 #37
0
ファイル: news_check.py プロジェクト: ChantWei/room_price
def UpdateCheckPoint(update_info):
    """Persist the latest check point ("name,short_url") to the check-point file.

    Skips the write when the stored url already matches update_info.short_url;
    otherwise rewrites the file and refreshes the g_cur_point global.
    """
    global g_cur_point
    new_point = update_info.name + "," + update_info.short_url
    if g_cur_point == update_info.short_url:
        logger.info("old record url, no need update: " + new_point)
        return
    logger.info("record check point is : " + g_cur_point +
                " , will update to : " + new_point)

    global g_file_name
    # Mode "w" already truncates and positions at offset 0, so the explicit
    # seek(0, 0) of the original was redundant; the context manager also
    # guarantees the descriptor is closed (the original leaked it).
    with open(g_file_name, "w") as fd:
        fd.write(new_point + "\n")

    g_cur_point = update_info.short_url
    logger.info(g_file_name + "record check point has update to: " +
                g_cur_point + " for " + update_info.name)
    return


if __name__ == "__main__":
    # Smoke-test the check-point load/update path with a sample record.
    sample_info = PreSellInfo("府", "price.htm")
    LoadCheckPoint()
    UpdateCheckPoint(sample_info)

    # Quick sanity check that the logger renders list arguments.
    sample_list = ["aaaa", "bbb"]
    logger.info(sample_list)
コード例 #38
0
        ### 3.1. Compute random samples for which shap values will be computed
        # NOTE(review): assumes `problem` is a SALib-style problem dict with a
        # 'names' key and that get_saltelli_sample returns a DataFrame -- confirm
        # against the enclosing function.
        df_distribution = get_saltelli_sample(problem,
                                              10000,
                                              calc_second_order=False)
        df_sample = df_distribution.sample(300)  # We used saltelli_sample as
        # a random generator, but the final number of sample is given in this line

        ### 3.2. Init KernelShap
        reference = shap.sample(df_distribution,
                                50)  # These samples are used to
        # compute to expectations of variables that are "shut-down" by shap.
        explainer = shap.KernelExplainer(f_algo, reference)

        ### 3.3. Perform shap values computation
        shap_values = explainer.shap_values(df_sample)

        ### 3.4. Aggregate over samples defined in 3.1. and log it
        # Mean absolute shap value per input variable, indexed by variable name.
        df_shap = pd.Series(np.abs(shap_values).mean(axis=0),
                            index=problem['names'])

        # Fixed ordering (sorted variable names) so the CSV-like log line is
        # comparable across runs.
        shap_total_list = []
        for variable in sorted(VAR_DICT_NO_MSF.keys()):
            shap_total_list.append("{:.5f}".format(df_shap[variable]))

        shap_str = ';'.join(shap_total_list)

        # Temporarily drop the level so this one INFO line gets through, then
        # silence the logger again (rest of the run logs at ERROR).
        logger.setLevel(logging.DEBUG)
        logger.info('SHAP_IMPORTANCE;{};{}'.format(scenario, shap_str))
        logger.setLevel(logging.ERROR)
コード例 #39
0
ファイル: DeepFM.py プロジェクト: JNUpython/tensorflow-DeepFM
    def _init_graph(self):
        """Build the DeepFM computation graph (TensorFlow 1.x style).

        Creates the input placeholders, the shared embedding lookup, the FM
        first- and second-order terms, the deep MLP component, the combined
        output layer, the loss (with optional L2 regularization), the chosen
        optimizer, and finally a Session with freshly initialized variables.

        Shape legend used in the inline comments:
            None = batch size, F = self.field_size, K = self.embedding_size.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():

            tf.set_random_seed(self.random_seed)
            # placeholder
            self.feat_index = tf.placeholder(tf.int32,
                                             shape=[None, self.field_size],
                                             name="feat_index")  # None * F
            self.feat_value = tf.placeholder(tf.float32,
                                             shape=[None, self.field_size],
                                             name="feat_value")  # None * F
            logger.info(self.feat_index.shape)
            logger.info(self.feat_value.shape)
            self.label = tf.placeholder(tf.float32,
                                        shape=[None, 1],
                                        name="label")  # None * 1
            self.dropout_keep_fm = tf.placeholder(tf.float32,
                                                  shape=[None],
                                                  name="dropout_keep_fm")
            self.dropout_keep_deep = tf.placeholder(tf.float32,
                                                    shape=[None],
                                                    name="dropout_keep_deep")
            self.train_phase = tf.placeholder(tf.bool, name="train_phase")
            # initialize the model parameters (embeddings, biases, MLP weights)
            self.weights = self._initialize_weights()
            pprint(self.weights)

            # model
            self.embeddings = tf.nn.embedding_lookup(
                self.weights["feature_embeddings"], self.feat_index
            )  # None * F(39) * K # feature_embeddings= 259 * k
            feat_value = tf.reshape(self.feat_value,
                                    shape=[-1, self.field_size,
                                           1])  # None * 39 * 1
            self.embeddings = tf.multiply(self.embeddings,
                                          feat_value)  # scale embeddings by feature values (handles continuous features)
            logger.info(self.embeddings)  # None * 39 * K(8)

            # ---------- first order term ----------
            self.y_first_order = tf.nn.embedding_lookup(
                self.weights["feature_bias"],
                self.feat_index)  # None * F * 1  # feature_bias 259 * 1
            self.y_first_order = tf.reduce_sum(
                tf.multiply(self.y_first_order, feat_value),
                2)  # None * F(39)  # linear-combination part; NOTE(review): no constant/bias term here?
            self.y_first_order = tf.nn.dropout(
                self.y_first_order, self.dropout_keep_fm[0])  # None * F

            # ---------- second order term ---------------
            # sum_square part  # square of the element-wise sum
            self.summed_features_emb = tf.reduce_sum(self.embeddings,
                                                     1)  # None * K
            self.summed_features_emb_square = tf.square(
                self.summed_features_emb)  # None * K

            # square_sum part # sum of the squares
            self.squared_features_emb = tf.square(self.embeddings)
            self.squared_sum_features_emb = tf.reduce_sum(
                self.squared_features_emb, 1)  # None * K

            # second order: 0.5 * ((sum e)^2 - sum e^2), the standard FM identity
            self.y_second_order = 0.5 * tf.subtract(
                self.summed_features_emb_square,
                self.squared_sum_features_emb)  # None * K
            self.y_second_order = tf.nn.dropout(
                self.y_second_order, self.dropout_keep_fm[1])  # None * K

            # ---------- Deep component ----------
            self.y_deep = tf.reshape(self.embeddings,
                                     shape=[
                                         -1,
                                         self.field_size * self.embedding_size
                                     ])  # None * (F*K)  # FM and deep parts share the embedding input
            self.y_deep = tf.nn.dropout(self.y_deep, self.dropout_keep_deep[0])
            for i in range(0, len(self.deep_layers)):
                self.y_deep = tf.add(
                    tf.matmul(self.y_deep, self.weights["layer_%d" % i]),
                    self.weights["bias_%d" % i])  # None * layer[i] * 1
                if self.batch_norm:
                    self.y_deep = self.batch_norm_layer(
                        self.y_deep,
                        train_phase=self.train_phase,
                        scope_bn="bn_%d" % i)  # None * layer[i] * 1
                self.y_deep = self.deep_layers_activation(self.y_deep)
                self.y_deep = tf.nn.dropout(
                    self.y_deep,
                    self.dropout_keep_deep[1 +
                                           i])  # dropout at each Deep layer

            # ---------- DeepFM ----------
            # Concatenate whichever components are enabled; NOTE(review): if
            # both use_fm and use_deep are False, concat_input is undefined.
            if self.use_fm and self.use_deep:
                concat_input = tf.concat(
                    [self.y_first_order, self.y_second_order, self.y_deep],
                    axis=1)  # None *(F + K + deeplayers[-1] nodes)
            elif self.use_fm:
                concat_input = tf.concat(
                    [self.y_first_order, self.y_second_order], axis=1)  #
            elif self.use_deep:
                concat_input = self.y_deep
            logger.info(concat_input)
            self.out = tf.add(
                tf.matmul(concat_input, self.weights["concat_projection"]),
                self.weights["concat_bias"])

            # loss
            if self.loss_type == "logloss":
                self.out = tf.nn.sigmoid(self.out)
                self.loss = tf.losses.log_loss(self.label, self.out)
            elif self.loss_type == "mse":
                self.loss = tf.nn.l2_loss(tf.subtract(self.label, self.out))
            # l2 regularization on weights
            if self.l2_reg > 0:
                self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg)(
                    self.weights["concat_projection"])
                if self.use_deep:
                    for i in range(len(self.deep_layers)):
                        self.loss += tf.contrib.layers.l2_regularizer(
                            self.l2_reg)(self.weights["layer_%d" % i])

            # optimizer
            if self.optimizer_type == "adam":
                self.optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate,
                    beta1=0.9,
                    beta2=0.999,
                    epsilon=1e-8).minimize(self.loss)
            elif self.optimizer_type == "adagrad":
                self.optimizer = tf.train.AdagradOptimizer(
                    learning_rate=self.learning_rate,
                    initial_accumulator_value=1e-8).minimize(self.loss)
            elif self.optimizer_type == "gd":
                self.optimizer = tf.train.GradientDescentOptimizer(
                    learning_rate=self.learning_rate).minimize(self.loss)
            elif self.optimizer_type == "momentum":
                self.optimizer = tf.train.MomentumOptimizer(
                    learning_rate=self.learning_rate,
                    momentum=0.95).minimize(self.loss)
            elif self.optimizer_type == "yellowfin":
                self.optimizer = YFOptimizer(learning_rate=self.learning_rate,
                                             momentum=0.0).minimize(self.loss)

            # init
            self.saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            self.sess = self._init_session()
            self.sess.run(init)
            # save_path = self.saver.save(self.sess, save_path=os.path.join(SUB_DIR, "model"), global_step=0)
            # logger.info("model initialization finished, saved to: {}".format(save_path))
            # writer = tf.summary.FileWriter("./logs", self.sess.graph)

            # number of params
            total_parameters = 0
            for variable in self.weights.values():
                shape = variable.get_shape()
                variable_parameters = 1
                for dim in shape:
                    variable_parameters *= dim.value
                total_parameters += variable_parameters
            if self.verbose > 0:
                print("#params: %d" % total_parameters)
コード例 #40
0
def insert_static_in_mongo():
    """Load each static file declared in data_params.STATIC into MongoDB.

    Supported file types are 'csv', 'excel' and 'json'.  A missing/unreadable
    file or an unsupported type aborts the run via sys.exit(), matching the
    original fail-fast behavior.
    """
    from data_params import PROJECT, STATIC, DATABASE
    # Path to the project's scraped static files.
    common_path = "../" + PROJECT + "/1.getting the data/3.static_files/"
    # Insert each scraped file into mongodb.
    for file in STATIC:
        logger.info('inserting static data')
        file_type = file['type']
        try:
            if file_type == 'csv':
                df = pd.read_csv(common_path + file['file'])
            elif file_type == 'excel':
                df = pd.read_excel(
                            common_path + file['file'],
                            sheetname=file['sheetname'],
                            skiprows=file['skiprows']
                            )
            elif file_type == 'json':
                df = pd.read_json(common_path + file['file'])
            else:
                # The original fell through with `df` undefined and crashed
                # later with a NameError; fail fast with a clear message.
                logger.info('Unsupported static file type ' + file_type + ' for file ' + file['file'])
                sys.exit()
        except (IOError, OSError, ValueError):
            # Narrowed from bare `except:` (which also swallowed SystemExit
            # and KeyboardInterrupt); keep the original log-and-exit behavior.
            logger.info('No file ' + file['file'] + ' of type ' + file_type + ' in project statics folder')
            sys.exit()
        logger.info('inserting df with shape: ' + str(df.shape))
        dataframe_to_mongo(df, DATABASE, file['collection_name'], erase=True)
        logger.info('insertion successful in db ' + DATABASE + ' of collection: ' + file['collection_name'])