Example #1
def from_file_airport_insert(config, param, airport_paths):
    path = ''.join([base_path, str(param), '/'])
    update_sql = "insert into airport(iata_code,name,name_en,city_id,belong_city_id,map_info,status,time2city_center,inner_order) values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    conn = pymysql.connect(**config)
    cursor = conn.cursor()
    _count = 0
    save_result = []
    logger = get_logger('step3', path)
    for airport_path in airport_paths:
        # logger.debug("function: {0}, input airport file: {1}".format(from_file_airport_insert.__name__, airport_path))
        with open(path + airport_path, 'r+') as airport:
            reader = csv.DictReader(airport)
            for row in reader:
                _count += 1
                # logger.debug(row)
                save_result.append(
                    (row['iata_code'], row['name'], row['name_en'],
                     row['city_id'], row['belong_city_id'], row['map_info'],
                     row['status'], row['time2city_center'],
                     row['inner_order']))
                if len(save_result) >= 1000:
                    cursor.executemany(update_sql, save_result)
                    conn.commit()
                    save_result = []

    if save_result:
        cursor.executemany(update_sql, save_result)
        conn.commit()
        save_result = []
    return _count
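Example #1 accumulates rows and flushes them through executemany every 1000 records. A standalone sketch of the same batching pattern, assuming a pymysql-style config dict (table, SQL, and credentials are illustrative, not from the source):

import pymysql


def batched_insert(config, sql, rows, batch_size=1000):
    # Accumulate rows and issue one executemany per batch to cut round-trips
    conn = pymysql.connect(**config)
    cursor = conn.cursor()
    batch = []
    for row in rows:
        batch.append(row)
        if len(batch) >= batch_size:
            cursor.executemany(sql, batch)
            conn.commit()
            batch = []
    if batch:  # flush the remainder
        cursor.executemany(sql, batch)
        conn.commit()
    cursor.close()
    conn.close()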
Example #2
    def __init__(self, cfg, name):
        tf.keras.Model.__init__(self)
        self.model_name = name
        self.logger = get_logger()
        self.logger.info('Using model: {:s}'.format(self.model_name))

        self._num_classes = cfg.dataset.num_classes
        self._input_shape = None

        return
Example #3
    def __init__(self, db_name, coll_name):
        self.logger = get_logger("test_count")
        self.tasks = []
        client = pymongo.MongoClient(
            'mongodb://*****:*****@10.19.2.103:27017/')
        self.collections = client[db_name][coll_name]
        self.create_index()
        # Cursor offset, used to restore the cursor position if a query raises an exception
        self.offset = 0
        # Previous cursor offset, used to restore the cursor position during DB writes
        self.pre_offset = 0
Example #4
    def __init__(self, args):
        self.cfg = get_config(
            args)  # Get dictionary with configuration parameters
        self.logger = get_logger()
        self._pipeline = dict()

        self.loss_fn = tf.losses.categorical_crossentropy  # Loss function
        self.net = None  # Main network instance
        self.opt = None  # Optimizer instance
        self._lr_scheduler = CustomLearningRateScheduler()
        self._ckpt_hdl = CheckpointHandler(self.cfg)

        return
Example #5
    def __init__(self, cfg):
        """
        :param cfg: Configuration
        """
        self._logger = get_logger()
        self._cfg = cfg

        # A tensor to hold the checkpoint integer id
        self._ckpt_id = None

        self._ckpt = None  # An object of class tf.train.Checkpoint()
        self._ckpt_mngr = None  # An object of class tf.train.CheckpointManager()
        self._ckpt_status = None  # Holds result of self._ckpt.restore(). Used to verify a successful restore

        return
Example #6
def main():
    logger = get_logger()
    args = parse_args()
    sqz = DevelopSqueezenet(args)
    sqz.load_checkpointables(
        sqz.cfg.model_saver.checkpoint_id)  # Load checkpoint

    # A call to make sure that a concrete function exists for this polymorphic function
    concrete_fn = sqz.net.call.get_concrete_function(batch_x=tf.TensorSpec(
        [None, 224, 224, 3], tf.float32),
                                                     training=False)

    logger.info('Saving the model in directory: {:s}'.format(
        sqz.cfg.directories.dir_model))
    tf.saved_model.save(sqz.net, sqz.cfg.directories.dir_model)  # Save model
    return
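For reference, the pinned signature can also be handed to the save call explicitly; a hedged equivalent of the two lines above:

# Sketch: export with an explicit serving signature instead of relying on
# the traced concrete function being discovered automatically
tf.saved_model.save(sqz.net, sqz.cfg.directories.dir_model,
                    signatures=concrete_fn)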
Example #7
    def __init__(self, worker, source, _type, task_name, routine_key, queue,
                 **kwargs):
        # Basic task information
        self.worker = worker
        self.source = source
        self.type = _type
        self.task_name = task_name
        self.routine_key = routine_key
        self.queue = queue
        self.task_type = kwargs.get('task_type', TaskType.NORMAL)

        self.priority = int(kwargs.get("priority", 3))
        self.logger = get_logger("InsertMongoTask")
        self.tasks = TaskList()

        self.collection_name = self.generate_collection_name()

        # Cursor offset, used to restore the cursor position if a query raises an exception
        self.offset = 0
        # Previous cursor offset, used to restore the cursor position during DB writes
        self.pre_offset = 0

        client = pymongo.MongoClient(host='10.10.231.105')
        self.db = client['MongoTask']

        # Create all required indexes
        self.create_mongo_indexes()

        # For CITY tasks, build the date_list
        if self.task_type == TaskType.CITY_TASK:
            self.date_list = self.generate_list_date()
        else:
            self.date_list = None

        # Modify each handler's formatter to embed task context
        datefmt = "%Y-%m-%d %H:%M:%S"
        file_log_format = "%(asctime)-15s %(threadName)s %(filename)s:%(lineno)d %(levelname)s " \
                          "[source: {}][type: {}][task_name: {}][collection_name: {}]:        %(message)s".format(
            self.source, self.type, self.task_name, self.collection_name)
        formatter = logging.Formatter(file_log_format, datefmt)

        for each_handler in self.logger.handlers:
            each_handler.setFormatter(formatter)
        self.logger.info("[init InsertTask]")
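The handler re-formatting at the end of Example #7 works on any stdlib logger; a minimal standalone sketch (logger name and context values are illustrative, not from the source):

import logging

logger = logging.getLogger("demo_insert_task")
logger.addHandler(logging.StreamHandler())
fmt = ("%(asctime)-15s %(levelname)s "
       "[source: {}][task_name: {}]: %(message)s".format("demo_src", "demo_task"))
for handler in logger.handlers:
    handler.setFormatter(logging.Formatter(fmt, datefmt="%Y-%m-%d %H:%M:%S"))
logger.warning("formatter applied")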
Example #8
def main():
    # log_option:
    #    0: No logging
    #    1: On screen logs
    #    2: 1 + File logging to logs/latest.log
    #    3: 2 + File logging to logs/<timestamp>.log
    setup_logger(log_option=2)
    logger = get_logger()

    args = parse_args()
    dev = DevelopSqueezenet(args)

    try:
        dev.run()
    except Exception as e:
        logger.exception(str(e))

    return
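setup_logger comes from the project's own logging module; a rough stdlib approximation of log_option=2 (console plus logs/latest.log), which may differ from the real implementation:

import logging
import os

os.makedirs('logs', exist_ok=True)
logging.basicConfig(level=logging.INFO,
                    handlers=[logging.StreamHandler(),
                              logging.FileHandler('logs/latest.log')])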
Example #9
def insert_airport(path=None):
    logger = get_logger('update_airport')
    cursor = city_conn.cursor()
    select_sql = "SELECT id FROM city WHERE name=%s"
    update_sql = "UPDATE airport SET iata_code=%s,NAME=%s,name_en=%s,city_id=%s,belong_city_id=%s,map_info=%s,STATUS=%s,time2city_center=%s,inner_order=%s WHERE id=%s"
    insert_sql = "INSERT INTO airport(iata_code,name,name_en,city_id,belong_city_id,map_info,status,time2city_center,inner_order) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"

    with open(path, 'r+') as airport:
        reader = csv.reader(airport)
        next(reader)
        logger.debug(
            "Fields to update: iata_code,name,name_en,city_id,belong_city_id,map_info,status,\
            time2city_center,inner_order")
        for row in reader:
            try:
                # Normalize full-width commas, split "lat,lon", re-join as "lon,lat"
                map_info = row[6].replace('，', ',').split(',')
                map_info = ','.join([map_info[1].strip(), map_info[0].strip()])
                if not str(row[4]).isdigit():
                    cursor.execute(select_sql, (row[4], ))
                    res = cursor.fetchone()
                    city_id = res[0] if res else None
                    if city_id:
                        cursor.execute(
                            update_sql,
                            (row[1], row[2], row[3], city_id, city_id,
                             map_info, row[7], row[8], row[9], row[0]))
                        city_conn.commit()
                        logger.debug(
                            "Updated row: {0},{1},{2},{3},{4},{5},{6},{7},{8},{9}".
                            format(row[0], row[1], row[2], row[3], city_id,
                                   city_id, map_info, row[7], row[8], row[9]))
                else:
                    cursor.execute(update_sql,
                                   (row[1], row[2], row[3], row[4], row[5],
                                    map_info, row[7], row[8], row[9], row[0]))
                    city_conn.commit()
                    logger.debug(
                        "Updated row: {0},{1},{2},{3},{4},{5},{6},{7},{8},{9}".
                        format(row[0], row[1], row[2], row[3], row[4], row[5],
                               map_info, row[7], row[8], row[9]))
            except Exception as e:
                print(traceback.format_exc())
                city_conn.rollback()
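In isolation, the map_info munging above normalizes full-width commas and swaps "lat,lon" into "lon,lat":

raw = '39.9042，116.4074'  # full-width comma, as in the source CSV
parts = raw.replace('，', ',').split(',')
map_info = ','.join([parts[1].strip(), parts[0].strip()])
print(map_info)  # -> 116.4074,39.9042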
Example #10
def _set_directories(cfg):
    """
    Sets up directory paths in cfg and creates them if directory doesn't exist
    :param cfg: An EasyDict dictionary for configuration parameters
    :return: None
    """
    logger = get_logger()

    # Repository path
    cfg.directories.dir_repo = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..'))

    # Convert all directory paths to absolute paths
    for directory in cfg.directories.keys():
        if not os.path.isabs(cfg.directories[directory]):
            cfg.directories[directory] = os.path.join(
                cfg.directories.dir_repo, cfg.directories[directory])

    # Create directories if they don't exist
    os.makedirs(cfg.directories.dir_model, exist_ok=True)
    logger.debug('Model save directory: {:s}'.format(
        cfg.directories.dir_model))
    os.makedirs(cfg.directories.dir_log, exist_ok=True)
    logger.debug('Log dump directory: {:s}'.format(cfg.directories.dir_log))
    os.makedirs(cfg.directories.dir_tb_home, exist_ok=True)
    # Tensorboard directory
    cfg.directories.dir_tb = os.path.join(cfg.directories.dir_tb_home,
                                          time.strftime("%Y-%m-%d_%H-%M-%S"))
    logger.debug('Tensorboard directory: {:s}'.format(cfg.directories.dir_tb))

    os.makedirs(cfg.directories.dir_ckpt, exist_ok=True)
    logger.debug('Checkpoint directory: {:s}'.format(cfg.directories.dir_ckpt))
    cfg.directories.dir_ckpt_train = cfg.directories.dir_ckpt
    os.makedirs(cfg.directories.dir_ckpt_train, exist_ok=True)
    logger.debug('Checkpoint train parameters directory: {:s}'.format(
        cfg.directories.dir_ckpt_train))

    return
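A hypothetical minimal cfg that satisfies _set_directories, with the EasyDict keys inferred from the function body:

from easydict import EasyDict

cfg = EasyDict()
cfg.directories = EasyDict(dir_model='model', dir_log='logs',
                           dir_tb_home='tensorboard', dir_ckpt='checkpoints')
_set_directories(cfg)  # relative paths become absolute under the repo root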
Example #11
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/26 8:16 PM
# @Author  : Hou Rong
# @Site    :
# @File    : MkContent.py
# @Software: PyCharm
from warnings import filterwarnings

import pymysql
from service_platform_conn_pool import base_data_pool, fetchall, new_service_platform_pool
from itertools import permutations
from my_logger import get_logger

logger = get_logger("train_content")
filterwarnings('ignore', category=pymysql.err.Warning)

offset = 0


def get_task():
    sql = '''SELECT
  DISTINCT
  city_id,
  src_city_code
FROM station_src, station_relation
WHERE
  station_src.src_station_code IS NOT NULL AND station_src.station_id = station_relation.station_id AND city_id != '' GROUP BY city_id;'''
    for line in fetchall(base_data_pool, sql):
        yield line
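get_task yields (city_id, src_city_code) tuples in SELECT order; a hypothetical consumer, assuming fetchall streams plain tuples:

for city_id, src_city_code in get_task():
    logger.info("city %s -> src_city_code %s", city_id, src_city_code)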
Example #12
# @Author  : Hou Rong
# @Site    :
# @File    : multi_city.py
# @Software: PyCharm
import gevent.monkey

gevent.monkey.patch_all()

import time
import gevent.pool
from my_logger import get_logger
from service_platform_conn_pool import base_data_pool
from poi_ori.poi_insert_db import poi_insert_data
from poi_ori.already_merged_city import init_already_merged_city

logger = get_logger("multi_city_insert_db")

pool = gevent.pool.Pool(size=16)


def poi_ori_insert_data(poi_type, cids=None):
    already_merged_city = init_already_merged_city(
        poi_type="{}_data".format(poi_type))
    if not cids:
        conn = base_data_pool.connection()
        cursor = conn.cursor()
        cursor.execute('''SELECT id
    FROM city;''')
        cids = list(map(lambda x: x[0], cursor.fetchall()))
        cursor.close()
        conn.close()
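The excerpt ends before any work is dispatched; a hedged sketch of how the pool might then be driven (this continuation is assumed, not from the source):

    # Fan the city ids out across the gevent pool
    for cid in cids:
        pool.apply_async(poi_insert_data, args=(poi_type, cid))
    pool.join()  # block until all greenlets finish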
Example #13
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import connectmongo
import my_logger
from datetime import datetime
import time
from send_mail import send_warning
import settings

logger = my_logger.get_logger('PI-COVID')
logger.info("Começando atualização")

# Select the access scope
scope = ['https://www.googleapis.com/auth/spreadsheets']

# # Credentials
# credentials = service_account.Credentials.from_service_account_file(filename="credentials.json")
# scopedCreds = credentials.with_scopes(scope)
# gc = gspread.Client(auth=scopedCreds)
# gc.session = AuthorizedSession(scopedCreds)

# Authorize access
creds = ServiceAccountCredentials.from_json_keyfile_name(
    filename=settings.CRED_PATH, scopes=scope)
gc = gspread.authorize(creds)

# Spreadsheet info
PICOVID = gc.open_by_key(settings.SPREAD_ID)
PAGINAS = PICOVID.worksheets()
REGEX_FALSE = r'^=IF\(\$A.*<>\"\";false;""\)$'
Example #14
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/11/23 11:29 AM
# @Author  : Hou Rong
# @Site    :
# @File    : poi_city_mapping.py
# @Software: PyCharm
import re
import pandas
import dataset
from service_platform_conn_pool import base_data_pool, source_info_str, source_info_pool, fetchall
from my_logger import get_logger

logger = get_logger("city_mapping")

conn = base_data_pool.connection()
cursor = conn.cursor()
cursor.execute('''SELECT
  city.id      AS city_id,
  country.mid  AS country_id,
  city.name    AS city_name,
  country.name AS country_name
FROM city
  JOIN country ON city.country_id = country.mid;''')
city_info = {line[0]: line for line in cursor.fetchall()}
conn.close()


def hotels_get_geo_id_by_dest_id(dest_id):
    sql = '''SELECT sid
FROM ota_location
Example #15
# @Author  : Hou Rong
# @Site    :
# @File    : load_final_data.py
# @Software: PyCharm
import os
import pymysql
import time
import pymysql.err
from warnings import filterwarnings
from service_platform_conn_pool import base_data_final_pool
from my_logger import get_logger

# ignore pymysql warnings
filterwarnings('ignore', category=pymysql.err.Warning)

logger = get_logger("load_data")

final_database = 'BaseDataFinal'

final_table = {
    "hotel": "hotel_detail.sql",
    "attr": "daodao_attr_detail.sql",
    "rest": "daodao_rest_detail.sql",
    "total": "qyer_detail.sql"
}

time_key = {
    "hotel_detail": "update_time",
    "attr_detail": "utime",
    "rest_detail": "utime",
    "total_detail": "insert_time",
Example #16
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/10/18 9:30 AM
# @Author  : Hou Rong
# @Site    :
# @File    : unknown_keywords.py
# @Software: PyCharm
from service_platform_conn_pool import poi_ori_pool
from toolbox.Hash import encode
from my_logger import func_time_logger, get_logger

logger = get_logger("insert unknown keywords")

count = 0


@func_time_logger
def insert_unknown_keywords(_type, _keyword_or_keywords):
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    sql = '''INSERT IGNORE INTO unknown_keywords (`type`, `key_hash`, `keywords`) VALUES (%s, %s, %s);'''
    if isinstance(_keyword_or_keywords, str):
        _hash_key = encode(_keyword_or_keywords)
        cursor.execute(sql, (_type, _hash_key, _keyword_or_keywords))
    elif isinstance(_keyword_or_keywords, (list, set, tuple)):
        for each_keyword in _keyword_or_keywords:
            _hash_key = encode(each_keyword)
            cursor.execute(sql, (_type, _hash_key, each_keyword))
    else:
        logger.debug(
            "[unknown _keyword_or_keywords type: {}][_type: {}][_keyword_or_keywords: {}]"
Example #17
# project name: pyranometer
# created by diego aliaga daliaga_at_chacaltaya.edu.bo
import datetime
import sqlalchemy as sa
from sqlalchemy.orm import Session
from sqlalchemy.ext.automap import automap_base
import pandas as pd
import my_logger
import db_sync.functions as functions

logger = my_logger.get_logger(name=__name__, level='DEBUG')


class Database(object):
    """
    a class

    Attributes:
        dic: None
        table: None
        user: None
        psw: None
        ip: None
        database_name: None
        time_column_name: None
        url: None
        eng: None
        meta: None
        base: None
        time_column: None
        session: None
Example #18
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/7 12:27 AM
# @Author  : Hou Rong
# @Site    :
# @File    : img_larger_than_90_update.py
# @Software: PyCharm
import json
import pymysql.cursors
from data_source import MysqlSource
from service_platform_conn_pool import base_data_final_pool
from my_logger import get_logger

logger = get_logger("img_larger_than_90_update")

poi_ori_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'BaseDataFinal'
}


def update_sql(sid, data):
    sql = '''UPDATE poi_images
SET `use` = 0, info = %s
WHERE source = 'qyer' AND sid = '{}' AND file_name = %s;'''.format(sid)
    conn = base_data_final_pool.connection()
    cursor = conn.cursor()
    _res = cursor.executemany(sql, data)
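Note the query mixes str.format (for sid) with %s placeholders. A hedged variant that binds sid as a parameter too, assuming each element of data is an (info, file_name) pair:

    sql_v2 = '''UPDATE poi_images
SET `use` = 0, info = %s
WHERE source = 'qyer' AND sid = %s AND file_name = %s;'''
    rows = [(info, sid, file_name) for info, file_name in data]
    _res = cursor.executemany(sql_v2, rows)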
Example #19
def task_start():
    param = sys.argv[1]
    # param = '706'
    zip_path = get_zip_path(param)
    file_name = zip_path.split('/')[-1]
    zip_path = ''.join([base_path, file_name])
    zip_file = zipfile.ZipFile(zip_path)  # avoid shadowing the zip builtin
    file_name = zip_file.filename.split('.')[0].split('/')[-1]
    path = ''.join([base_path, str(param), '/'])
    logger = get_logger('step3', path)
    save_path = []
    database_name = ''.join(['add_city_', param])
    temp_config = dict(config)  # copy so the shared config is not mutated
    temp_config['db'] = database_name
    if path.endswith('/'):
        file_path = ''.join([path, file_name])
    else:
        file_path = '/'.join([path, file_name])
    file_list = os.listdir(file_path)
    # Initialize optional paths up front; some files may be absent from the zip
    city_path = airport_path = picture_path = attr_path = hotels_path = None
    for child_file in file_list:
        child_file_path = '/'.join([file_path, child_file])
        # '新增城市' = new cities file, '新增机场' = new airports file
        if ('新增城市' in child_file) and (len(child_file.split('.')[0]) == 4):
            city_path = child_file_path
        elif ('新增机场' in child_file) and (len(child_file.split('.')[0]) == 4):
            airport_path = child_file_path
        elif os.path.isdir(child_file_path):
            picture_path = child_file_path
        elif '酒店配置' in child_file:  # hotel config file
            hotels_path = child_file_path
        elif '景点配置' in child_file:  # attraction config file
            attr_path = child_file_path

    conf = configparser.ConfigParser()
    conf.read('/search/cuixiyi/ks3up-tool-2.0.6-20170801/city.conf',
              encoding='utf-8')
    conf.set('city', 'srcPrefix', picture_path)
    with open('/search/cuixiyi/ks3up-tool-2.0.6-20170801/city.conf',
              'w') as conf_file:
        conf.write(conf_file)
    judge_city_id = 1
    try:
        return_result = defaultdict(dict)
        return_result['data'] = {}
        return_result['error']['error_id'] = 0
        return_result['error']['error_str'] = ''
        conn = pymysql.connect(**test_config)
        cursor = conn.cursor()

        logger.debug("新增城市入库执行开始")
        city_base_path = ''.join([base_path, str(param), '/'])
        city_infos = read_file(city_path, temp_config, city_base_path)
        # if city_infos:
        #     select_sql = "select * from city where id=%s"
        #     with open(path+'city_id.csv','r+') as city:
        #         reader = csv.DictReader(city)
        #         for row in reader:
        #             city_id = row['city_id']
        #             cursor.execute(select_sql,(city_id,))
        #             if cursor.fetchall():
        #                 judge_city_id = 0
        #                 break

        logger.debug("[新增城市入库执行完毕]")
        logger.debug("[新增城市图片名称更新开始]")
        if judge_city_id:
            city_map_path = revise_pictureName(picture_path, temp_config,
                                               param)
            logger.debug("[新增城市的图片名称更新完毕]")
        logger.debug("城市更新后的图片名称更新到city表响应的new_product_pic字段-开始")
        update_city_picture = update_city_pic(picture_path, temp_config, param)

        logger.debug("城市更新后的图片名称更新到city表响应的new_product_pic字段-结束")
        logger.debug("新增机场入库开始执行")
        # if judge_city_id:
        #     new_airport_insert(temp_config, param)
        logger.debug("新增机场入库执行完毕")
        logger.debug("为城市提供共享机场开始执行")

        share_airport_to_data_path = []
        if not airport_path:
            need_share_airport_path = list(
                update_share_airport(temp_config, param))

        else:
            share_airport_path = from_file_get_share_airport(config, param)
            citys = share_airport_path[2]
            airport_infos = share_airport_path[3]
            if citys:
                need_share_airport_path = list(
                    update_share_airport(temp_config, param, citys,
                                         airport_infos))
            else:
                need_share_airport_path = []
            share_airport_to_data_path = list(share_airport_path)[:2]
            with open(path + 'city_airport_info.csv', 'w+') as city:
                writer = csv.writer(city)
                writer.writerow(
                    ('id_number', 'city_id', 'city_name', 'city_name_en',
                     'country_id', 'city_map_info', 'new_product_city_pic',
                     'airport_name', 'airport_name_en', 'airport_map_info',
                     'airport_belong_city_id', 'airport_from'))
                for city_id in city_infos.keys():
                    temp_save = []
                    temp_save.extend([
                        city_infos[city_id]['id_number'], city_id,
                        city_infos[city_id]['city_name'],
                        city_infos[city_id]['city_name_en'],
                        city_infos[city_id]['country_id'],
                        city_infos[city_id]['city_map_info']
                    ])
                    if not update_city_picture.get(str(city_id), None):
                        pic_name = ''
                    else:

                        pic_name = update_city_picture[str(
                            city_id)]['new_product_city_pic']

                    temp_save.append(pic_name)
                    if airport_infos.get(city_id, None):
                        temp_save.extend([
                            airport_infos[city_id]['airport_name'],
                            airport_infos[city_id]['airport_name_en'],
                            airport_infos[city_id]['airport_map_info'],
                            airport_infos[city_id]['airport_belong_city_id'],
                            airport_infos[city_id]['airport_from']
                        ])
                    writer.writerow(temp_save)
                need_share_airport_path.append('city_airport_info.csv')
        if need_share_airport_path and judge_city_id:
            for airport_file_path in need_share_airport_path:
                airport_file_path = '/'.join([param, airport_file_path])
                save_path.append(airport_file_path)
                temp_path = ''.join([base_path, airport_file_path])
                os.system("rsync -vI {0} 10.10.150.16::opcity/{1}".format(
                    temp_path, param))
        city_airport_data = pandas.read_csv(path + 'city_airport_info.csv')
        data = {}
        # Chinese summary keys kept as written, since they become CSV headers:
        # 新增城市总数 = total new cities; 无机场城市数量 = cities without airports;
        # 有机场城市数量 = cities with airports; 有共享机场城市数量 = cities with shared airports
        data['新增城市总数'] = len(city_airport_data.values)
        data['无机场城市数量'] = len(city_airport_data[
            city_airport_data['airport_from'].isnull()].values)
        # '标注机场' = annotated airport, '生成共享机场' = generated shared airport
        data['有机场城市数量'] = len(
            city_airport_data[city_airport_data['airport_from'].apply(
                lambda x: x == '标注机场', )].values)
        data['有共享机场城市数量'] = len(
            city_airport_data[city_airport_data['airport_from'].apply(
                lambda x: x == '生成共享机场', )].values)
        with open(path + 'city_airport_count.csv', 'w+') as city:
            writer = csv.DictWriter(
                city, fieldnames=['新增城市总数', '有机场城市数量', '有共享机场城市数量', '无机场城市数量'])
            writer.writeheader()
            writer.writerow(data)
            count_file = '/'.join([param, 'city_airport_count.csv'])
            save_path.append(count_file)
            temp_path = ''.join([base_path, count_file])
            os.system("rsync -vI {0} 10.10.150.16::opcity/{1}".format(
                temp_path, param))
        logger.debug("城市共享机场执行完毕")
        logger.debug("城市共享机场入库开始")
        if judge_city_id and share_airport_to_data_path:
            count = from_file_airport_insert(temp_config, param,
                                             share_airport_to_data_path)

        logger.debug("城市共享机场入库结束,机场入库总数:{0}".format(count))
        logger.debug("将新增城市更新到ota_location的各个源-开始")
        if hotels_path and judge_city_id:
            add_others_source_city(city_path, hotels_path, attr_path,
                                   temp_config, param)
        logger.debug("将新增城市更新到ota_location的各个源-结束")
        return_result = json.dumps(return_result)
        print('[result][{0}]'.format(return_result))
        csv_path = ';'.join(save_path)
        update_step_report(csv_path, param, 1, 0)
        logger.debug("上传图片开始")
        if judge_city_id:
            os.system(
                'java -jar /search/cuixiyi/ks3up-tool-2.0.6-20170801/ks3up-2.0.6.jar -c /search/cuixiyi/ks3up-tool-2.0.6-20170801/city.conf start'
            )
        logger.debug("上传图片结束")
        # logger.debug("开始更新ota_location表")
        # collection_name,task_name = create_task(city_path,path,database_name,param)
        # tasks = modify_status('step3',param,[collection_name,task_name])
        # hotel_file_name,poi_file_name = from_ota_get_city(temp_config,param)
        # temp_path = ''.join([base_path,hotel_file_name,])
        # os.system("rsync -vI {0} 10.10.150.16::opcity/{1}".format(temp_path, param))
        # temp_path = ''.join([base_path,poi_file_name])
        # os.system("rsync -vI {0} 10.10.150.16::opcity/{1}".format(temp_path, param))
        # logger.debug("结束更新ota_location表")

    except Exception as e:
        print(traceback.format_exc())
        csv_path = ';'.join(save_path)
        return_result['error']['error_id'] = 1
        return_result['error']['error_str'] = traceback.format_exc()
        return_result = json.dumps(return_result)
        print('[result][{0}]'.format(return_result))
        update_step_report(csv_path, param, -1, 0)
        logger.debug('[result][{0}]'.format(return_result))
Example #20
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/12 12:03 PM
# @Author  : Hou Rong
# @Site    :
# @File    : ImgErrorMd5Search.py
# @Software: PyCharm
import random
from data_source import MysqlSource
from service_platform_conn_pool import base_data_pool, fetchall
from my_logger import get_logger
from collections import defaultdict

logger = get_logger("img_error_md5_search")

poi_ori_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'ServicePlatform'
}


def used_file_name():
    sql = '''SELECT
  first_image,
  image_list
FROM chat_shopping;'''
    _img_set = set()
    _count = 0
Example #21
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/30 9:27 PM
# @Author  : Hou Rong
# @Site    : 
# @File    : reset_task.py
# @Software: PyCharm
import pymongo
import time
from my_logger import get_logger

logger = get_logger("reset_mongo_task")

client = pymongo.MongoClient(host='10.10.231.105')
db = client['MongoTask']

for each_name in db.collection_names():
    if each_name.startswith('Task_Queue_hotel_detail_TaskName_') and each_name.endswith('20171127a'):
        if 'expedia' not in each_name:
            continue
        start = time.time()
        collections = db[each_name]
        res = collections.update(
            {'finished': 0},
            {
                '$set': {
                    'running': 0,
                    'used_times': 0
                }
            },
            multi=True
Example #22
#!/usr/bin/env python
# encoding: utf-8
import pymysql
from pymysql.cursors import SSDictCursor, SSCursor
from my_logger import get_logger

logger = get_logger("data_source")


class MysqlSource:
    """
    Database data source
    """
    def __init__(self,
                 db_config,
                 table_or_query='',
                 size=500,
                 is_table=True,
                 is_dict_cursor=False):
        self._db_config = dict(db_config)  # copy so the caller's dict is not mutated
        if is_dict_cursor:
            self._db_config['cursorclass'] = SSDictCursor
        else:
            self._db_config['cursorclass'] = SSCursor
        self._db_config['charset'] = 'utf8mb4'
        self._size = size
        self._table = table_or_query
        if is_table:
            self._sql = 'select * from {0}'.format(self._table)
        else:
            self._sql = table_or_query
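MysqlSource selects a server-side cursor (SSCursor/SSDictCursor) so results stream instead of being loaded into memory at once. A minimal sketch of the fetchmany loop such a source typically wraps (connection values are placeholders):

import pymysql
from pymysql.cursors import SSCursor

conn = pymysql.connect(host='localhost', user='root', password='secret',
                       db='test', charset='utf8mb4', cursorclass=SSCursor)
cursor = conn.cursor()
cursor.execute('select * from some_table')
while True:
    rows = cursor.fetchmany(500)  # stream in chunks of `size`
    if not rows:
        break
    for row in rows:
        pass  # process one row at a time
cursor.close()
conn.close()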
Example #23
import threading
from collections import defaultdict
from toolbox.Common import is_legal
from data_source import MysqlSource
from service_platform_conn_pool import base_data_pool, poi_ori_pool
from my_logger import get_logger, func_time_logger
from poi_ori.already_merged_city import update_already_merge_city

poi_type = None
online_table_name = None
data_source_table = None
white_list = list()

max_id = None
lock = threading.Lock()
logger = get_logger("poi_ori")


def init_global_name(_poi_type):
    global poi_type
    global online_table_name
    global data_source_table
    poi_type = _poi_type
    if poi_type == 'attr':
        online_table_name = 'chat_attraction'
        data_source_table = 'attr'
    elif poi_type == 'rest':
        online_table_name = 'chat_restaurant'
        data_source_table = 'rest'
    elif poi_type == 'shop':
        online_table_name = 'chat_shopping'
Example #24
import logging
import my_logger
from db_layer.models import *
from db_layer.type_const import *
from telegraph import Telegraph
import requests

logger = my_logger.get_logger()
logger.setLevel(logging.INFO)


def init_db():
    logger.info('Connect to db')
    db.connect()
    logger.info('Init models')
    db.create_tables([User, QueuePost], safe=True)


def create_user(name: str, telegram_id: int, nickname: str):
    try:
        logger.info('Creating user with nickname {nick} ...'.format(
            nick=nickname))
        User.create(name=name, telegram_id=telegram_id, nickname=nickname)
    except Exception:
        logger.error('failed to create user!')
        return False
    else:
        logger.info(
            'user {nick} created successfully'.format(nick=nickname))
        return True
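A hypothetical call sequence (the models are peewee-style, per db_layer.models):

init_db()
ok = create_user(name='Alice', telegram_id=123456789, nickname='alice')
logger.info('created: %s', ok)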
Example #25
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/11/29 7:20 PM
# @Author  : Hou Rong
# @Site    : 
# @File    : hotel_too_far.py
# @Software: PyCharm
import os
import numpy as np
from data_source import MysqlSource
from service_platform_conn_pool import base_data_pool
from my_logger import get_logger

logger = get_logger("hotel_too_far")

SQL_PATH = '/search/hourong/data_sql'

spider_task_data_config = {
    # 'host': '10.10.238.148',
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'tmp'
}


def dist_from_coordinates(lat1, lon1, lat2, lon2):
    R = 6371  # Earth radius in km
Example #26
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/14 10:46 AM
# @Author  : Hou Rong
# @Site    :
# @File    : detail_list_crawl_diff.py
# @Software: PyCharm
import pymongo
from my_logger import get_logger
from service_platform_conn_pool import service_platform_pool, fetchall

logger = get_logger('check_list_crawl_diff')

RespClient = pymongo.MongoClient(host='10.10.213.148')
RespDB = RespClient['data_result']

check_collection = [
    # ('Qyer20171214a', 'Task_Queue_poi_list_TaskName_list_total_qyer_20171209a', 'detail_total_qyer_20171209a'),
    ('qyer', 'Task_Queue_poi_list_TaskName_list_total_qyer_20171214a',
     'detail_total_qyer_20171214a')
]


def task_resp_url(collection_name, task_collection_name):
    __set = set()
    resp_collections = RespDB[collection_name]
    _count = 0
    for line in resp_collections.find({'collections': task_collection_name}):
        # result loop
        for each in line['result']:
            _count += 1
Example #27
import requests
from serviceplatform_data.load_final_data import main as load_final_data
from serviceplatform_data.load_final_data_test import main as load_final_data_qyer
from service_platform_report.routine_report import main as routine_report
from service_platform_report.send_error_email import send_error_report_email
from serviceplatform_data.insert_data_mongo import insert_hotel_data, insert_city_data
from serviceplatform_data.get_nearby_hotel_city import get_nearby_city
from serviceplatform_data.update_hotel_validation import UpdateHotelValidation
from serviceplatform_data.insert_poi_detect_task_info import get_task_info
from serviceplatform_data.delete_already_scanned_file import delete_already_scanned_file
from my_logger import get_logger
from service_platform_report.merge_report import poi_merged_report
from service_platform_report.task_progress_report_mongo_split_task import task_progress_report_split_task_main

SEND_TO = ['*****@*****.**', "*****@*****.**"]

logger = get_logger('cron_task_monitor')


def send_email(title, mail_info, mail_list, want_send_html=False):
    try:
        mail_list = ';'.join(mail_list)
        data = {
            'mailto': mail_list,
            'content': mail_info,
            'title': title,
        }
        if want_send_html:
            data['mail_type'] = 'html'
        requests.post('http://10.10.150.16:9000/sendmail', data=data)
    except Exception as e:
        logger.exception(msg="[send email error]", exc_info=e)
Example #28
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/10/30 9:37 PM
# @Author  : Hou Rong
# @Site    :
# @File    : move_data.py
# @Software: PyCharm
from data_source import MysqlSource
from service_platform_conn_pool import base_data_final_pool
from my_logger import get_logger

logger = get_logger("move_data")

poi_ori_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'ServicePlatform'
}


def insert_data(data):
    update_sql = '''REPLACE INTO poi_images (file_name, source, sid, url, pic_size, bucket_name, url_md5, pic_md5, 
    `use`, part, date, info) VALUE (%(file_name)s, %(source)s, %(sid)s, %(url)s, %(pic_size)s, %(bucket_name)s, %(url_md5)s, 
    %(pic_md5)s, %(use)s, %(part)s, %(date)s, %(info)s);'''
    conn = base_data_final_pool.connection()
    cursor = conn.cursor()
    _res = cursor.executemany(update_sql, data)
    conn.commit()
    cursor.close()
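Because the statement uses named %(key)s placeholders, executemany expects dict rows; a hypothetical payload:

rows = [{'file_name': 'abc.jpg', 'source': 'qyer', 'sid': '123',
         'url': 'http://example.com/abc.jpg', 'pic_size': '800,600',
         'bucket_name': 'imgs', 'url_md5': 'd41d8cd9', 'pic_md5': 'd41d8cd9',
         'use': 1, 'part': 0, 'date': '20171030', 'info': ''}]
insert_data(rows)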
Example #29
# @Software: PyCharm
import pymongo
import datetime
import json
import hashlib
import toolbox.Date
from data_source import MysqlSource
from patched_mongo import mongo_patched_insert
from my_logger import get_logger

toolbox.Date.DATE_FORMAT = "%Y%m%d"

client = pymongo.MongoClient(host='10.10.231.105')
collections = client['MongoTask']['Task']

logger = get_logger("insert_mongo_task")

base_data_final_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'BaseDataFinal'
}

offset = 0
pre_offset = 0


def insert_mongo(data):
    global offset
Example #30
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/12/6 9:34 PM
# @Author  : Hou Rong
# @Site    :
# @File    : select_and_update_table.py
# @Software: PyCharm
import re
import json
from data_source import MysqlSource
from service_platform_conn_pool import poi_ori_pool, poi_face_detect_pool, service_platform_pool, base_data_final_pool, \
    fetchall
from my_logger import get_logger

logger = get_logger("select_and_update_table")

poi_ori_config = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    'db': 'poi_merge'
}


def update_sql(data):
    sql = '''UPDATE chat_attraction
SET beentocount = %s, plantocount = %s, commentcount = %s
WHERE id = %s;'''
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
Example #31
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/10/25 9:49 AM
# @Author  : Hou Rong
# @Site    :
# @File    : insert_poi_detect_task_info.py
# @Software: PyCharm
import pymysql
from warnings import filterwarnings
from service_platform_conn_pool import service_platform_pool, base_data_pool
from data_source import MysqlSource
from my_logger import get_logger

filterwarnings('ignore', category=pymysql.err.Warning)

logger = get_logger("insert_poi_detect_task_info")

service_platform_conf = {
    'host': '10.10.228.253',
    'user': '******',
    'password': '******',
    'charset': 'utf8',
    # 'db': 'ServicePlatform'
    'db': 'poi_merge'
}

offset = 0
cid2grade = None


def insert_task_data(data, _count):