Ejemplo n.º 1
0
def register_action():
    """Handle the sign-up form: store a new user and redirect to the index.

    Reads ``name``, ``email`` and ``pwd`` from the request (a missing field
    becomes an empty string), hashes the password and calls the
    ``insert_r_user`` stored procedure. The id reported back by the
    procedure decides which message is flashed.

    Returns:
        A redirect response to ``main_view.index``.
    """
    name = request.values.get("name").encode(
        "utf-8") if "name" in request.values else ''
    email = request.values.get("email").encode(
        "utf-8") if "email" in request.values else ''
    pwd = request.values.get("pwd", '')
    hashed_pwd = generate_password_hash(pwd, salt_length=10)
    # The password hash is deliberately kept out of the log.
    logger.info('register_action data : %s, %s', name, email)

    cursor = dao.get_conn().cursor()
    try:
        cursor.callproc("insert_r_user", (email, name, hashed_pwd, 0))
        # Read back the procedure's 4th argument (index 3) through the
        # session variable the driver exposes for it.
        cursor.execute("select @_insert_r_user_3")
        result = cursor.fetchone()
    finally:
        # Release the cursor even when the procedure call fails.
        cursor.close()
    g.conn.commit()
    last_id = result[0]
    logger.info("insert result (last_id) : %s", last_id)

    if last_id > 0:
        msg = name + "님 가입 성공, 로그인 하세요."
    else:
        msg = "가입 실패"
    flash(msg)
    return redirect(url_for('main_view.index'))
Ejemplo n.º 2
0
    def doWork(self):
        '''Thread entry point of the storage worker (overrides WorkRequest).

        Runs inside the thread pool: takes items from the data queue and
        inserts them into the ``zspider`` table until the exit event is set.
        Returns early when the database cannot be initialised.
        '''
        logger.debug('Start storage`s doWork...')
        if not self.__initDB():
            logger.error('Storage thread is stop.')
            return

        conn = sqlite3.connect(self.__dbPath)
        cur = conn.cursor()
        # Placeholders let sqlite3 quote the values itself, so URLs that
        # contain quotes can neither break nor inject into the statement.
        sqlInsert = 'INSERT INTO zspider(url, time, depth) VALUES (?, ?, ?)'
        while True:
            try:
                # Take one item from the data queue and insert it.
                if self.__dataQueue.qsize() > 0:
                    data = self.__dataQueue.get()
                    cur.execute(sqlInsert, (data.url, data.time, data.depth))
                    conn.commit()
                else:
                    time.sleep(1)
            except Exception as e:
                # Log and fall through so the exit check below still runs
                # even while database errors keep occurring.
                logger.error('Database operate exception: %s ', str(e))
            # Check the exit event.
            if self.__exitEvent.is_set():
                cur.close()
                conn.close()
                logger.info('Storage model quit...')
                return
Ejemplo n.º 3
0
def apply_step3():
    """Render step 3 of the application form for the current user.

    Reads ``notice_no`` from the request. When ``appliable_check`` rejects
    it, a message is flashed and the user is redirected to "my page".
    Otherwise the certificate columns of the user's resume for that notice
    are loaded (first matching row only) and passed to the template.

    Returns:
        A redirect response or the rendered ``apply/step3.html`` template.
    """
    data = {'step': 3}

    notice_no = request.values.get("notice_no", '')
    if not appliable_check(notice_no):
        flash("수정가능한 기간이 아닙니다.")
        return redirect(url_for("mypage_view.mypage"))

    # notice_no comes straight from the request, so it is bound as a query
    # parameter instead of being formatted into the SQL text.
    # NOTE(review): assumes the driver uses the "%s" (format) paramstyle,
    # as MySQL drivers do - confirm against the configured engine.
    query_str = ("select r_cert_name, r_cert_no, r_cert_date, r_notice_no "
                 "from resume where r_writer_no = %s and r_notice_no = %s")
    params = (current_user.user_no, notice_no)
    cursor = dao.get_conn().cursor()
    try:
        cursor.execute(query_str, params)
        logger.info("apply_step3 query : %s %s", query_str, params)
        result = cursor.fetchall()
        col_names = cursor.description
    finally:
        cursor.close()

    if result:
        # Map column name -> value for the first row.
        for column, value in zip(col_names, result[0]):
            data[column[0]] = value

    logger.info("apply_step3 view data : " + str(result))
    return render_template('apply/step3.html', data=data)
Ejemplo n.º 4
0
    def gen_items(self):
        """Turn the raw datapoints into item dicts appended to ``self.items``.

        Raises:
            DeviceNotFoundException: when ``get_errno()`` returns 400.
            NoDataException: when ``get_errno()`` returns 401.
        """
        # errno is an int; any value other than 400/401 (including 0)
        # falls through to item generation.
        code = self.get_errno()
        if code == 400:
            raise DeviceNotFoundException()
        if code == 401:
            raise NoDataException()

        # The device id is shared by every generated item.
        device = self.get_deviceid()
        logger.info('==deviceid=={}'.format(device))
        for point in self.raw.get('datapoints'):
            self.items.append({
                'status': point.get('status'),
                'online': point.get('online'),
                'cct': point.get('cct'),
                'brightness': point.get('brightness'),
                # used for power calculation
                'time': point.get('time'),
                'deviceid': device,
            })
Ejemplo n.º 5
0
def mypage():
    """Render "my page": the current user's resumes joined with their notices.

    Each joined row becomes a dict keyed by column name, plus an
    ``expire_chk`` flag (1 when column 29 of the row is earlier than now,
    otherwise 0). The dicts are stored under their row index.

    Returns:
        The rendered ``/mypage/mypage.html`` template.
    """
    # NOTE(review): assumes the driver uses the "%s" (format) paramstyle.
    join_query = ("select * from resume join recruit_notice on "
                  "resume.r_notice_no = recruit_notice.n_no "
                  "and resume.r_writer_no=%s")
    cursor = dao.get_conn().cursor()
    try:
        cursor.execute(join_query, (current_user.user_no,))
        result = cursor.fetchall()
        col_names = [column[0] for column in cursor.description]
    finally:
        cursor.close()

    now = datetime.now()
    data = {}
    for row_no, row in enumerate(result):
        temp = dict(zip(col_names, row))
        # NOTE(review): index 29 is presumably the notice's deadline column
        # of the joined row - confirm against the table schema.
        temp['expire_chk'] = 1 if row[29] < now else 0
        data[row_no] = temp

    logger.info("mypage view data : " + str(data))
    return render_template("/mypage/mypage.html", data=data)
Ejemplo n.º 6
0
def get_assignment_map_from_checkpoint(tvars, init_checkpoint):
    """Map checkpoint variables onto the matching current variables.

    Args:
        tvars: iterable of variables exposing a ``name`` attribute.
        init_checkpoint: checkpoint path handed to ``tf.train.list_variables``.

    Returns:
        A tuple ``(assignment_map, initialized_variable_names)``. The first
        is an ordered dict mapping each checkpoint variable name that also
        exists in ``tvars`` to itself; the second is a dict whose keys are
        those names and the same names with ``":0"`` appended.
    """
    # Index the current variables by name with any ":<digits>" suffix removed.
    known_names = collections.OrderedDict()
    for variable in tvars:
        full_name = variable.name
        matched = re.match("^(.*):\\d+$", full_name)
        key = full_name if matched is None else matched.group(1)
        known_names[key] = variable
    logger.info(init_checkpoint)

    assignment_map = collections.OrderedDict()
    initialized_variable_names = {}
    for entry in tf.train.list_variables(init_checkpoint):
        ckpt_name, _ = entry[0], entry[1]
        if ckpt_name in known_names:
            assignment_map[ckpt_name] = ckpt_name
            initialized_variable_names[ckpt_name] = 1
            initialized_variable_names[ckpt_name + ":0"] = 1

    return assignment_map, initialized_variable_names
Ejemplo n.º 7
0
 def doTransform(self):
     """Rename each entry's source photo to ``<学号>.jpg`` in the output dir.

     For every entry of ``self.dictData`` three candidate source files are
     tried in order (by 证件号码, by 考生编号, by ``10673_18_`` + 报名号); the
     first one that exists is renamed. A warning is logged when none exists.
     """
     sourceFiles = self.sourceFiles()
     logger.info("transform {} source files", len(sourceFiles))
     for entry in self.dictData:
         logger.debug("process 学号: {}/报名号: {}", int(entry['学号']),
                      entry['报名号'])
         # Candidates in priority order, each paired with its log message.
         candidates = (
             ("rename from (证件号码) {} to {}",
              os.path.join(self.inputDir, str(entry['证件号码']) + ".jpg")),
             ("rename from (考生编号) {} to {}",
              os.path.join(self.inputDir,
                           str(int(entry['考生编号'])) + ".jpg")),
             ("rename from (10673_18_报名号) {} to {}",
              os.path.join(self.inputDir,
                           '10673_18_' + str(entry['报名号']) + ".jpg")),
         )
         targetPath = os.path.join(self.outputDir,
                                   str(int(entry['学号'])) + ".jpg")
         for message, sourcePath in candidates:
             if os.path.exists(sourcePath):
                 logger.debug(message, sourcePath, targetPath)
                 os.rename(sourcePath, targetPath)
                 break
         else:
             logger.warn("source file for {} does not exit, skip it",
                         int(entry['学号']))
0
def apply_step1():
    """Render step 1 of the application form for the current user.

    Reads ``sel1``, ``sel2`` and ``notice_no`` from the request. When
    ``appliable_check`` rejects the notice, a message is flashed and the
    user is redirected to "my page". Otherwise the group columns of the
    user's resume for that notice are loaded (first matching row only).

    Returns:
        A redirect response or the rendered ``apply/step1.html`` template.
    """
    data = {'step': 1}

    sel1 = request.values.get("sel1", '')
    sel2 = request.values.get("sel2", '')
    notice_no = request.values.get("notice_no", '')

    if not appliable_check(notice_no):
        flash("수정가능한 기간이 아닙니다.")
        return redirect(url_for("mypage_view.mypage"))

    data["sel1"] = sel1
    data["sel2"] = sel2

    # notice_no comes straight from the request, so it is bound as a query
    # parameter instead of being formatted into the SQL text.
    # NOTE(review): assumes the driver uses the "%s" (format) paramstyle,
    # as MySQL drivers do - confirm against the configured engine.
    query_str = ("select r_group1, r_group2, r_notice_no "
                 "from resume where r_writer_no = %s and r_notice_no = %s")
    params = (current_user.user_no, notice_no)
    cursor = dao.get_conn().cursor()
    try:
        cursor.execute(query_str, params)
        result = cursor.fetchall()
        col_names = cursor.description
    finally:
        cursor.close()
    logger.info("apply_step1 query : %s %s", query_str, params)

    if result:
        # Map column name -> value for the first row.
        for column, value in zip(col_names, result[0]):
            data[column[0]] = value

    logger.info('apply_step1 view data : ' + str(data))
    return render_template('apply/step1.html', data=data)
Ejemplo n.º 9
0
 def __on_mqtt_message(self, client, userdata, msg):
     """Handle an incoming MQTT message.

     The JSON payload is decoded and dispatched on the topic prefix:
     response topics are inspected for a ``return_code``, event topics
     supply a ``charging_state``. When a charging state reports a non-zero
     ``remaining_time`` with a zero ``rate``, the vehicle is woken up after
     a 60 second pause. A ``KeyError`` raised anywhere in the handler is
     logged and not propagated.
     """
     try:
         logger.info("mqtt msg %s %s", msg.topic, msg.payload)
         data = json.loads(msg.payload)
         charge_info = None
         if msg.topic.startswith(MQTT_RESP_TOPIC):
             if "return_code" not in data:
                 logger.debug("mqtt msg hasn't return code")
             elif data["return_code"] == "400":
                 # "400" is handled as an expired token: force a refresh.
                 self.refresh_remote_token(force=True)
                 logger.error("retry last request, token was expired")
             elif data["return_code"] == "300":
                 logger.error('%s', data["return_code"])
             elif data["return_code"] != "0":
                 logger.error('%s : %s', data["return_code"], data["reason"])
                 # NOTE(review): this VehicleState handling only runs for
                 # return codes other than "0"/"300"/"400"; it may have been
                 # meant to sit one level out - confirm the intended nesting.
                 if msg.topic.endswith("/VehicleState"):
                     charge_info = data["resp_data"]["charging_state"]
                     self.precond_programs[data["vin"]] = data["resp_data"]["precond_state"]["programs"]
         elif msg.topic.startswith(MQTT_EVENT_TOPIC):
             charge_info = data["charging_state"]
         if charge_info is not None and charge_info['remaining_time'] != 0 and charge_info['rate'] == 0:
             # fix a psa server bug where charge beginning without status api being properly updated
             logger.warning("charge begin but API isn't updated")
             # This blocks the calling thread for a full minute.
             sleep(60)
             self.wakeup(data["vin"])
     except KeyError:
         logger.exception("mqtt message:")
Ejemplo n.º 10
0
async def cookie2user(cookie_str):
    '''
    Parse cookie and load user if cookie is valid.

    The cookie has the form ``uid-expires-sha1``. It is accepted only when
    it has exactly three parts, has not expired, names an existing user and
    carries the SHA-1 of ``uid-passwd-expires-_COOKIE_KEY``.

    Returns the user (with ``passwd`` masked) or None when the cookie is
    missing, malformed, expired, forged, or any error occurs.
    '''
    # Local import: only needed for the constant-time comparison below.
    import hmac

    if not cookie_str:
        return None
    try:
        parts = cookie_str.split('-')
        if len(parts) != 3:
            return None
        uid, expires, sha1 = parts
        if int(expires) < time.time():
            return None
        user = await User.find(uid)
        if user is None:
            return None
        s = '{}-{}-{}-{}'.format(uid, user.passwd, expires, _COOKIE_KEY)
        expected = hashlib.sha1(s.encode('utf-8')).hexdigest()
        # Compare in constant time so response timing does not reveal how
        # much of a forged signature was correct. Both sides are encoded to
        # bytes because compare_digest rejects non-ASCII str input.
        if not hmac.compare_digest(sha1.encode('utf-8'),
                                   expected.encode('utf-8')):
            logger.info('invalid sha1')
            return None
        user.passwd = '******'
        return user
    except Exception as e:
        # Any failure (bad integer, lookup error, ...) means "not logged in".
        logger.exception(e)
        return None
Ejemplo n.º 11
0
    def doWork(self):
        '''Thread entry point of the downloader (overrides WorkRequest).

        Runs inside the thread pool: takes URLs from this worker's own
        download queue, downloads them and puts each non-empty page on the
        html queue for the parser. Stops when the exit event is set.
        '''
        logger.debug('Start downloader`s doWork...')
        while True:
            if self.__dlQueue.qsize() > 0:
                urlNode = self.__dlQueue.get()
                self.__downloadingFlag += 1
                try:
                    page = self.__downloadPage(urlNode.url)
                    if len(page) > 0:
                        # Wrap the downloaded html in the internal data
                        # format and queue it for the parser module.
                        htmlNode = HtmlModel(urlNode.url, page, timestamp(), urlNode.depth)
                        self.__htmlQueue.put(htmlNode)
                finally:
                    # Always undo the increment, also when a download raises,
                    # so the in-progress counter cannot get stuck above zero.
                    self.__downloadingFlag -= 1
            # Check the exit event. This now also runs after an empty
            # download, which used to skip both this check and the delay.
            if self.__exitEvent.is_set():
                logger.info('Download model quit...')
                return
            # Interval between downloads.
            time.sleep(FETCH_TIME_INTERVAL)
Ejemplo n.º 12
0
def get_from_iotapi(username, password, email):
    """Collect account, subscription and extended-attribute data.

    Args:
        username: IoT admin user name, passed to ``get_iotapi_token``.
        password: IoT admin password, passed to ``get_iotapi_token``.
        email: account e-mail, passed to ``get_iotapi_account``.

    Returns:
        A tuple ``(account_info, subscribe_list, extattr_list)`` where
        ``extattr_list`` holds one dict per subscription with the keys
        ``place_name``, ``device_id`` and ``extattr_item``.
    """
    # step1: get fresh token by iot admin credential
    token = get_iotapi_token(username, password)
    # The token is a credential: log only whether one was obtained.
    logger.info('==token=={}'.format('<redacted>' if token else token))

    # step2: query account info by iot token and cbyge account email
    account_info = get_iotapi_account(token, email)
    print('==account_info==', account_info)

    # step3: query subscribe list by iot token and cbyge account id
    account_id = account_info.get('id')
    subscribe_list = get_iotapi_subscribe(token, account_id)
    print('==subscribe_list==', subscribe_list)

    # step4: query extattr list by iot token and subscribe id
    # A missing or null 'list' entry is treated as "no subscriptions".
    extattr_list = []
    for item in subscribe_list.get('list') or []:
        device_id = item.get('id')
        extattr_item = get_iotapi_extattr(token, item.get('product_id'),
                                          device_id)
        extattr_list.append({
            'place_name': item.get('name'),
            'device_id': device_id,
            'extattr_item': extattr_item,
        })
    print('==extattr_list==', extattr_list)

    # return results in tuple format
    return account_info, subscribe_list, extattr_list
Ejemplo n.º 13
0
 def horn(self, vin, count):
     """Build the "/Horn" activation request for ``vin`` and publish it.

     ``count`` is sent as the ``nb_horn`` field of the request.
     """
     payload = {"nb_horn": count, "action": "activate"}
     request = self.mqtt_request(vin, payload, "/Horn")
     logger.info(request)
     self.mqtt_client.publish(request)
Ejemplo n.º 14
0
def admin_notice(page=1):
    """Render one page of the admin notice list (5 notices per page).

    Args:
        page: 1-based page number; values below 1 are treated as 1.

    Returns:
        The rendered ``/admin/admin_notice.html`` template. ``cur_page``
        passed to the template is the row offset of the page, as before.
    """
    per_page = 5

    # Count on the server instead of fetching every row just to call len().
    cursor = dao.get_conn().cursor()
    try:
        cursor.execute("select count(*) from recruit_notice")
        total_post = int(cursor.fetchone()[0])
    finally:
        cursor.close()

    # Row offset of the requested page; clamped so it never goes negative.
    cur_page = (max(int(page), 1) - 1) * per_page
    # Integer ceiling division: a plain "/" yields a float on Python 3.
    total_page = (total_post + per_page - 1) // per_page

    # NOTE(review): assumes the driver uses the "%s" (format) paramstyle.
    cursor = dao.get_conn().cursor()
    try:
        cursor.execute(
            "select * from recruit_notice order by n_no DESC limit %s, %s",
            (cur_page, per_page))
        result = cursor.fetchall()
    finally:
        cursor.close()

    logger.info("notice_board view data : " + str(result))
    return render_template('/admin/admin_notice.html',
                           data=result,
                           cur_page=cur_page,
                           per_page=per_page,
                           total_post=total_post,
                           total_page=total_page)
Ejemplo n.º 15
0
 def lights(self, vin, duration: int):
     """Build the "/Lights" activation request for ``vin`` and publish it.

     ``duration`` is sent unchanged as the ``duration`` field.
     """
     payload = {"action": "activate", "duration": duration}
     request = self.mqtt_request(vin, payload, "/Lights")
     logger.info(request)
     self.publish(request)
Ejemplo n.º 16
0
 def writeText(self):
     """Write one CSV row per input file to a timestamped file in inFolder.

     Each row holds the file's name (folder prefix and ``.txt`` suffix
     removed), one value per key of ``self.key_list`` looked up in
     ``self.r_time_dict`` under the key rounded to 2 decimals, and the
     ``Total_Peak`` value. Rows are also appended to ``self.result_list``.
     """
     with tqdm(self.filelist,
             total=len(self.filelist),
             desc="writeText proc",
             dynamic_ncols=True,
             leave=False
             ) as pbar:
         for file_count, file in enumerate(self.filelist):
             # Remove the folder prefix and the ".txt" suffix explicitly:
             # str.lstrip/rstrip strip *character sets*, so the previous
             # rstrip(".txt") also ate trailing 't'/'x'/'.' characters of
             # the name itself (e.g. "test.txt" became "tes").
             fileName = file
             if fileName.startswith(self.inFolder):
                 fileName = fileName[len(self.inFolder):]
             fileName = fileName.lstrip("/")
             if fileName.endswith(".txt"):
                 fileName = fileName[:-len(".txt")]
             results = [fileName]
             for key in self.key_list:
                 r_time = round(float(key), 2)
                 results.append(str(self.r_time_dict[r_time][file_count]))
             results.append(str(self.r_time_dict['Total_Peak'][file_count]) + '\n')
             self.result_list.append(",".join(results))
             pbar.update(1)
     logger.info(pbar)
     # Header: an empty first cell (file-name column), the keys, Total_Peak.
     keys = ",".join(self.key_list) + "," + 'Total_Peak'
     now = datetime.datetime.now()
     wfname = '{}/{}_{}.csv'.format(self.inFolder, self.inFolder, now.strftime('%Y%m%d_%H%M%S'))
     with open(wfname, 'w') as wf:
         wf.write(',' + keys + "\n")
         wf.writelines(self.result_list)
Ejemplo n.º 17
0
def sents2vec(gen, coll, bc, library):
    '''
    Encode sentences and store the vectors.

    Reads batches of sentences from ``gen``, encodes each batch with
    ``bc.encode`` and, for every sentence not yet present in ``coll``,
    writes the vector to ``library`` and inserts an id/sentence record
    into ``coll``. A batch or sentence that fails is printed with its
    traceback and skipped.
    '''
    print('sents2vec')
    for sents in gen:
        try:
            arrs = bc.encode(sents)
        # "except Exception" instead of a bare except, so Ctrl-C
        # (KeyboardInterrupt) and SystemExit still stop the loop.
        except Exception:
            traceback.print_exc()
            print(sents)
            continue
        for sent, sentvec in zip(sents, arrs):
            try:
                sentvecid = genarate_mongo_id(sent)
                weather_sent_exist = search_in_mongo(coll, 'id', sentvecid)
                if len(weather_sent_exist) != 0:
                    logger.info('there is one in db: %s' % sent)
                    continue
                library.write(sentvecid,
                              sentvec,
                              metadata={'sentvecid': sentvecid})
                json_cell = {'id': sentvecid, 'sent': sent}
                coll.insert(json_cell)
                logger.info('insert %s' % sent)
            except Exception:
                traceback.print_exc()
                print(sents)
                continue
Ejemplo n.º 18
0
    def doWork(self):
        '''Thread entry point of the downloader (overrides WorkRequest).

        Runs inside the thread pool: takes URLs from this worker's own
        download queue, downloads them and puts each non-empty page on the
        html queue for the parser. Stops when the exit event is set.
        '''
        logger.debug('Start downloader`s doWork...')
        while True:
            if self.__dlQueue.qsize() > 0:
                urlNode = self.__dlQueue.get()
                self.__downloadingFlag += 1
                try:
                    page = self.__downloadPage(urlNode.url)
                    if len(page) > 0:
                        # Wrap the downloaded html in the internal data
                        # format and queue it for the parser module.
                        htmlNode = HtmlModel(urlNode.url, page, timestamp(),
                                             urlNode.depth)
                        self.__htmlQueue.put(htmlNode)
                finally:
                    # Always undo the increment, also when a download raises,
                    # so the in-progress counter cannot get stuck above zero.
                    self.__downloadingFlag -= 1
            # Check the exit event. This now also runs after an empty
            # download, which used to skip both this check and the delay.
            if self.__exitEvent.is_set():
                logger.info('Download model quit...')
                return
            # Interval between downloads.
            time.sleep(FETCH_TIME_INTERVAL)
Ejemplo n.º 19
0
def notice_post(p_no):
    """Render a single recruit notice.

    The notice row is extended with one extra element telling the template
    which action to offer: ``'login'`` for anonymous visitors, ``'modify'``
    when the current user already has a resume for this notice, otherwise
    ``'apply'``.

    Args:
        p_no: notice number taken from the URL.

    Returns:
        A redirect to the notice board when ``appliable_check`` rejects the
        notice, otherwise the rendered ``/notice/notice_post.html``.
    """
    if not appliable_check(p_no):
        flash("지원가능한 기간이 아닙니다.")
        return redirect(url_for("notice_view.notice_board"))

    # p_no comes from the URL, so it is bound as a query parameter instead
    # of being formatted into the SQL text.
    # NOTE(review): assumes the driver uses the "%s" (format) paramstyle.
    cursor = dao.get_conn().cursor()
    try:
        cursor.execute("select * from recruit_notice where n_no = %s",
                       (p_no,))
        result = cursor.fetchone()
    finally:
        cursor.close()

    if not current_user.is_authenticated:
        result = result + ('login',)
        return render_template("/notice/notice_post.html", data=result)

    # Show the "modify" button when a resume for this notice already exists.
    cursor = dao.get_conn().cursor()
    try:
        cursor.execute(
            "select * from resume where r_notice_no = %s and r_writer_no = %s",
            (p_no, current_user.user_no))
        result2 = cursor.fetchone()
    finally:
        cursor.close()

    result = result + ('modify' if result2 else 'apply',)

    logger.info("notice_post view data : " + str(result))

    return render_template("/notice/notice_post.html", data=result)
Ejemplo n.º 20
0
def init_index(xb, d=768):
    """Create a ``faiss.IndexFlatL2`` of dimension ``d`` filled with ``xb``.

    Prints the index's ``is_trained`` flag before adding and its ``ntotal``
    afterwards, then returns the index.
    """
    flat_index = faiss.IndexFlatL2(d)
    print(flat_index.is_trained)
    flat_index.add(xb)
    print(flat_index.ntotal)
    logger.info('faiss index init ok')
    return flat_index
Ejemplo n.º 21
0
def file_based_convert_examples_to_features(examples, label_list,
                                            max_seq_length, tokenizer,
                                            output_file):
    """Convert a set of `InputExample`s to a TFRecord file.

    Each example is converted with ``convert_single_example`` and written
    to ``output_file`` as a ``tf.train.Example`` with the int64 features
    ``input_ids``, ``input_mask``, ``segment_ids``, ``label_ids`` and
    ``is_real_example``. Progress is logged every 10000 examples.
    """

    def create_int_feature(values):
        # Defined once, outside the loop, instead of once per example.
        return tf.train.Feature(int64_list=tf.train.Int64List(
            value=list(values)))

    writer = tf.python_io.TFRecordWriter(output_file)
    try:
        for (ex_index, example) in enumerate(examples):
            if ex_index % 10000 == 0:
                logger.info("Writing example %d of %d", ex_index,
                            len(examples))

            feature = convert_single_example(ex_index, example, label_list,
                                             max_seq_length, tokenizer)

            features = collections.OrderedDict()
            features["input_ids"] = create_int_feature(feature.input_ids)
            features["input_mask"] = create_int_feature(feature.input_mask)
            features["segment_ids"] = create_int_feature(feature.segment_ids)
            features["label_ids"] = create_int_feature([feature.label_id])
            features["is_real_example"] = create_int_feature(
                [int(feature.is_real_example)])

            tf_example = tf.train.Example(features=tf.train.Features(
                feature=features))
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when converting an example fails.
        writer.close()
Ejemplo n.º 22
0
    def doWork(self):
        '''Thread entry point of the storage worker (overrides WorkRequest).

        Runs inside the thread pool: takes items from the data queue and
        inserts them into the ``zspider`` table until the exit event is set.
        Returns early when the database cannot be initialised.
        '''
        logger.debug('Start storage`s doWork...')
        if not self.__initDB():
            logger.error('Storage thread is stop.')
            return

        conn = sqlite3.connect(self.__dbPath)
        cur = conn.cursor()
        # Placeholders let sqlite3 quote the values itself, so URLs that
        # contain quotes can neither break nor inject into the statement.
        sqlInsert = 'INSERT INTO zspider(url, time, depth) VALUES (?, ?, ?)'
        while True:
            try:
                # Take one item from the data queue and insert it.
                if self.__dataQueue.qsize() > 0:
                    data = self.__dataQueue.get()
                    cur.execute(sqlInsert, (data.url, data.time, data.depth))
                    conn.commit()
                else:
                    time.sleep(1)
            except Exception as e:
                # Log and fall through so the exit check below still runs
                # even while database errors keep occurring.
                logger.error('Database operate exception: %s ', str(e))
            # Check the exit event.
            if self.__exitEvent.is_set():
                cur.close()
                conn.close()
                logger.info('Storage model quit...')
                return
Ejemplo n.º 23
0
 def __on_mqtt_connect(self, client, userdata, result_code, _):
     """Subscribe to the response topic and every vehicle's event topic.

     Called with the connection result code, which is logged. The response
     topic is ``MQTT_RESP_TOPIC + customer_id + "/#"``; one event topic per
     entry of ``self.vehicles_list`` follows, built from the car's ``vin``.
     """
     logger.info("Connected with result code %s", result_code)
     response_topic = MQTT_RESP_TOPIC + self.customer_id + "/#"
     event_topics = [MQTT_EVENT_TOPIC + car.vin for car in self.vehicles_list]
     for topic in [response_topic] + event_topics:
         client.subscribe(topic)
         logger.info("subscribe to %s", topic)
Ejemplo n.º 24
0
def _check_required_fields(request, *fields):
    """Call ``abort(400)`` unless every name in ``fields`` is in the args.

    Also calls ``abort(400)`` when ``request.args`` itself is None.
    """
    if request.args is None:
        logger.info('No args fields.')
        abort(400)
    for required in fields:
        value = request.args.get(required)
        if value is None:
            logger.info('Missing required args.')
            abort(400)
Ejemplo n.º 25
0
 def charge_now(self, vin, now):
     """Request an immediate (``now`` truthy) or delayed charge for ``vin``.

     The charge hour and minute come from ``__get_charge_hour``. The result
     of the request is only logged; the method always returns True.
     """
     charge_type = IMMEDIATE_CHARGE if now else DELAYED_CHARGE
     hour, minute = self.__get_charge_hour(vin)
     outcome = self.veh_charge_request(vin, hour, minute, charge_type)
     logger.info("charge_now: %s", outcome)
     return True
Ejemplo n.º 26
0
    def get_conn(self):
        """Return the connection stored on ``g``, opening it when missing.

        When ``g`` has no ``conn`` attribute yet, a raw connection is taken
        from ``self.engine``, kept on ``self.conn`` and stored as ``g.conn``.
        """
        # print(...) with one argument produces the same output on Python 2
        # and is valid syntax on Python 3, unlike the print statement.
        print('get_conn(), g.conn : %s' % (str(hasattr(g, 'conn'))))
        if not hasattr(g, 'conn'):
            self.conn = self.engine.raw_connection()
            g.conn = self.conn
            print(">> connection open() : " + str(g.conn))

        logger.info("DBManager.open()")
        return g.conn
Ejemplo n.º 27
0
def init_bert(ip="192.168.1.64", port=5555, port_out=5556, timeout=1000):
    """Create and return a ``BertClient``.

    All arguments are forwarded to ``BertClient`` unchanged; the defaults
    reproduce the previously hard-coded endpoint, so ``init_bert()`` behaves
    as before while other servers can now be targeted.

    Args:
        ip: address of the server.
        port: value for ``BertClient``'s ``port`` argument.
        port_out: value for ``BertClient``'s ``port_out`` argument.
        timeout: value for ``BertClient``'s ``timeout`` argument.
    """
    logger.info('init bert')
    bc = BertClient(ip=ip,
                    show_server_config=True,
                    timeout=timeout,
                    port=port,
                    port_out=port_out)
    logger.info('init bert SUCC')
    return bc
Ejemplo n.º 28
0
def init_bert(ip='58.17.133.80', port=18087, port_out=15005, timeout=10000):
    """Create and return a ``BertClient``.

    All arguments are forwarded to ``BertClient`` unchanged; the defaults
    reproduce the previously hard-coded endpoint, so ``init_bert()`` behaves
    as before while other servers can now be targeted.

    Args:
        ip: address of the server.
        port: value for ``BertClient``'s ``port`` argument.
        port_out: value for ``BertClient``'s ``port_out`` argument.
        timeout: value for ``BertClient``'s ``timeout`` argument.
    """
    logger.info('init bert')
    bc = BertClient(ip=ip,
                    show_server_config=True,
                    timeout=timeout,
                    port=port,
                    port_out=port_out)
    logger.info('init bert SUCC')
    return bc
Ejemplo n.º 29
0
 def __init__(self, excelFilePath, inputDir, outputDir):
     """Store the three paths, create the output directory, parse the data.

     The output directory is created with ``os.mkdir`` only when the path
     does not exist yet; ``parseData()`` is called last.
     """
     self.excelFilePath = excelFilePath
     self.inputDir = inputDir
     self.outputDir = outputDir
     output_missing = not os.path.exists(self.outputDir)
     if output_missing:
         logger.info("{} does not exist, create this output directory",
                     self.outputDir)
         os.mkdir(self.outputDir)
     self.parseData()
Ejemplo n.º 30
0
def logout_action():
    """Log the current user out and redirect to the index page.

    Returns:
        A redirect response to ``main_view.index``.

    Raises:
        Exception: any error from the logout steps is logged with its
            traceback and re-raised unchanged.
    """
    try:
        current_user.authenticated = False
        logout_user()
        flash("로그아웃 되었습니다.")
        return redirect(url_for("main_view.index"))
    except Exception:
        # logger.exception records the traceback; a bare raise keeps the
        # original traceback intact (unlike "raise e" on Python 2).
        logger.exception("logout_action failed")
        raise
Ejemplo n.º 31
0
def login_action():
    """Handle the login form.

    Looks up the user by e-mail and verifies the password hash. On success
    the user is logged in and redirected to the notice named by
    ``notice_no`` when one was given, otherwise to the index page with the
    "remember me" cookie set (``rmb == 'on'``) or cleared. On failure a
    message is flashed and the login form is shown again.

    Returns:
        A redirect or response object as described above.

    Raises:
        Exception: any error after the lookup is logged and re-raised.
    """
    email = request.values["email"] if "email" in request.form else ""
    pw = request.values["pwd"] if "pwd" in request.form else ""
    notice_no = request.values.get("notice_no", '')
    rmb = request.values["rmb"] if "rmb" in request.form else ""

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("rmb:" + str(rmb))

    # The e-mail is untrusted input: bind it as a parameter and match it
    # exactly. The previous "like '%s'" both allowed SQL injection and let
    # wildcard input such as "%" match an arbitrary account.
    # NOTE(review): assumes the driver uses the "%s" (format) paramstyle.
    cursor = dao.get_conn().cursor()
    try:
        cursor.execute("select * from recruit_user where user_email = %s",
                       (email,))
        result = cursor.fetchone()
    finally:
        cursor.close()

    # Log only the outcome: the row itself contains the password hash.
    logger.info("login_action, check email : %s",
                "found" if result else "not found")

    try:
        if not result or not check_password_hash(result[3], pw):
            flash("아이디 또는 비밀번호가 일치하지 않습니다.")
            return redirect(url_for("auth_view.login_form"))

        login_user(
            User(email,
                 name=result[2].decode('utf-8'),
                 auth=True,
                 no=result[0]))

        if notice_no != '':
            return redirect(
                url_for('notice_view.notice_post', p_no=notice_no))

        response = make_response(redirect(url_for("main_view.index")))
        if rmb == 'on':
            from aes_cipher import encrypt
            expire_date = datetime.now() + timedelta(days=90)
            enc_email = encrypt(current_app.config['SECRET_KEY'], email)
            response.set_cookie('rmber',
                                value=enc_email,
                                expires=expire_date)
        else:
            # Expire the remember-me cookie immediately.
            response.set_cookie('rmber', expires=0)

        flash("로그인 되었습니다.")
        return response

    except Exception:
        # Record the traceback and re-raise without rewriting it.
        logger.exception("login_action failed")
        raise
Ejemplo n.º 32
0
def api_logout():
    """Log the current user out, clear the session and redirect to main.

    Returns:
        A redirect response to ``main_view.main``.

    Raises:
        Exception: any error from the logout steps is logged with its
            traceback and re-raised. Previously the error was swallowed and
            the view returned None, which is not a usable response.
    """
    try:
        current_user.authenticated = False
        logout_user()
        session.clear()
        flash("로그아웃 되었습니다.")
        return redirect(url_for("main_view.main"))
    except Exception:
        logger.exception("api_logout failed")
        raise
Ejemplo n.º 33
0
 def _read_tsv(cls, input_file, quotechar=None):
     """Read a tab separated value file and return its rows as a list.

     The file is opened with ``tf.gfile.Open`` and parsed by ``csv.reader``
     using a tab delimiter and the given ``quotechar``.
     """
     logger.info("_read_tsv file: %s" % input_file)
     with tf.gfile.Open(input_file, "r") as handle:
         rows = csv.reader(handle, delimiter="\t", quotechar=quotechar)
         return list(rows)
Ejemplo n.º 34
0
 def doWork(self):
     '''Thread entry point of the parser (overrides WorkRequest).

     Runs inside the thread pool: parses a page whenever the html queue is
     non-empty, otherwise sleeps one second. Stops once the exit event is
     set.
     '''
     logger.debug('Start parser`s doWork...')
     while True:
         if self.__htmlQueue.qsize() <= 0:
             time.sleep(1)
         else:
             self.__parsePage()
         # Check the exit event.
         if self.__exitEvent.is_set():
             logger.info('Parser model quit...')
             return
Ejemplo n.º 35
0
    def start(self):
        '''Create and start every module, then run the dispatch loop.

        Seeds the URL queue, puts ``threadNum`` downloaders (each with its
        own download queue), one parser and one storage worker into the
        thread pool, then loops: hands URLs to idle download queues and
        periodically logs the queue sizes. Returns after setting the exit
        event and closing the thread pool.
        '''
        # Indentation uses spaces only: the previous mix of tabs and spaces
        # is rejected by Python 3 (TabError).
        logger.debug('Init start urls...')
        self.__initUrlQueue(self.__startUrls)

        # Start threadNum downloaders and give each its own download queue.
        logger.debug('Put downloader to thread pool...')
        for _ in range(self.__threadNum):
            dlQueue = Queue.Queue()
            self.__downloadQueueList.append(dlQueue)
            downloadReq = Downloader(dlQueue, self.__downloadMode, self.__htmlQueue, self.__exitEvent, self.__downloadingFlag)
            self.__threadPool.putRequest(downloadReq)

        # Create the parser module and run it in the thread pool.
        logger.debug('Put parser to thread pool...')
        parserReq = Parser(self.__depth, self.__startUrls, self.__keyword, self.__htmlQueue, self.__dataQueue, self.__urlQueue, self.__exitEvent)
        self.__threadPool.putRequest(parserReq)

        # Create the storage module and run it in the thread pool.
        logger.debug('Put storage to thread pool...')
        storageReq = Storage(self.__dbName, self.__dataQueue, self.__exitEvent)
        self.__threadPool.putRequest(storageReq)

        # Main loop: assign URLs to the download queues and log statistics.
        # NOTE(review): the loop never sleeps, so it spins while idle.
        logger.debug('start main loop...')
        lastTime = time.time()
        while True:
            for dlQueue in self.__downloadQueueList:
                if self.__urlQueue.qsize() > 0 and dlQueue.qsize() < 1:
                    node = self.__urlQueue.get()
                    dlQueue.put(node)

            now = time.time()
            if now - lastTime > PRINT_TIME_INTERVAL:
                logger.info('URL QUEUE SIZE : %d', self.__urlQueue.qsize())
                logger.info('HTML QUEUE SIZE : %d', self.__htmlQueue.qsize())
                logger.info('DATA QUEUE SIZE : %d', self.__dataQueue.qsize())
                logger.info('REPEAT SET SIZE : %d', parserReq.getRepeatSetSize())
                # Delay the exit check so the program does not quit right
                # after start-up.
                # NOTE(review): lastTime is reset on every report, so this
                # branch presumably needs PRINT_TIME_INTERVAL >= 30 to ever
                # be reached - confirm.
                if now - lastTime > 30:
                    if (self.__urlQueue.qsize() < 1
                            and self.__htmlQueue.qsize() < 1
                            and self.__dataQueue.qsize() < 1
                            and self.__downloadingFlag < 1):
                        self.__exitEvent.set()
                        self.__threadPool.close(True)
                        return
                lastTime = now