コード例 #1
0
ファイル: ZJ21cnjy.py プロジェクト: wuhuizhong/talkweb
 def downloadKnowled(self,
                     select_sql=SQL.SELECT_SUBJECT_RELATION,
                     inser_konw_sql=SQL.INSERT_21CNJY_KNOWLED,
                     know_url=URL.KNOW_URL,
                     know_child_url=URL.KNOW_CHILD_URL):
     '''Download knowledge points for every subject relation row.

     Each row returned by ``select_sql`` is unpacked into
     ``(subject_code, xd, subject_zname, course_21)``.  ``know_url`` is
     formatted with ``xd``, ``course_21`` and the current millisecond
     timestamp and fetched; the decoded JSON is handed to
     ``__recursiveKnowled`` together with a partially formatted
     ``know_child_url``.  A truthy result is written with
     ``inser_konw_sql`` and the row's work is committed.

     An exception raised while handling a row is logged and rolled back,
     and the loop moves on to the next row.  The connection is always
     closed on exit.
     '''
     pg = PostgreSql()
     try:
         for relation in pg.getAll(select_sql):
             try:
                 subject_code, xd, subject_zname, course_21 = relation
                 root_url = know_url % (xd, course_21,
                                        Utils.getCurrMilliSecond())
                 resp = self.session.get(root_url, headers=self.headers)
                 payload = resp.json()
                 # Only the xd/course slots are filled here; the two '%s'
                 # arguments leave two placeholders in the resulting
                 # template (presumably filled by the recursive helper).
                 child_url = know_child_url % ('%s', xd, course_21, '%s')
                 knowleds = self.__recursiveKnowled(payload, 1,
                                                    subject_code, child_url)
                 if knowleds:
                     pg.batchExecute(inser_konw_sql, knowleds)
                 pg.commit()
                 logger.info(
                     u'完成二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)知识点的导入,导入知识点数量:%d',
                     xd, course_21, subject_zname, subject_code,
                     len(knowleds))
             except Exception:
                 logger.exception(
                     u'二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)知识点的导入异常',
                     xd, course_21, subject_zname, subject_code)
                 pg.rollback()
     finally:
         pg.close()
コード例 #2
0
ファイル: ZJ21cnjy.py プロジェクト: wuhuizhong/talkweb
 def downloadSubject(self,
                     select_sql=SQL.SELECT_SUBJECT_RELATION,
                     update_sql=SQL.UPDATE_SUBJECT_RELATION,
                     subjects_url=URL.SUBJECTS_URL %
                     Utils.getCurrMilliSecond()):
     '''Fetch the 21cnjy subject list and map it onto local subjects.

     The JSON fetched from ``subjects_url`` is indexed by ``str(xd)``;
     each entry maps a 21cnjy subject code to its name.  For every row
     of ``select_sql`` whose ``course_21`` is still empty, the local
     subject name is searched for each 21cnjy name (``政治思品`` is
     searched as ``政治``).  When several names match, the last one
     iterated wins.  All matches are written in one batch with
     ``update_sql`` and committed; on any error the transaction is
     rolled back.  The connection is always closed.

     Note: the default ``subjects_url`` is formatted when the function
     is defined, so its timestamp is fixed at definition time.
     '''
     resp = self.session.get(subjects_url, headers=self.headers)
     zj21cnjy_subject = resp.json()
     pg = PostgreSql()
     try:
         pending = []
         for subject_code, xd, subject_zname, course_21 in pg.getAll(
                 select_sql):
             if course_21:
                 # Already mapped to a 21cnjy subject.
                 continue
             course_21_name = None
             candidates = zj21cnjy_subject[str(xd)]
             for code_21, name_21 in candidates.iteritems():
                 if name_21 != u'政治思品':
                     needle = name_21
                 else:
                     needle = u'政治'
                 if subject_zname.find(needle) >= 0:
                     # No break: a later match overrides an earlier one.
                     course_21, course_21_name = code_21, name_21
             if course_21:
                 pending.append((course_21, course_21_name, subject_code))
         if pending:
             pg.batchExecute(update_sql, pending)
         pg.commit()
         logger.info(u'完成21cnjy与线网学科学段对应关系更新,更新数量:%d', len(pending))
     except Exception as err:
         logger.exception(u'21cnjy与线网学科学段对应关系更新出现异常,异常信息:%s', err.message)
         pg.rollback()
     finally:
         pg.close()
コード例 #3
0
ファイル: jyeoo_parse_pic.py プロジェクト: wuhuizhong/talkweb
 def execExtract(self,
                 select_main_sql=SQL.select_main_sql,
                 insert_image_sql=SQL.insert_image_sql,
                 update_main_sql=SQL.update_main_sql,
                 select_convert_sql=SQL.select_convert_sql):
     '''Extract image URLs from question rows and persist them in batches.

     Each row of ``select_main_sql`` is read as ``(seq, qid, col, ...)``.
     The distinct image URLs found in the remaining columns (first-seen
     order) are passed to ``__generateTmpImage``.  Per row, one record is
     queued for ``insert_image_sql`` (status 0 with images, 2 without) and
     one for ``update_main_sql`` (status 1 with images, 2 without).  The
     queued records are written and committed once per batch.

     Returns:
         True when every row and batch succeeded, False otherwise.

     Fix: a ``return rs`` placed directly after the row loop used to end
     the function before anything was written, which left the batch
     insert/update/commit unreachable.
     '''
     postgreSql = PostgreSql()
     count = 0
     rs = True
     try:
         flag = True  # True while the last select still returned rows
         # Stop after the first batch that had a failure: the select takes
         # no paging argument, so failed rows would presumably be selected
         # again on the next pass and the loop might never end.
         while flag and rs:
             try:
                 flag = False
                 insert_image_params = []
                 update_main_params = []
                 for rows in postgreSql.getAll(select_main_sql):
                     flag = True
                     seq = rows[0]
                     qid = rows[1]
                     try:
                         urls = []
                         for col in rows[2:]:
                             for j_url in JyeooUtil.getJyeooImg(col):
                                 if j_url not in urls:
                                     urls.append(j_url)
                         # Create the temporary image files.
                         self.__generateTmpImage(urls, postgreSql,
                                                 select_convert_sql)
                         # Record for the image table.
                         insert_image_params.append(
                             (seq, qid, json.dumps(urls), 0 if urls else 2))
                         # New status for the main question table.
                         update_main_params.append((1 if urls else 2, qid))
                     except Exception as ex:
                         rs = False
                         logger.exception(
                             u"提取图片-----处理qi=%s,创建题目的图片发生异常,异常信息:%s" %
                             (qid, ex.message))
                 if insert_image_params:
                     postgreSql.batchExecute(insert_image_sql,
                                             insert_image_params)
                 if update_main_params:
                     postgreSql.batchExecute(update_main_sql,
                                             update_main_params)
                 postgreSql.commit()
                 count += len(insert_image_params)
                 logger.info(u'提取图片-----已成功处理题目数量:%d' % count)
             except Exception as e:
                 postgreSql.rollback()
                 rs = False
                 logger.exception(u"提取图片-----批量处理-异常信息:%s" % (e.message))
     finally:
         postgreSql.close()
     return rs
コード例 #4
0
ファイル: ZJ21cnjy.py プロジェクト: wuhuizhong/talkweb
 def extractQuesImage(self,
                      rows=1000,
                      select_batch_ques=SQL.SELECT_BATCH_QUES,
                      insert_image_url=SQL.INSERT_IMAGE_URL,
                      update_status=SQL.UPDATE_STATUS):
     '''Collect the image URLs of questions and persist them in batches.

     Batches are read with ``select_batch_ques`` using the parameters
     ``(0, seq, rows)``, where ``seq`` is taken from the third column of
     the last row read.  Each row is read as
     ``(qid, old_id, seq, url, url, col, ...)``: columns 3 and 4 seed the
     URL list and every non-None later column is scanned with
     ``__get21cnjyImg``.  The loop ends when a batch returns no rows.

     A failure on a single row is logged and the row is skipped; a
     failure while writing a batch is logged and rolled back.  The
     connection is always closed.

     Fix: the connection is now created before the ``try`` block.  It
     used to be created inside it, so a failing ``PostgreSql()`` call
     made ``finally`` raise ``NameError`` and hid the real error.
     '''
     logger.info(u'开始分析提取题目图片')
     seq = 0
     pg = PostgreSql()
     try:
         flag = True
         count = 0
         while flag:
             try:
                 flag = False
                 insert_params = []
                 update_params = []
                 for row in pg.getAll(select_batch_ques, (0, seq, rows)):
                     flag = True
                     qid = row[0]
                     old_id = row[1]
                     seq = row[2]
                     try:
                         # NOTE(review): this list always has two entries,
                         # so the "no image" status 2 below is never
                         # chosen -- confirm whether None entries should
                         # be dropped first.
                         urls = [row[3], row[4]]
                         for col in row[5:]:
                             if col is None:
                                 continue
                             urls.extend(self.__get21cnjyImg(col))
                         # Create the temporary image files.
                         self.__generateTmpImage(urls)
                         # Image table record: 0 with images, 2 without.
                         insert_params.append((seq, qid, Utils.toJson(urls),
                                               0 if urls else 2))
                         # Main table status: 1 with images, 2 without.
                         update_params.append((1 if urls else 2, qid))
                     except Exception as ex:
                         logger.exception(
                             u"处理qi=%s,old_id=%s,创建题目的图片发生异常,异常信息:%s" %
                             (qid, old_id, ex.message))
                 if update_params:
                     pg.batchExecute(update_status, update_params)
                 if insert_params:
                     pg.batchExecute(insert_image_url, insert_params)
                 pg.commit()
                 count += len(update_params)
                 logger.info(u'已成功处理题目数量:%d' % count)
             except Exception as e:
                 pg.rollback()
                 logger.exception("批量处理-异常信息:%s" % (e.message))
     finally:
         pg.close()
コード例 #5
0
ファイル: ZujuanFindImags.py プロジェクト: wuhuizhong/talkweb
 def findImags(self, subject):
     '''Collect the image URLs of one subject's questions in batches.

     Batches are read with ``self.SELECT_SQL`` using the parameters
     ``(subject, seq_num, self.ROWS)``, where ``seq_num`` is taken from
     the third column of the last row read.  Each row is read as
     ``(qid, old_id, seq_num, col, ...)`` and every non-None remaining
     column is scanned with ``getZjImg``.  The loop ends when a batch
     returns no rows.

     A failure on a single row is logged and the row is skipped; a
     failure while writing a batch is logged and rolled back.  The
     connection is always closed.

     Fixes: the connection is created before the ``try`` block, so a
     failing ``PostgreSql()`` call no longer surfaces as ``NameError``
     from ``finally``.  The leftover debug ``print`` of the URL list now
     goes through ``logger.debug``.
     '''
     seq_num = 0
     rows = self.ROWS
     pg = PostgreSql()
     try:
         flag = True
         count = 0
         while flag:
             try:
                 flag = False
                 insert_params = []
                 update_params = []
                 for row in pg.getAll(self.SELECT_SQL,
                                      (subject, seq_num, rows)):
                     flag = True
                     qid = row[0]
                     old_id = row[1]
                     seq_num = row[2]
                     try:
                         urls = []
                         for col in row[3:]:
                             if col is None:
                                 continue
                             urls.extend(self.getZjImg(col))
                         logger.debug(u'qid=%s, urls=%s', qid, urls)
                         # Create the temporary image files.
                         self.generateTmpImage(urls)
                         # Image table record: 0 with images, 2 without.
                         insert_params.append(
                             (qid, json.dumps(urls), 0 if urls else 2))
                         # Main table status: 1 with images, 2 without.
                         update_params.append((1 if urls else 2, qid))
                     except Exception as ex:
                         logger.exception(
                             u"处理qi=%s,old_id=%s,创建题目的图片发生异常,异常信息:%s" %
                             (qid, old_id, ex.message))
                 if update_params:
                     pg.batchExecute(self.UPDATE_SQL, update_params)
                 if insert_params:
                     pg.batchExecute(self.INSERT_SQL, insert_params)
                 pg.commit()
                 count += len(update_params)
                 logger.info(u'已成功处理题目数量:%d' % count)
             except Exception as e:
                 pg.rollback()
                 logger.exception("批量处理-异常信息:%s" % (e.message))
     finally:
         pg.close()
コード例 #6
0
ファイル: ZJ21cnjy.py プロジェクト: wuhuizhong/talkweb
 def downloadQuestions(
         self,
         ques_type='',
         ques_pg_url=URL.QUES_PG_URL,
         select_knowled_id=SQL.SELECT_KNOWLED_ID,
         select_subject_sql=SQL.SELECT_SUBJECT_RELATION,
         select_params_type=SQL.SELECT_PARAMS_TYPE,
         update_knowled_downloded=SQL.UPDATE_KNOWLED_DOWNLODED):
     '''Download questions subject by subject.

     First builds a two-level lookup ``subject_code -> code_21cnjy ->
     info`` from ``select_params_type`` (queried with
     ``('ques_type', 1)``).  Then every row of ``select_subject_sql`` is
     passed to ``__downloadQuestionsBySubject`` together with the lookup
     entry for its subject.

     Raises:
         Exception: the first error raised for a subject (including
             ``KeyError`` when the subject has no question types) is
             logged, rolled back and re-raised, which ends the run.

     Fixes: ``dict.has_key`` (removed in Python 3) is replaced by
     ``setdefault``, and the error is re-raised with a bare ``raise`` so
     the original traceback is kept.
     '''
     pg = PostgreSql()
     try:
         # Question type info, grouped by subject.
         ques_type_dic = {}
         for row_ques_types in pg.getAll(select_params_type,
                                         ('ques_type', 1)):
             subject_code, code_21cnjy, name_21cnjy, code, name = row_ques_types
             ques_type_dic.setdefault(subject_code, {})[code_21cnjy] = {
                 'code_21cnjy': code_21cnjy,
                 'name_21cnjy': name_21cnjy,
                 'code': code,
                 'name': name
             }
         # One download pass per subject relation row.
         for row in pg.getAll(select_subject_sql):
             try:
                 subject_code, xd, subject_zname, course_21 = row
                 self.__downloadQuestionsBySubject(
                     row, pg, ques_pg_url, ques_type_dic[subject_code],
                     ques_type, select_knowled_id, update_knowled_downloded)
             except Exception:
                 logger.exception(
                     u'二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)题目导入异常', xd,
                     course_21, subject_zname, subject_code)
                 pg.rollback()
                 raise
     finally:
         pg.close()
コード例 #7
0
ファイル: pytesser_zujuan.py プロジェクト: wuhuizhong/talkweb
def findImags(subject):
    """Read the answer text from each question's first image, in batches.

    Batches are read with ``SELECT_SQL`` using the parameters
    ``(subject, qid, ROWS)``, where ``qid`` is the first column of the
    last row read.  For each row the first URL returned by ``getZjImg``
    is passed to ``getTextByImageUrl2`` and the result is stored as a
    one-element JSON list via ``UPDATE_SQL``.  Rows that fail are marked
    with status -1 via ``UPDATE_STATUS_SQL``.  The loop ends when a
    batch returns no rows; the connection is always closed.

    Fixes:
    * The connection is created before the ``try`` block, so a failing
      ``PostgreSql()`` call no longer surfaces as ``NameError`` from
      ``finally``.
    * The image URL is now extracted inside the per-row ``try``.  A row
      without any image used to raise ``IndexError`` outside it, which
      rolled back the whole batch, including rows already processed.
    """
    rows = ROWS
    pg = PostgreSql()
    try:
        flag = True
        count = 0
        qid = '0'

        err_count = 0
        while flag:
            try:
                flag = False
                update_params = []
                update_err_params = []
                for row in pg.getAll(SELECT_SQL, (subject, qid, rows)):
                    flag = True
                    qid = row[0]
                    url = None  # stays None if URL extraction fails
                    try:
                        url = getZjImg(row[1])[0]
                        choice_answer = getTextByImageUrl2(url)
                        update_params.append(
                            (json.dumps([choice_answer],
                                        ensure_ascii=False), qid))
                        count += 1
                    except Exception:
                        err_count += 1
                        update_err_params.append((-1, qid))
                        logger.exception('异常的题目ID:%s,url:%s', qid, url)
                if update_params:
                    pg.batchExecute(UPDATE_SQL, update_params)
                if update_err_params:
                    pg.batchExecute(UPDATE_STATUS_SQL, update_err_params)
                pg.commit()
                logger.info(u'学科编码:%d,已成功处理题目数量:%d,错误数量:%d' %
                            (subject, count, err_count))
            except Exception as e:
                pg.rollback()
                logger.exception("学科编码:%d,批量处理-异常信息:%s" % (subject, e.message))
    finally:
        pg.close()
コード例 #8
0
ファイル: ZJ21cnjy.py プロジェクト: wuhuizhong/talkweb
 def downloadQueryParams(self,
                         select_sql=SQL.SELECT_SUBJECT_RELATION,
                         insert_sql=SQL.INSERT_21CNJY_TYPE,
                         query_param_url=URL.QUERY_PARAM_URL,
                         ques_query_type=QUES_QUERY_TYPE):
     '''Download the question query parameters of every subject.

     For each row of ``select_sql`` the JSON behind ``query_param_url``
     is fetched.  Top-level keys that are not present in
     ``ques_query_type`` are ignored; for the others every
     ``code -> name`` pair becomes one record
     ``(id, mapped type, code, name, subject_code)``, where ``id`` is a
     counter that keeps running across all subjects.  Records are
     written with ``insert_sql`` and committed per subject.

     An exception raised for a subject is logged and rolled back, and
     the loop moves on to the next subject.  The connection is always
     closed.
     '''
     pg = PostgreSql()
     count = 0
     try:
         for relation in pg.getAll(select_sql):
             try:
                 subject_code, xd, subject_zname, course_21 = relation
                 target = query_param_url % (xd, course_21,
                                             Utils.getCurrMilliSecond())
                 resp = self.session.get(target, headers=self.headers)
                 insert_params = []
                 for param_type, values in resp.json().iteritems():
                     if param_type in ques_query_type:
                         mapped_type = ques_query_type[param_type]
                         for code, name in values.iteritems():
                             count += 1
                             insert_params.append((count, mapped_type, code,
                                                   name, subject_code))
                 if insert_params:
                     pg.batchExecute(insert_sql, insert_params)
                 pg.commit()
                 logger.info(
                     u'完成二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)查询参数的导入,导入参数的数量:%d,所有学科处理总数%d',
                     xd, course_21, subject_zname, subject_code,
                     len(insert_params), count)
             except Exception:
                 logger.exception(
                     u'二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)查询参数的导入异常',
                     xd, course_21, subject_zname, subject_code)
                 pg.rollback()
     finally:
         pg.close()
コード例 #9
0
    def main(self,startTime=start_time, root_path=PATH.rootImagPath,
             pic_new_path=PATH.pic_new_path):
        '''Check which questions have all images downloaded and publish them.

        Batches are read with ``self.SELECT_SQL_IMG`` formatted with the
        highest id seen so far; each row is ``(id, qid, urls_json)``.
        Every URL is mapped to a local file under ``root_path`` and
        classified by the file's modification time:

        * missing, or modified at/after the start of this run: the
          question is not finished yet;
        * modified at/after ``startTime``: the file is copied to
          ``pic_new_path`` under an MD5-based name (unless that file
          already exists) and an old-url/new-url pair is queued;
        * older than ``startTime``: the new URL is looked up with
          ``self.SQL_URL``.

        Finished questions get their URL map and status 3 written via
        ``self.UPDATE_SQL`` and status 1 via ``self.UPDATE_SQL_IMG``.
        The loop ends when a batch returns no rows; the connection is
        always closed.

        Fix: the age check now uses the ``startTime`` argument.  It used
        to read the module-level ``start_time``, so a value passed by
        the caller was ignored.
        '''
        select_sql = self.SELECT_SQL_IMG
        update_sql = self.UPDATE_SQL
        update_sql_img = self.UPDATE_SQL_IMG
        insert_sql = self.INSERT_SQL_CONVERT

        curr_time = time.time()
        curr_time_strft = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(curr_time))
        # Log when this analysis run started.
        logger.info(u'本次分析时间:%s,秒:%.2f' % (curr_time_strft, curr_time))
        postgreSql = PostgreSql()
        count = 0
        total = 0
        try:
            flag = True  # True while the last select still returned rows
            last_id = 0  # paging cursor: highest row id seen so far
            while flag:
                try:
                    flag = False
                    update_params = []
                    update_image_params = []
                    insert_params = []
                    for rows in postgreSql.getAll(select_sql % last_id):
                        flag = True
                        total += 1
                        last_id = rows[0] if rows[0] > last_id else last_id
                        qid = rows[1]
                        urls = rows[2]
                        try:
                            isDownloadFinish = True
                            urlMap = {}
                            for url in json.loads(urls):
                                url_path = urlparse.urlsplit(url)
                                fileName = os.path.join(root_path, url_path.path[1:])
                                if os.path.exists(fileName):
                                    mtime = os.path.getmtime(fileName)
                                    if mtime >= curr_time:
                                        # Touched after this run started.
                                        isDownloadFinish = False
                                    elif startTime <= mtime:
                                        extension = os.path.splitext(fileName)[1]
                                        # New file name.
                                        file_new_name = "%s%s" % (Utils.getStrMD5(url + "-mqm"), extension)
                                        # New file name, full path.
                                        file_new_name_all = os.path.join(pic_new_path, file_new_name)
                                        # New URL.
                                        url_new = image_url + file_new_name
                                        urlMap[url] = url_new
                                        if not os.path.exists(file_new_name_all):
                                            shutil.copy2(fileName, file_new_name_all)
                                            Utils.modifyMD5(file_new_name_all)
                                            insert_params.append((url, url_new))
                                    else:
                                        # Image handled by an earlier run.
                                        # If getOne returns None, the [0]
                                        # raises and the row is logged
                                        # below.
                                        sql_url = self.SQL_URL
                                        urlMap[url] = postgreSql.getOne(sql_url, (url,))[0]
                                else:
                                    isDownloadFinish = False

                            # Only finished questions are updated.
                            if isDownloadFinish:
                                update_image_params.append((1, qid))
                                # Store the URL map and set status 3.
                                update_params.append((json.dumps(urlMap), 3, qid))

                        except Exception as ex:
                            logger.exception(u"处理qi=%s,校验题目的所有图片下载是否完成发生异常,异常信息:%s" % (qid, ex.message))
                    if update_params: postgreSql.batchExecute(update_sql, update_params)
                    if update_image_params: postgreSql.batchExecute(update_sql_img, update_image_params)
                    if insert_params: postgreSql.batchExecute(insert_sql, insert_params)
                    postgreSql.commit()
                    count += len(update_image_params)
                    logger.info(u'已成功处理题目数量:%d,校验题目数量总数:%d' % (count, total))
                except Exception as e:
                    postgreSql.rollback()
                    logger.exception("批量处理-异常信息:%s" % (e.message))
        finally:
            postgreSql.close()
コード例 #10
0
ファイル: jyeoo_parse_pic.py プロジェクト: wuhuizhong/talkweb
 def execParseImage(self,
                    select_image_sql=SQL.select_image_sql,
                    select_convert_sql=SQL.select_convert_sql,
                    update_main_url_sql=SQL.update_main_url_sql,
                    update_image_sql=SQL.update_image_sql,
                    insert_convert_sql=SQL.insert_convert_sql,
                    picture_path=PATH.picture_path,
                    pic_new_path=PATH.pic_new_path,
                    pic_relative_path=PATH.pic_relative_path,
                    image_url=image_url):
     '''Publish downloaded question images and record their new URLs.

     Batches are read with ``select_image_sql`` formatted with the
     highest id seen so far; each row is ``(id, qid, urls_json)``.  Every
     URL is mapped to a local file under ``picture_path``:

     * if the file exists it is copied to ``pic_new_path`` under an
       MD5-based name (unless already copied) and an old-url/new-url
       pair is queued for ``insert_convert_sql`` when needed;
     * if it is missing, the new URL is looked up with
       ``select_convert_sql``; without a record the question counts as
       not finished.

     Finished questions get their URL map and status 3 written via
     ``update_main_url_sql`` and status 1 via ``update_image_sql``.  The
     loop ends when a batch returns no rows; the connection is always
     closed.

     Returns:
         True when every row and batch succeeded, False otherwise.

     Fixes:
     * ``insert_convert_params.append(url, url_new)`` passed two
       arguments and raised ``TypeError``; it now appends one tuple.
     * The convert-table lookup was stored in ``rs`` and overwrote the
       boolean result flag; it now uses its own variable.
     * A ``return rs`` placed directly after the row loop ended the
       function before anything was written; it is removed.
     '''
     pic_new_real_path = os.path.join(pic_new_path, pic_relative_path)
     image_real_url = urlparse.urljoin(image_url, pic_relative_path)
     logger.info(u'进入处理图片流程,原始图片路径:%s,处理后图片存放路径:%s,图片url前缀地址:%s',
                 picture_path, pic_new_real_path, image_real_url)
     if not os.path.exists(pic_new_real_path):
         os.makedirs(pic_new_real_path)
     postgreSql = PostgreSql()
     count = 0
     total = 0
     rs = True
     try:
         flag = True  # True while the last select still returned rows
         last_id = 0  # paging cursor: highest row id seen so far
         while flag:
             try:
                 flag = False
                 update_main_params = []
                 update_image_params = []
                 insert_convert_params = []
                 for rows in postgreSql.getAll(select_image_sql % last_id):
                     flag = True
                     total += 1
                     last_id = rows[0] if rows[0] > last_id else last_id
                     qid = rows[1]
                     urls = rows[2]
                     try:
                         isDownloadFinish = True
                         urlMap = {}
                         for url in json.loads(urls):
                             url_path = urlparse.urlsplit(url)
                             fileName = os.path.join(
                                 picture_path, url_path.path[1:])
                             if os.path.exists(fileName):
                                 extension = os.path.splitext(fileName)[1]
                                 # New file name.
                                 file_new_name = "%s%s" % (Utils.getStrMD5(
                                     url + "-mqm"), extension)
                                 # New file name, full path.
                                 # NOTE(review): the directory created
                                 # above is pic_new_real_path, but files
                                 # are written under pic_new_path --
                                 # confirm which one is intended.
                                 file_new_name_all = os.path.join(
                                     pic_new_path, file_new_name)
                                 # New URL.
                                 url_new = image_real_url + file_new_name
                                 urlMap[url] = url_new
                                 if os.path.exists(file_new_name_all):
                                     # Already copied; only add the
                                     # mapping record if it is missing.
                                     if not postgreSql.getOne(
                                             select_convert_sql, (url, )):
                                         insert_convert_params.append(
                                             (url, url_new))
                                 else:
                                     shutil.copy2(fileName,
                                                  file_new_name_all)
                                     Utils.modifyMD5(file_new_name_all)
                                     insert_convert_params.append(
                                         (url, url_new))
                             else:
                                 # Source file is gone: reuse an earlier
                                 # conversion if one is recorded.
                                 converted = postgreSql.getOne(
                                     select_convert_sql, (url, ))
                                 if converted:
                                     urlMap[url] = converted[0]
                                 else:
                                     isDownloadFinish = False
                         # Only finished questions are updated.
                         if isDownloadFinish:
                             update_image_params.append((1, qid))
                             # Store the URL map and set status 3.
                             update_main_params.append(
                                 (json.dumps(urlMap), 3, qid))
                     except Exception as ex:
                         rs = False
                         logger.exception(
                             u"处理图片流程,qi=%s,校验题目的所有图片下载是否完成发生异常,异常信息:%s" %
                             (qid, ex.message))
                 if update_main_params:
                     postgreSql.batchExecute(update_main_url_sql,
                                             update_main_params)
                 if update_image_params:
                     postgreSql.batchExecute(update_image_sql,
                                             update_image_params)
                 if insert_convert_params:
                     postgreSql.batchExecute(insert_convert_sql,
                                             insert_convert_params)
                 postgreSql.commit()
                 count += len(update_image_params)
                 logger.info(u'处理图片流程,已成功处理题目数量:%d,校验题目数量总数:%d' %
                             (count, total))
             except Exception as e:
                 rs = False
                 postgreSql.rollback()
                 logger.exception(u"处理图片流程,批量处理-异常信息:%s" % (e.message))
     finally:
         postgreSql.close()
     return rs