Ejemplo n.º 1
0
def processImage():
    """Copy rendered analysis pictures to their new-id names and flag the rows.

    Pages through ``t_ques_id`` rows with ``status = 0`` in ascending ``id``
    order, 1000 rows per query.  For each row whose ``picture/<old_id>.png``
    and ``picture/<old_id>.app.png`` both exist and are non-empty, the two
    files are copied to ``analysis/<new_id>.png`` and
    ``analysis/<new_id>.app.png`` and the row is queued for an update that
    sets ``status = 1`` and stores the analysis URL.  Every page is committed
    separately and the connection is closed when the function exits.
    """
    source_dir = 'picture'
    target_dir = 'analysis'
    for directory in (source_dir, target_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    postgresql = PostgreSql()
    try:
        select_sql = 'SELECT id,old_id,new_id FROM "public"."t_ques_id" where id > %s and status = 0 order by id ASC limit %s'
        update_sql = 'UPDATE t_ques_id SET status = 1,analysis_url= %s WHERE old_id = %s'
        page_size = 1000
        last_id = 0  # keyset cursor: highest id seen so far
        copied = 0  # rows whose pictures were copied
        seen = 0  # rows examined
        more_rows = True
        while more_rows:
            more_rows = False
            pending_updates = []
            for record in postgresql.getAll(select_sql, (last_id, page_size)):
                more_rows = True
                last_id = record[0]
                old_id = record[1]
                new_id = record[2]
                seen += 1

                web_src = os.path.join(source_dir, old_id + '.png')
                app_src = os.path.join(source_dir, old_id + '.app.png')
                # Skip the row unless both renderings exist and are non-empty.
                if not (os.path.exists(web_src) and os.path.exists(app_src)):
                    continue
                if os.path.getsize(app_src) <= 0 or os.path.getsize(web_src) <= 0:
                    continue

                web_name = new_id + '.png'
                shutil.copyfile(web_src, os.path.join(target_dir, web_name))
                shutil.copyfile(
                    app_src, os.path.join(target_dir, new_id + '.app.png'))
                pending_updates.append(
                    ('http://image.yuncelian.com/1/analysis/' + web_name,
                     old_id))
                copied += 1
            if pending_updates:
                postgresql.batchExecute(update_sql, pending_updates)
            postgresql.commit()
            logger.info(u'处理总数:%d,已处理菁优图片数量:%d', seen, copied)
    finally:
        postgresql.close()
Ejemplo n.º 2
0
def jyeooAnalysisToPic():
    """Render the web and app analysis page of each jyeoo question to a PNG.

    Reads ``qid`` values from ``t_ques_jyeoo`` in windows of 1000 ``seq``
    values.  For each ``qid`` it calls ``Utils.htmlToImages`` to produce
    ``picture/<qid>.png`` (width argument 605) and ``picture/<qid>.app.png``
    (width argument 306), skipping any file that already exists.  The
    connection is always closed on exit.
    """
    postgresql = PostgreSql()
    try:
        rows = 1000
        page = 0
        flag = True  # whether the last seq window returned any row
        sql = 'SELECT qid FROM "public"."t_ques_jyeoo" where seq > %s and seq <= %s'
        # Fixed: the web URL was written as 'http:/10.200...' (single slash),
        # which is not a valid absolute URL; it now matches base_app_url.
        base_url = 'http://10.200.150.2:20017/ques/analyseById?qid=%s'
        base_app_url = 'http://10.200.150.2:20017/ques/app/analyseById?qid=%s'
        root_path = 'picture'
        if not os.path.exists(root_path):
            os.makedirs(root_path)
        count = 0  # number of questions visited
        while flag:
            flag = False
            page += 1
            # NOTE(review): the loop ends at the first seq window that holds
            # no rows, so a gap of 1000+ consecutive seq values would stop
            # processing early - confirm seq is dense.
            for row in postgresql.getAll(sql,
                                         (rows * (page - 1), rows * page)):
                qid = row[0]
                flag = True
                count += 1
                # Web rendering.
                url = base_url % qid
                pic_file = os.path.join(root_path, qid + '.png')
                if not os.path.exists(pic_file):
                    Utils.htmlToImages(url, pic_file, 605)
                # App rendering.
                app_url = base_app_url % qid
                pic_app_file = os.path.join(root_path, qid + '.app.png')
                if not os.path.exists(pic_app_file):
                    Utils.htmlToImages(app_url, pic_app_file, 306)
            logger.info(u'已处理菁优图片数量%d', count)
    finally:
        postgresql.close()
Ejemplo n.º 3
0
 def downloadQuestions(
         self,
         ques_type='',
         ques_pg_url=URL.QUES_PG_URL,
         select_knowled_id=SQL.SELECT_KNOWLED_ID,
         select_subject_sql=SQL.SELECT_SUBJECT_RELATION,
         select_params_type=SQL.SELECT_PARAMS_TYPE,
         update_knowled_downloded=SQL.UPDATE_KNOWLED_DOWNLODED):
     '''Download questions by knowledge point, one subject at a time.

     Builds a ``{subject_code: {code_21cnjy: type-info}}`` lookup from the
     rows returned for ``select_params_type`` and then, for every row of
     ``select_subject_sql``, delegates to ``__downloadQuestionsBySubject``.

     Raises:
         Exception: the first failure of a subject is logged, the
             transaction is rolled back and the original exception is
             re-raised, so later subjects are not processed.
     '''
     pg = PostgreSql()
     try:
         # Question-type information, grouped by subject code.
         ques_type_dic = {}
         for row_ques_types in pg.getAll(select_params_type,
                                         ('ques_type', 1)):
             subject_code, code_21cnjy, name_21cnjy, code, name = row_ques_types
             ques_type_dic.setdefault(subject_code, {})[code_21cnjy] = {
                 'code_21cnjy': code_21cnjy,
                 'name_21cnjy': name_21cnjy,
                 'code': code,
                 'name': name
             }
         # One download pass per subject / school-stage row.
         for row in pg.getAll(select_subject_sql):
             # Pre-bind the fields used by the error log so the handler
             # cannot itself fail with NameError if unpacking raises.
             subject_code = xd = subject_zname = course_21 = None
             try:
                 subject_code, xd, subject_zname, course_21 = row
                 self.__downloadQuestionsBySubject(
                     row, pg, ques_pg_url, ques_type_dic[subject_code],
                     ques_type, select_knowled_id, update_knowled_downloded)
             except Exception:
                 logger.exception(
                     u'二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)题目导入异常', xd,
                     course_21, subject_zname, subject_code)
                 pg.rollback()
                 # Bare raise keeps the original traceback (``raise e``
                 # restarts it at this line on Python 2).
                 raise
     finally:
         pg.close()
Ejemplo n.º 4
0
def findImags(subject):
    """Convert the answer image of each question of ``subject`` to text.

    Pages through ``SELECT_SQL`` (``ROWS`` rows per query, continuing after
    the last ``qid`` seen).  For every row the first element returned by
    ``getZjImg(row[1])`` is passed to ``getTextByImageUrl2``; the result is
    stored via ``UPDATE_SQL`` as a one-element JSON array.  A row that fails
    is logged and queued for ``UPDATE_STATUS_SQL`` with the value ``-1``.
    Each page is committed on its own; a page-level failure is rolled back
    and logged, and the connection is always closed.

    Args:
        subject: subject code; formatted with ``%d`` in the log messages.
    """
    rows = ROWS
    # Created before the try block: if the constructor raised inside it, the
    # finally clause would hit an unbound ``pg`` and mask the real error.
    pg = PostgreSql()
    try:
        flag = True
        count = 0
        qid = '0'

        err_count = 0
        while flag:
            try:
                flag = False
                update_params = []
                update_err_params = []
                for row in pg.getAll(SELECT_SQL, (subject, qid, rows)):
                    flag = True
                    qid = row[0]
                    url = None
                    try:
                        # Extracting the URL is now part of the per-row
                        # guard: previously a row without an image raised
                        # out of the loop and discarded every result already
                        # queued for this page.
                        url = getZjImg(row[1])[0]
                        choice_answer = getTextByImageUrl2(url)
                        update_params.append(
                            (json.dumps([choice_answer], ensure_ascii=False),
                             qid))
                        count += 1
                    except Exception:
                        err_count += 1
                        update_err_params.append((-1, qid))
                        logger.exception('异常的题目ID:%s,url:%s', qid, url)
                if update_params: pg.batchExecute(UPDATE_SQL, update_params)
                if update_err_params:
                    pg.batchExecute(UPDATE_STATUS_SQL, update_err_params)
                pg.commit()
                logger.info(u'学科编码:%d,已成功处理题目数量:%d,错误数量:%d' %
                            (subject, count, err_count))
            except Exception as e:
                pg.rollback()
                logger.exception("学科编码:%d,批量处理-异常信息:%s" % (subject, e.message))
    finally:
        pg.close()
Ejemplo n.º 5
0
        total += 1
        if len(params) >= 1000:
            try:
                pg.batchExecute(sql, params)
                pg.commit()
                count += len(params)
                params = []
            except Exception as e:
                pg.rollback()
                print(e.message)
            print u'处理总数:%d,成功处理数量: %d' % (total, count)
    if params:
        try:
            pg.batchExecute(sql, params)
            pg.commit()
            count += len(params)
        except Exception as e:
            pg.rollback()
            print(e.message)
        print u'处理总数:%d,成功处理数量: %d' % (total, count)


if __name__ == '__main__':
    # Script entry point: run exportKnowledToPg with one MongoDB handle and
    # one PostgreSQL handle.  The try/finally blocks are nested so that the
    # MongoDB handle is closed even when opening or closing the PostgreSQL
    # handle raises; close order (pg first, then mongon) is unchanged.
    mongon = MongoDB()
    try:
        pg = PostgreSql()
        try:
            exportKnowledToPg(mongon, pg)
        finally:
            pg.close()
    finally:
        mongon.close()
Ejemplo n.º 6
0
 def findImags(self, subject):
     '''Collect the image URLs of each question of ``subject`` and record them.

     Pages through ``self.SELECT_SQL`` (``self.ROWS`` rows per query,
     continuing after the last ``row[2]`` value seen).  For each row the
     non-None columns from index 3 onwards are passed to ``self.getZjImg``
     and the returned URLs are handed to ``self.generateTmpImage``.  The
     row is then queued for ``self.INSERT_SQL`` (status 0 with URLs, 2
     without) and ``self.UPDATE_SQL`` (status 1 with URLs, 2 without).
     Row failures are logged and the row is left unqueued; a page-level
     failure is rolled back and logged.  The connection is always closed.
     '''
     seq_num = 0
     rows = self.ROWS
     # Created before the try block: if the constructor raised inside it,
     # the finally clause would hit an unbound ``pg`` and mask the error.
     pg = PostgreSql()
     try:
         flag = True
         count = 0
         while flag:
             try:
                 flag = False
                 insert_params = []
                 update_params = []
                 for row in pg.getAll(self.SELECT_SQL,
                                      (subject, seq_num, rows)):
                     flag = True
                     qid = row[0]
                     old_id = row[1]
                     seq_num = row[2]
                     try:
                         urls = []
                         for col in row[3:]:
                             if col is None: continue
                             urls.extend(self.getZjImg(col))
                         print(urls)
                         # Create the temporary image files.
                         self.generateTmpImage(urls)
                         # img table: status 0 when images exist, else 2.
                         insert_params.append(
                             (qid, json.dumps(urls), 0 if urls else 2))
                         # Main table: status 1 when images exist, else 2.
                         update_params.append((1 if urls else 2, qid))
                     except Exception as ex:
                         logger.exception(
                             u"处理qi=%s,old_id=%s,创建题目的图片发生异常,异常信息:%s" %
                             (qid, old_id, ex.message))
                 if update_params:
                     pg.batchExecute(self.UPDATE_SQL, update_params)
                 if insert_params:
                     pg.batchExecute(self.INSERT_SQL, insert_params)
                 pg.commit()
                 count += len(update_params)
                 logger.info(u'已成功处理题目数量:%d' % count)
             except Exception as e:
                 pg.rollback()
                 logger.exception("批量处理-异常信息:%s" % (e.message))
     finally:
         pg.close()
Ejemplo n.º 7
0
                ek_name = ek_soup['nm']
                for bk_soup in ek_soup.find_all('li',attrs={'bk':True}):
                    grade_id = bk_soup['bk']
                    grade_name = bk_soup['nm']
                    logger.info(u'选择版本年级%s-%s-%s-%s',ek_id,ek_name,grade_id,grade_name)
                    select_sql = 'select grade_id from t_grade_ek_20180601 WHERE grade_id=%s  '
                    r = pg.getOne(select_sql,(grade_id,))
                    if r : continue
                    insert_sql = 'insert into t_grade_ek_20180601(grade_id,grade_name,ek_id,ek_name,subject_id,subject_name) ' \
                          'VALUES (%s,%s,%s,%s,%s,%s)'
                    try:
                        pg.execute(insert_sql,(grade_id,grade_name,ek_id,ek_name,course[0],course[2]))
                        pg.commit()
                    except  Exception as e:
                        pg.rollback()
                        logger.exception(u'保存年级版本ID异常,异常信息%s',e.message)
        finally:
            pickle.dump(driver.get_cookies(), open("cookies.pkl", "wb"))
            pickle.dump(time.time(),open("time.pkl", "wb"))


if __name__ == '__main__':
    # Script entry point: run JyeooEkGrade.mainSelection once for every row
    # returned by SQL_SUBJECT, sharing a single PostgreSql handle.
    selection = JyeooEkGrade(browserType=2)
    pg = PostgreSql()
    try:
        c_list = pg.getAll(SQL_SUBJECT)
        for course in c_list:
            selection.mainSelection(course,pg)
    finally:
        # Always release the database handle, even if a subject fails.
        pg.close()
Ejemplo n.º 8
0
 def downloadKnowled(self,
                     select_sql=SQL.SELECT_SUBJECT_RELATION,
                     inser_konw_sql=SQL.INSERT_21CNJY_KNOWLED,
                     know_url=URL.KNOW_URL,
                     know_child_url=URL.KNOW_CHILD_URL):
     '''Download the knowledge points of every subject relation row.

     For each row of ``select_sql`` the root knowledge document is fetched
     from ``know_url`` through ``self.session``, handed to
     ``__recursiveKnowled`` together with a partially formatted
     ``know_child_url``, and the records it returns are batch-inserted
     with ``inser_konw_sql`` and committed.  A failing row is logged and
     rolled back and the loop moves on; the connection is always closed.
     '''
     pg = PostgreSql()
     try:
         for relation in pg.getAll(select_sql):
             try:
                 subject_code, stage, subject_name, course_code = relation
                 root_url = know_url % (stage, course_code,
                                        Utils.getCurrMilliSecond())
                 response = self.session.get(root_url, headers=self.headers)
                 knowledge_tree = response.json()
                 # Leave the two outer '%s' slots unfilled for the helper.
                 child_url = know_child_url % ('%s', stage, course_code, '%s')
                 records = self.__recursiveKnowled(knowledge_tree, 1,
                                                   subject_code, child_url)
                 if records:
                     pg.batchExecute(inser_konw_sql, records)
                 pg.commit()
                 logger.info(
                     u'完成二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)知识点的导入,导入知识点数量:%d',
                     stage, course_code, subject_name, subject_code,
                     len(records))
             except Exception:
                 logger.exception(
                     u'二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)知识点的导入异常',
                     stage, course_code, subject_name, subject_code)
                 pg.rollback()
     finally:
         pg.close()
Ejemplo n.º 9
0
 def extractQuesImage(self,
                      rows=1000,
                      select_batch_ques=SQL.SELECT_BATCH_QUES,
                      insert_image_url=SQL.INSERT_IMAGE_URL,
                      update_status=SQL.UPDATE_STATUS):
     '''Extract the image URLs of each question and record them.

     Pages through ``select_batch_ques`` with parameters
     ``(0, seq, rows)``, where ``seq`` is the last ``row[2]`` value seen.
     For each row the URL list starts with ``row[3]`` and ``row[4]`` and
     is extended with the result of ``__get21cnjyImg`` for every non-None
     column from index 5 onwards; the list is passed to
     ``__generateTmpImage`` and the row is queued for ``insert_image_url``
     and ``update_status``.  Row failures are logged and the row is left
     unqueued; a page-level failure is rolled back and logged.  The
     connection is always closed.

     Args:
         rows: page size passed as the last query parameter.
     '''
     logger.info(u'开始分析提取题目图片')
     seq = 0
     # Created before the try block: if the constructor raised inside it,
     # the finally clause would hit an unbound ``pg`` and mask the error.
     pg = PostgreSql()
     try:
         flag = True
         count = 0
         while flag:
             try:
                 flag = False
                 insert_params = []
                 update_params = []
                 for row in pg.getAll(select_batch_ques, (0, seq, rows)):
                     flag = True
                     qid = row[0]
                     old_id = row[1]
                     seq = row[2]
                     try:
                         # NOTE(review): this list always has two elements,
                         # so the "no image" status 2 below can never be
                         # chosen, even if row[3]/row[4] are None - confirm
                         # whether None entries should be filtered out.
                         urls = [row[3], row[4]]
                         for col in row[5:]:
                             if col is None: continue
                             urls.extend(self.__get21cnjyImg(col))
                         # Create the temporary image files.
                         self.__generateTmpImage(urls)
                         # img table: status 0 when images exist, else 2.
                         insert_params.append((seq, qid, Utils.toJson(urls),
                                               0 if urls else 2))
                         # Main table: status 1 when images exist, else 2.
                         update_params.append((1 if urls else 2, qid))
                     except Exception as ex:
                         logger.exception(
                             u"处理qi=%s,old_id=%s,创建题目的图片发生异常,异常信息:%s" %
                             (qid, old_id, ex.message))
                 if update_params:
                     pg.batchExecute(update_status, update_params)
                 if insert_params:
                     pg.batchExecute(insert_image_url, insert_params)
                 pg.commit()
                 count += len(update_params)
                 logger.info(u'已成功处理题目数量:%d' % count)
             except Exception as e:
                 pg.rollback()
                 logger.exception("批量处理-异常信息:%s" % (e.message))
     finally:
         pg.close()
Ejemplo n.º 10
0
 def downloadSubject(self,
                     select_sql=SQL.SELECT_SUBJECT_RELATION,
                     update_sql=SQL.UPDATE_SUBJECT_RELATION,
                     subjects_url=None):
     '''Download the 21cnjy subject list and map it onto the local subjects.

     Fetches a JSON document from ``subjects_url`` and, for every row of
     ``select_sql`` that has no ``course_21`` value yet, looks for a
     21cnjy subject of the same school stage whose name occurs in the
     local subject name (``政治思品`` is matched as ``政治``).  When several
     entries match, the last one iterated wins.  All matches are written
     with ``update_sql`` in one batch and committed; any failure is logged
     and rolled back.  The connection is always closed.

     Args:
         subjects_url: URL of the subject list.  ``None`` (the default)
             means ``URL.SUBJECTS_URL`` formatted with the current
             millisecond timestamp at call time.  Previously the default
             was computed once when the method was defined, so every call
             reused that single timestamp.
     '''
     if subjects_url is None:
         subjects_url = URL.SUBJECTS_URL % Utils.getCurrMilliSecond()
     response = self.session.get(subjects_url, headers=self.headers)
     zj21cnjy_subject = response.json()
     pg = PostgreSql()
     try:
         update_params = []
         for row in pg.getAll(select_sql):
             subject_code, xd, subject_zname, course_21 = row
             # Already mapped: nothing to do for this row.
             if course_21: continue
             course_21_name = None
             for key, value in zj21cnjy_subject[str(xd)].iteritems():
                 value_temp = value if value != u'政治思品' else u'政治'
                 if subject_zname.find(value_temp) > -1:
                     course_21 = key
                     course_21_name = value
             if course_21:
                 update_params.append(
                     (course_21, course_21_name, subject_code))
         if update_params: pg.batchExecute(update_sql, update_params)
         pg.commit()
         logger.info(u'完成21cnjy与线网学科学段对应关系更新,更新数量:%d', len(update_params))
     except Exception as e:
         logger.exception(u'21cnjy与线网学科学段对应关系更新出现异常,异常信息:%s', e.message)
         pg.rollback()
     finally:
         pg.close()
Ejemplo n.º 11
0
 def downloadQueryParams(self,
                         select_sql=SQL.SELECT_SUBJECT_RELATION,
                         insert_sql=SQL.INSERT_21CNJY_TYPE,
                         query_param_url=URL.QUERY_PARAM_URL,
                         ques_query_type=QUES_QUERY_TYPE):
     '''Download the question query parameters (question type, difficulty, ...).

     For every row of ``select_sql`` a JSON document is fetched from
     ``query_param_url``.  Each ``code -> name`` pair found under a
     parameter type that is a key of ``ques_query_type`` is queued for
     ``insert_sql`` with a running number (shared across all subjects) as
     its id.  Each subject is committed separately; a failing subject is
     logged and rolled back and the loop moves on.  The connection is
     always closed.
     '''
     pg = PostgreSql()
     count = 0  # running id, shared by all subjects
     try:
         for row in pg.getAll(select_sql):
             try:
                 subject_code, xd, subject_zname, course_21 = row
                 insert_params = []
                 response = self.session.get(
                     query_param_url %
                     (xd, course_21, Utils.getCurrMilliSecond()),
                     headers=self.headers)
                 for param_type, values in response.json().iteritems():
                     # ``in`` replaces the deprecated dict.has_key().
                     if param_type not in ques_query_type:
                         continue
                     for code, name in values.iteritems():
                         count += 1
                         # ``count`` is used directly as the record id; the
                         # former local ``id`` shadowed the builtin.
                         insert_params.append(
                             (count, ques_query_type[param_type], code, name,
                              subject_code))
                 if insert_params:
                     pg.batchExecute(insert_sql, insert_params)
                 pg.commit()
                 logger.info(
                     u'完成二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)查询参数的导入,导入参数的数量:%d,所有学科处理总数%d',
                     xd, course_21, subject_zname, subject_code,
                     len(insert_params), count)
             except Exception as e:
                 logger.exception(
                     u'二一组卷网(学段:%s,学科:%s,线上学科名称:%s,线上学科代码:%s)查询参数的导入异常', xd,
                     course_21, subject_zname, subject_code)
                 pg.rollback()
     finally:
         pg.close()
Ejemplo n.º 12
0
    def main(self,startTime=start_time, root_path=PATH.rootImagPath,
             pic_new_path=PATH.pic_new_path):
        """Check which questions have all images downloaded and publish them.

        Pages through ``self.SELECT_SQL_IMG`` (formatted with the highest
        row id seen so far).  For each question, every URL in its JSON URL
        list is mapped to a local file under ``root_path``:

        * file missing, or modified at/after the moment this run started:
          the question is not finished yet;
        * file modified at/after ``startTime``: it is copied to
          ``pic_new_path`` under an MD5-derived name (if not already
          there) and the old/new URL pair is queued for
          ``self.INSERT_SQL_CONVERT``;
        * otherwise: the new URL is looked up with ``self.SQL_URL``.

        Finished questions are queued for ``self.UPDATE_SQL`` (URL map and
        status 3) and ``self.UPDATE_SQL_IMG`` (status 1).  Each page is
        committed on its own; a page-level failure is rolled back and
        logged.  The connection is always closed.

        Args:
            startTime: lower mtime bound for files treated as newly
                downloaded.  The body previously ignored this parameter and
                read the module-level ``start_time`` instead.
            root_path: directory holding the downloaded images.
            pic_new_path: directory receiving the renamed copies.
        """
        select_sql = self.SELECT_SQL_IMG
        update_sql = self.UPDATE_SQL
        update_sql_img = self.UPDATE_SQL_IMG
        insert_sql = self.INSERT_SQL_CONVERT

        curr_time = time.time()
        curr_time_strft = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(curr_time))
        # Log the time of this analysis run.
        logger.info(u'本次分析时间:%s,秒:%.2f' % (curr_time_strft, curr_time))
        postgreSql = PostgreSql()
        count = 0
        total = 0
        try:
            flag = True  # the database may still hold rows to process
            last_id = 0  # paging cursor (was ``id``, shadowing the builtin)
            while flag:
                try:
                    flag = False
                    update_params = []
                    update_image_params = []
                    insert_params = []
                    for rows in postgreSql.getAll(select_sql % last_id):
                        flag = True
                        total += 1
                        last_id = rows[0] if rows[0] > last_id else last_id
                        qid = rows[1]
                        urls = rows[2]
                        try:
                            isDownloadFinish = True
                            urlMap = {}
                            for url in json.loads(urls):
                                url_path = urlparse.urlsplit(url)
                                fileName = os.path.join(root_path, url_path.path[1:])
                                if os.path.exists(fileName):
                                    mtime = os.path.getmtime(fileName)
                                    if mtime >= curr_time:
                                        # Written during this run: treat as
                                        # still downloading.
                                        isDownloadFinish = False
                                    elif startTime <= mtime:
                                        extension = os.path.splitext(fileName)[1]
                                        # New file name.
                                        file_new_name = "%s%s" % (Utils.getStrMD5(url + "-mqm"), extension)
                                        # New file name, full path.
                                        file_new_name_all = os.path.join(pic_new_path, file_new_name)
                                        # New URL.
                                        url_new = image_url + file_new_name
                                        urlMap[url] = url_new
                                        if not os.path.exists(file_new_name_all):
                                            shutil.copy2(fileName, file_new_name_all)
                                            Utils.modifyMD5(file_new_name_all)
                                            insert_params.append((url, url_new))
                                    else:
                                        # Image handled by an earlier run.
                                        # NOTE(review): if getOne returns no
                                        # row, the [0] raises and the whole
                                        # question is skipped by the handler
                                        # below - confirm that is intended.
                                        urlMap[url] = postgreSql.getOne(self.SQL_URL, (url,))[0]
                                else:
                                    isDownloadFinish = False

                            # All images present: update t_jyeoo_img_url.
                            if isDownloadFinish:
                                update_image_params.append((1, qid))
                                # Store the replacement URLs and set the
                                # source row to status 3 (has images,
                                # download finished).
                                update_params.append((json.dumps(urlMap), 3, qid))

                        except Exception as ex:
                            logger.exception(u"处理qi=%s,校验题目的所有图片下载是否完成发生异常,异常信息:%s" % (qid, ex.message))
                    if update_params: postgreSql.batchExecute(update_sql, update_params)
                    if update_image_params: postgreSql.batchExecute(update_sql_img, update_image_params)
                    if insert_params: postgreSql.batchExecute(insert_sql, insert_params)
                    postgreSql.commit()
                    count += len(update_image_params)
                    logger.info(u'已成功处理题目数量:%d,校验题目数量总数:%d' % (count, total))
                except Exception as e:
                    postgreSql.rollback()
                    logger.exception("批量处理-异常信息:%s" % (e.message))
        finally:
            postgreSql.close()
Ejemplo n.º 13
0
 def execExtract(self,
                 select_main_sql=SQL.select_main_sql,
                 insert_image_sql=SQL.insert_image_sql,
                 update_main_sql=SQL.update_main_sql,
                 select_convert_sql=SQL.select_convert_sql):
     '''Extract the image URLs of pending questions and record them.

     Repeatedly runs ``select_main_sql``.  For each row the distinct URLs
     returned by ``JyeooUtil.getJyeooImg`` for the columns from index 2
     onwards are collected and passed to ``__generateTmpImage``; the row
     is then queued for ``insert_image_sql`` (status 0 with URLs, 2
     without) and ``update_main_sql`` (status 1 with URLs, 2 without).
     Each batch is written and committed before the next query.

     A stray ``return rs`` used to sit right after the row loop, which
     made the batch writes and the commit unreachable; it has been
     removed.  Because the query takes no paging parameter, the loop now
     stops when a batch fails or when no row of a non-empty batch could
     be processed, instead of re-reading the same rows.

     Returns:
         True when every row and every batch succeeded, otherwise False.
     '''
     postgreSql = PostgreSql()
     count = 0
     rs = True
     try:
         flag = True  # the database may still hold rows to process
         while flag:
             try:
                 flag = False
                 insert_image_params = []
                 update_main_params = []
                 for rows in postgreSql.getAll(select_main_sql):
                     flag = True
                     seq = rows[0]
                     qid = rows[1]
                     try:
                         urls = []
                         for col in rows[2:]:
                             for j_url in JyeooUtil.getJyeooImg(col):
                                 if j_url not in urls:
                                     urls.append(j_url)
                         # Create the temporary image files.
                         self.__generateTmpImage(urls, postgreSql,
                                                 select_convert_sql)
                         # Row for the img table.
                         insert_image_params.append(
                             (seq, qid, json.dumps(urls), 0 if urls else 2))
                         # New status for the main table.
                         update_main_params.append((1 if urls else 2, qid))
                     except Exception as ex:
                         rs = False
                         logger.exception(
                             u"提取图片-----处理qi=%s,创建题目的图片发生异常,异常信息:%s" %
                             (qid, ex.message))
                 if insert_image_params:
                     postgreSql.batchExecute(insert_image_sql,
                                             insert_image_params)
                 if update_main_params:
                     postgreSql.batchExecute(update_main_sql,
                                             update_main_params)
                 postgreSql.commit()
                 count += len(insert_image_params)
                 logger.info(u'提取图片-----已成功处理题目数量:%d' % count)
                 if flag and not update_main_params:
                     # Rows were returned but none succeeded; presumably the
                     # same rows would come back on the next query (verify
                     # against select_main_sql), so stop here.
                     flag = False
             except Exception as e:
                 postgreSql.rollback()
                 rs = False
                 # Nothing was persisted, so re-running the same query
                 # could repeat this failure indefinitely.
                 flag = False
                 logger.exception(u"提取图片-----批量处理-异常信息:%s" % (e.message))
     finally:
         postgreSql.close()
     return rs
Ejemplo n.º 14
0
 def execParseImage(self,
                    select_image_sql=SQL.select_image_sql,
                    select_convert_sql=SQL.select_convert_sql,
                    update_main_url_sql=SQL.update_main_url_sql,
                    update_image_sql=SQL.update_image_sql,
                    insert_convert_sql=SQL.insert_convert_sql,
                    picture_path=PATH.picture_path,
                    pic_new_path=PATH.pic_new_path,
                    pic_relative_path=PATH.pic_relative_path,
                    image_url=image_url):
     '''Publish downloaded question images and record the URL conversions.

     Pages through ``select_image_sql`` (formatted with the highest row id
     seen so far).  For each question, every URL of its JSON URL list is
     mapped to a file under ``picture_path``:

     * file present: it gets an MD5-derived name; if that target does not
       exist yet the file is copied and the old/new URL pair is queued
       for ``insert_convert_sql``; if it exists, the pair is queued only
       when ``select_convert_sql`` finds no row for the URL;
     * file missing: the new URL is looked up with ``select_convert_sql``;
       without a row the question counts as not finished.

     Finished questions are queued for ``update_main_url_sql`` (URL map
     and status 3) and ``update_image_sql`` (status 1).  Each page is
     written and committed on its own; a page-level failure is rolled
     back and logged.  The connection is always closed.

     Fixes in this version: a stray ``return rs`` after the row loop made
     all batch writes unreachable; ``append(url, url_new)`` raised
     TypeError because it passed two arguments instead of one tuple; and
     the lookup result no longer overwrites the ``rs`` result flag.

     Returns:
         True when every row and every page succeeded, otherwise False.
     '''
     pic_new_real_path = os.path.join(pic_new_path, pic_relative_path)
     image_real_url = urlparse.urljoin(image_url, pic_relative_path)
     logger.info(u'进入处理图片流程,原始图片路径:%s,处理后图片存放路径:%s,图片url前缀地址:%s',
                 picture_path, pic_new_real_path, image_real_url)
     if not os.path.exists(pic_new_real_path):
         os.makedirs(pic_new_real_path)
     postgreSql = PostgreSql()
     count = 0
     total = 0
     rs = True
     try:
         flag = True  # the database may still hold rows to process
         last_id = 0  # paging cursor (was ``id``, shadowing the builtin)
         while flag:
             try:
                 flag = False
                 update_main_params = []
                 update_image_params = []
                 insert_convert_params = []
                 for rows in postgreSql.getAll(select_image_sql % last_id):
                     flag = True
                     total += 1
                     last_id = rows[0] if rows[0] > last_id else last_id
                     qid = rows[1]
                     urls = rows[2]
                     try:
                         isDownloadFinish = True
                         urlMap = {}
                         for url in json.loads(urls):
                             url_path = urlparse.urlsplit(url)
                             fileName = os.path.join(
                                 picture_path, url_path.path[1:])
                             if os.path.exists(fileName):
                                 extension = os.path.splitext(fileName)[1]
                                 # New file name.
                                 file_new_name = "%s%s" % (Utils.getStrMD5(
                                     url + "-mqm"), extension)
                                 # New file name, full path.
                                 # NOTE(review): the copy goes to
                                 # pic_new_path, while the directory created
                                 # above and the URL prefix both include
                                 # pic_relative_path - confirm which target
                                 # directory is intended.
                                 file_new_name_all = os.path.join(
                                     pic_new_path, file_new_name)
                                 # New URL.
                                 url_new = image_real_url + file_new_name
                                 urlMap[url] = url_new
                                 if os.path.exists(file_new_name_all):
                                     if not postgreSql.getOne(
                                             select_convert_sql, (url, )):
                                         insert_convert_params.append(
                                             (url, url_new))
                                 else:
                                     shutil.copy2(fileName,
                                                  file_new_name_all)
                                     Utils.modifyMD5(file_new_name_all)
                                     insert_convert_params.append(
                                         (url, url_new))
                             else:
                                 # No local file: look for an earlier
                                 # conversion of this URL.
                                 converted = postgreSql.getOne(
                                     select_convert_sql, (url, ))
                                 if converted:
                                     urlMap[url] = converted[0]
                                 else:
                                     isDownloadFinish = False
                         # All images present: update t_jyeoo_img_url.
                         if isDownloadFinish:
                             update_image_params.append((1, qid))
                             # Store the replacement URLs and set the source
                             # row to status 3 (has images, download done).
                             update_main_params.append(
                                 (json.dumps(urlMap), 3, qid))
                     except Exception as ex:
                         rs = False
                         logger.exception(
                             u"处理图片流程,qi=%s,校验题目的所有图片下载是否完成发生异常,异常信息:%s" %
                             (qid, ex.message))
                 if update_main_params:
                     postgreSql.batchExecute(update_main_url_sql,
                                             update_main_params)
                 if update_image_params:
                     postgreSql.batchExecute(update_image_sql,
                                             update_image_params)
                 if insert_convert_params:
                     postgreSql.batchExecute(insert_convert_sql,
                                             insert_convert_params)
                 postgreSql.commit()
                 count += len(update_image_params)
                 logger.info(u'处理图片流程,已成功处理题目数量:%d,校验题目数量总数:%d' %
                             (count, total))
             except Exception as e:
                 rs = False
                 postgreSql.rollback()
                 logger.exception(u"处理图片流程,批量处理-异常信息:%s" % (e.message))
     finally:
         postgreSql.close()
     return rs
Ejemplo n.º 15
0
            except Exception as e:
                logger.exception(u'解析获取知识点ID失败,原始文本:%s,错误信息:%s',
                                 a_soup['onclick'], e.message)
            points.append({'code': know_id, 'name': know_name})
        points = json.dumps(points, ensure_ascii=False)
        analysis_soup = box_soup.find('em', text=u'【分析】').parent
        analysis = re.findall(
            u'<div\s+class="pt[\d]"\s*>\s*<!--B[\d]-->\s*(.+?)<!--E[\d]-->\s*</div>',
            unicode(analysis_soup))[0].replace(u'<em>【分析】</em>', '')
        return (answer, analysis, points)


if __name__ == '__main__':
    pg_host = config.get(SELECTION_JYEOO, 'pg_host')
    pg_port = config.getint(SELECTION_JYEOO, 'pg_port')
    pg = PostgreSql(host=pg_host, port=pg_port)
    selection = None
    try:
        selection = JyeooSelectionQuestion(pg)
        #查询需要下载菁优题目的学科
        sd_list = []
        for s in pg.getAll(SQL_SUBJECT_DOWLOAD):
            sd_list.append(s[0])
        #开始下载
        c_list = pg.getAll(SQL_SUBJECT)
        for course in c_list:
            if course[0] == selection.getSubjectCode(
            ) and course[0] in sd_list:
                selection.mainSelection(course, pg)
        logger.info(u'本账号(%s)下、需要爬取版本的题目已全部完成!', selection.user_name)
    except Exception as e:
Ejemplo n.º 16
0
    def paresToPg(self,cid):
        """Convert the MongoDB questions of subject ``cid`` into PostgreSQL rows.

        Loads the question-type mapping for ``cid`` from
        ``t_ques_type_zujuan_relation``, then walks every document of the
        question collection with this ``cid`` and status 0 or 1.  Each
        document becomes one parameter tuple (new UUID, answer, analysis,
        type, content, options, knowledge points, ...) which is handed to
        ``self.batchInsertExecute`` in batches of 1000.  A document without
        an answer gets status ``None`` in MongoDB, a choice question whose
        options are a plain string gets status ``-1``; both are logged and
        skipped, as is any other per-document failure.  Both connections
        are closed at the end.

        Args:
            cid: subject / course id used to filter both data sources.
        """
        mg = MongoDB()
        pg = PostgreSql()
        try:
            coll = mg.getCollection(COLL.question)
            # Question types, keyed by the zujuan type code as a string.
            type_dic = {}
            for row in pg.getAll('SELECT code,name,zujuan_code,zujuan_name FROM t_ques_type_zujuan_relation where subject_code = %s',(cid,)):
                type_dic[str(row[2])] = {'code': row[0], 'name': row[1]}
            insert_params = []
            self.total = 0
            self.count = 0
            for doc in coll.find({'cid':cid,'status':{'$in':[0,1]}}):
                # Bound before the try block so the error log below can
                # never fail with NameError.
                old_id = doc.get('question_id')
                try:
                    self.total += 1
                    old_id = doc['question_id']  # original question id
                    new_data = doc['new_data']
                    qid = str(uuid.uuid1())  # new question id
                    difficulty = new_data['difficult_index']  # difficulty
                    subject = cid  # course id
                    provider = '04'  # source
                    status = 0  # status
                    # Leaf-level knowledge points.
                    points = json.dumps(
                        [{'code': kid_dic['kid'], 'name': kid_dic['ktitle']}
                         for kid_dic in doc['kids']])
                    # Question type.
                    question_channel_type = new_data['question_channel_type']
                    cate = type_dic[question_channel_type]['code']
                    cate_name = type_dic[question_channel_type]['name']

                    # Stem and answer.
                    content = new_data['question_text']
                    answer = []
                    if new_data.get('list'):
                        # Compound question: append every sub-question.
                        for childe_content in new_data['list']:
                            content = '%s <br/> %s' %(content,childe_content['question_text'])
                            if not childe_content['answer']:
                                coll.update_one({'question_id': old_id},
                                                {'$set': {'status': None},
                                                 "$currentDate": {"lastModified": True}})
                                raise Exception('题目答案异常,id:%s' % old_id)
                            answer.append('<img align="top" src="%s" />' % childe_content['answer'])
                            if childe_content['options']:
                                content = '%s <br/> %s' % (content, json.dumps( childe_content['options'],ensure_ascii=False))
                    else:
                        if not new_data['answer']:
                            coll.update_one({'question_id': old_id},
                                            {'$set': {'status': None},
                                             "$currentDate": {"lastModified": True}})
                            raise Exception('题目答案异常,id:%s' % old_id)
                        answer.append('<img align="top" src="%s" />' % new_data['answer'])
                    answer = json.dumps(answer,ensure_ascii=False)

                    # Options of choice questions.
                    options = []
                    if question_channel_type in ['1','2']:
                        raw_options = new_data['options']
                        if isinstance(raw_options, unicode):
                            # Choice question without options: status -1.
                            coll.update_one({'question_id': old_id},
                                            {'$set': {'status': -1}, "$currentDate": {"lastModified": True}})
                            # Skip explicitly; the original fell through to
                            # ``.items()`` and relied on the AttributeError.
                            raise Exception('题目选项异常,id:%s' % old_id)
                        # Order by option letter.  The former
                        # insert(ord(key) - ord('A'), value) over unordered
                        # dict items could misplace options (e.g. 'D' seen
                        # before 'C').
                        options = [raw_options[key]
                                   for key in sorted(raw_options)]
                    options = json.dumps(options,ensure_ascii=False)
                    analyses = new_data['explanation'] if doc['status'] == 1 else None  # analysis
                    if analyses:
                        # Fixed: was '< img ...', which is not a valid tag.
                        analyses = '<img align="top" src="%s" />' % analyses
                    insert_params.append((qid,answer,analyses,cate,cate_name,content,options,points,subject,difficulty,status,provider,old_id))
                except Exception as e:
                    logger.exception(u'处理分析组卷题目失败,题目id-%s',old_id)
                if len(insert_params)>= 1000:
                    self.batchInsertExecute(pg,insert_params)
                    insert_params = []
            self.batchInsertExecute(pg,insert_params)
        finally:
            mg.close()
            pg.close()