예제 #1
0
    def to_ot_process_error(self, old, todat):
        """此方法用于写库,如有需求写入新库可以调用此方法。

        """

        print old.id, old.url
        olds = Otraw
        print "[Analysis to %s] time: %s " % (todat, time.strftime('%y-%m-%d %H-%M-%S'))
        if ('openlaw' in old.url):
            self.point.set_tablename(Open)
            self.point.set_filter(filter='id = %s' % old.parent_id, limit=1)

            old_data = self.point.query()

        else:
            self.point.set_tablename(olds)
            self.point.set_filter(filter='id = %s' % old.parent_id, limit=1)

            old_data = self.point.query()
        if not old_data:
            return

        old_data = old_data[0]

        analy = Analyse()
        analy.text = old_data.content_all

        _header, part, content, case_sign = analy.split_to_four_parts()

        #: 开始检测数据错误
        ero = Judgment_checking(_header, part, content, case_sign)

        #: 检查案由
        ero.Checking_anyou(old.anyou)

        #: 检查原告被告
        ero.Checking_people(old.plaintiff, old.defendant)

        #: 检查原告被告律师
        ero.Checking_lawyer(old.plaintiff_lawyers, old.defendant_lawyers)

        #: 检查署名信息
        ero.Checking_sign()

        if ero.errors:
            # return list(set(ero.errors))
            for item in ero.errors:
                new = ot_process_error()
                new.judge_id = old.id
                new.action = item[0]
                new.error = item[1]
                new.user_name = 'System'
                new.addtime = str(int(time.time()))
                point = insert_database(
                    'inspection', tablename=todat, editor=new)
                point.update()
        return
예제 #2
0
    def to_ot_rawdata_judgement_court_gov_cn_old(self, old_data, todat):
        """分析方法,开始对数据进行分析

        目前只更新中间库的原被告和律师律所。
        其他信息并未更新。
        如有其他需求!可在中间库更新至结果库时进行进一步的分析。

        """
        analy = Analyse()
        analy.text = old_data.content_all
        new = Otraw()
        _header, part, content, case_sign = analy.split_to_four_parts()

        clients_attr, lawyers_attr = analy.guess_clients_lawyers(
            part.split('\n'))

        clients_attr[u'原告'] = list(set(clients_attr[u'原告']))
        clients_attr[u'被告'] = list(set(clients_attr[u'被告']))
        lawyers_attr[u'原告'] = list(set(lawyers_attr[u'原告']))
        lawyers_attr[u'被告'] = list(set(lawyers_attr[u'被告']))
        new.clients_attr = clients_attr
        new.lawyers_attr = lawyers_attr

        #plaintiff = ';'.join(u"%s:%s:%s" % client for client in clients_attr[u'原告'])
        #defendant = ';'.join(u"%s:%s:%s" % client for client in clients_attr[u'被告'])
        #plaintiff_lawyers = ';'.join(u"%s:%s" % lawyer for lawyer in lawyers_attr[u'原告'])
        #defendant_lawyers = ';'.join(u"%s:%s" % lawyer for lawyer in lawyers_attr[u'被告'])
        #new.lawyer_name = ''
        #new.firm_name = ''
        lawyers = []
        frim = []
        for item in lawyers_attr:
            for x, y in lawyers_attr[item]:
                if x:
                    lawyers.append(x.strip())

                if y:
                    frim.append(y.strip())

        # print new.lawyer_name, new.firm_name
        new.lawyer_name = ",".join(lawyers)
        new.firm = ",".join(frim)

        print old_data.id, old_data.url, new.lawyer_name, new.firm

        new.id = old_data.id
        point = insert_database('Judgmentold', tablename=todat, editor=new)
        point.update()
예제 #3
0
    def to_ot_judgment_diffed(self, old, todat):
        """Re-analyse one judgment and update its ``ot_judge_base`` row.

        Original note: the target ("to") database may be any database, but it
        must correspond to one declared in the configuration file.  This
        method used to serve as a correctness check; it is now used to update.

        Steps, as implemented below:

        1. Load the raw document whose id is ``old.parent_id`` from ``Open``
           (URL contains ``openlaw``) or ``Otraw`` (otherwise).
        2. Run ``self.fuzzy_analyse`` on it.  The result is indexed as:
           ``[0]`` the four text parts (``[0][0]`` header, ``[0][2]`` content,
           ``[0][3]`` case sign), ``[1]``/``[2]`` plaintiff/defendant as
           ``(people, lawyers)``, ``[3]`` signature dict, ``[4]`` header dict,
           ``[5]`` replace data, ``[6]`` end date.
        3. Fill an ``ot_judge_base`` object, run ``Judgment_checking`` on it
           and set ``status`` / ``base_check`` / ``come_from`` accordingly.
        4. Call ``update()``; when it returns 1062 the row with ``old.id`` is
           deleted.

        Args:
            old: existing judgment row (``id``, ``url``, ``parent_id``,
                ``type``, ``department`` and its ``__dict__`` are read).
            todat: only used in the progress message printed below.

        Returns:
            None.
        """

        # if old.status in ['-1', '3', '1'] or (not old.url):
        #    return

        print old.id, old.url
        olds = Otraw

        #diff = ot_judgment_diffed()

        print "[Analysis to %s] time: %s " % (todat, time.strftime('%y-%m-%d %H-%M-%S'))

        # Pick the raw-data table from the URL; the filter is the same for both.
        if ('openlaw' in old.url):
            self.point.set_tablename(Open)
            self.point.set_filter(filter='id = %s' % old.parent_id, limit=1)

        else:
            self.point.set_tablename(olds)
            self.point.set_filter(filter='id = %s' % old.parent_id, limit=1)

        old_data = self.point.query()
        if not old_data:
            return

        old_data = old_data[0]

        analysis_data = self.fuzzy_analyse(old_data)

        # The bare string below is disabled code kept as a no-op expression.
        # Its first line says: un-comment this code to write rows into the
        # correctness-verification database.
        """将此处代码注释去掉,可对验证正确性库进行写入数据
        new_plain_lawyer = {'Success': old.plaintiff_lawyers,
                            'old': plaintiff_lawyers,
                            'new': analysis_data[1][1]
                            }
        new_defen_lawyer = {'Success': old.defendant_lawyers,
                    'old': defendant_lawyers,
                    'new': analysis_data[2][1]
                    }
        plaintiff = ';'.join(u"%s:%s:%s" % client for client in old_data.clients_attr[u'原告'])
        defendant = ';'.join(u"%s:%s:%s" % client for client in old_data.clients_attr[u'被告'])
        
        new_plain_people = {'Success': old.plaintiff, 
                            'old': plaintiff,
                            'new': analysis_data[1][0]
                            }
        new_defen_people = {'Success': old.defendant, 
                            'old': defendant,
                            'new': analysis_data[2][0]
                            }
        
        diff.case_sign = analysis_data[0][3]
        diff.header = analysis_data[0][0]
        diff.content = analysis_data[0][2]
        diff.firsthead = analysis_data[0][1]
        diff.defendant = json.dumps(new_defen_people)
        diff.defendant_lawyers = json.dumps(new_defen_lawyer)
        diff.plaintiff = json.dumps(new_plain_people)
        diff.plaintiff_lawyers = json.dumps(new_plain_lawyer)
        diff.judgmentid = old.id
        diff.url = old.url
        point = insert_database('Judgment', tablename = todat, editor = diff)  #设置添加数据
        point.insert()   #添加数据
        """

        Update = ot_judge_base()

        # if (not old.chief_judge and not old.judge and not old.acting_judges ) or \
        #   (u'事务所' not in analysis_data[1][1] and u'事务所' not in analysis_data[2][1]):
        # : delete the record
        # print 'Delete new Analy %s' % old.id
        # point = insert_database('Judgment', tablename = ot_judge_base, filter = 'id = %s' % old.id)
        # point.delete()
        #    pass
        #: update CASE_SIGN: one <p>...</p> per line of the signature block
        case_sign = analysis_data[0][3].split('\n')
        Update.case_sign = '<p>' + '</p><p>'.join(case_sign) + '</p>'
        # else:
        # Maps match length -> matched cause of action; used to pick the longest.
        Pules = {}
        #: update the cause of action (anyou)
        anyou = []
        if old.type == u'行政':
            #: administrative cases: look in the header first
            anyou.extend(filter(lambda x: x in analysis_data[0][0], ACTIONS))
            if not anyou:
                #: then fall back to the first line of the content
                anyou.extend(filter(
                    lambda x: x in "".join(analysis_data[0][2].split('\n')[:1]), ACTIONS))

        else:
            # other case types: collect matches from the first content line
            # AND from the header (the "if not anyou" guard is disabled)
            anyou.extend(filter(
                lambda x: x in "".join(analysis_data[0][2].split('\n')[:1]), ACTIONS))
            # if not anyou:
            anyou.extend(filter(lambda x: x in analysis_data[0][0], ACTIONS))

        # Later matches overwrite earlier ones of the same length.
        for item in anyou:
            Pules[len(item)] = item

        # Keep the longest match; anyou changes from a list to a single string.
        if Pules:
            anyou = Pules[max(Pules)]

        if anyou:
            Update.anyou_id = ACTION_ID[anyou.strip()]
            # Store the alias when one is registered for this cause of action.
            if ANYOU_ALIAS.has_key(anyou):
                Update.anyou = ANYOU_ALIAS[anyou]
            else:
                Update.anyou = anyou
        else:
            # Nothing matched: empty cause of action with id 0.
            Update.anyou = ''
            Update.anyou_id = 0
        # print Update.anyou_id
        # First area lookup: department name with a few suffixes removed or
        # rewritten, encoded as GBK before being handed to AREA.ident.
        area_item = AREA.ident(old.department.replace(u'县', '').replace(u'自治区', '')
                               .replace(u'管城回族区', '管城回区').encode('gbk'))

        if area_item:
            Update.areacode = area_item['areano']
        else:
            # Second attempt with the city / district / province characters removed.
            # NOTE(review): when this lookup fails too, Update.areacode is never
            # assigned here; whatever ot_judge_base provides is what
            # Checking_area receives below.
            area_item = AREA.ident(
                old.department.replace(u'市', '').replace(u'区', '').replace(u'省', '').encode('gbk'))
            if area_item:
                Update.areacode = area_item['areano']

        # print 'Update new Analysis'
        Update.defendant = analysis_data[2][0]
        Update.defendant_lawyers = analysis_data[2][1]
        Update.plaintiff = analysis_data[1][0]
        Update.plaintiff_lawyers = analysis_data[1][1]
        Update.id = old.id

        #: update the judicial personnel from the signature dict
        case_sign_key = analysis_data[3]

        Update.chief_judge = ",".join(case_sign_key[u'审判长'])
        Update.acting_judges = ",".join(case_sign_key[u'代理审判员'])
        Update.judge = ",".join(case_sign_key[u'审判员'])
        # Clerks are de-duplicated; the other roles are joined as-is.
        Update.clerk = ",".join(list(set(case_sign_key[u'书记员'])))

        head_key = analysis_data[4]

        Update.department = head_key['department']
        Update.case_number = head_key['case_number']

        Update.type = head_key['type']
        Update.title = head_key['title']

        #: judgment date, converted to a timestamp with the Asia/Shanghai zone
        Update.end_date = arrow.get(
            analysis_data[6], 'Asia/Shanghai').timestamp
        #: sensitive information
        Update.replace_data = analysis_data[5]

        #: check sensitive information (disabled)
        #: for item in REPLACE:
        #:     if item in old_data.content_all:
        #:          Update.replace_data[item] = '****'

        Update.replace_data = json.dumps(Update.replace_data)
        #: start checking the correctness of the data
        ero = JudgmentCheck.Judgment_checking(
            analysis_data[0][0], analysis_data[0][1], analysis_data[0][2], analysis_data[0][3])

        #: verify the cause of action
        ero.Checking_anyou(Update.anyou, old.type)

        #: verify the area
        ero.Checking_area(Update.areacode, old.department)

        #: verify plaintiff and defendant
        ero.Checking_people(Update.plaintiff, Update.defendant)

        #: verify plaintiff and defendant lawyers
        ero.Checking_lawyer(Update.plaintiff_lawyers, Update.defendant_lawyers)

        #: verify the signature block
        #: detailed signature data can be passed in, e.g.
        #: keys = {'judge': old.judge}
        #: ero.Checking_sign(keys)
        #: which verifies the detailed signature information
        ero.Checking_sign()

        #: verify the trial organ (department)
        ero.Checking_department(old.department.decode('utf8'))

        if not ero.errors:
            # For every attribute that is empty (or u'无') on the old row and
            # also empty on the update, store the placeholder u'无'.
            for attr in old.__dict__.keys():
                if not getattr(old, attr) or str(getattr(old, attr)).strip() == u'无':
                    if (not getattr(Update, attr)) or str(getattr(Update, attr)).strip() == '':
                        setattr(Update, attr, u'无')
            Update.come_from = 'Update_Judgment_Checking_Success'
            Update.base_check = 1
            Update.status = 2

        else:

            # At least one check failed: mark the row as not validated.
            Update.come_from = 'Update_Judgment_Checking_Faild'
            Update.status = 0
            Update.base_check = 0

        point = insert_database(
            'Judgment', tablename=ot_judge_base, editor=Update)
        code = point.update()
        if code == 1062:
            #: duplicate data exists (1062 is presumably MySQL's duplicate-entry
            #: error code - confirm against insert_database); delete this row
            print "Delte From id = %s, table = %s" % (Update.id, ot_judge_base.__name__)
            point.set_filter('id = %s' % old.id)
            point.delete()
        return
예제 #4
0
    def to_ot_rawdata_judgement_court_gov_cn_old(self, old, todat):
        """Parse a crawled judgment page and insert it as an intermediate row.

        The judgment table is extracted from ``old.source_data``.  When the
        table is missing, the page is fetched again through the ``PROXY``
        hosts, the refreshed HTML is written back to the source row and the
        method returns without analysing (nothing is inserted in that call).

        Args:
            old: source row; ``id``, ``url`` and ``source_data`` are read.
            todat: unused here; the insert always targets
                ``ot_rawdata_judgement_court_gov_cn_old``.

        Raises:
            Exception: no proxy returned a successful response.
            IndexError: the re-fetched page still has no judgment table.
        """

        new = ot_rawdata_judgement_court_gov_cn_old()
        new.url = old.url
        new.referer = old.url

        analy = Analyse()
        try:
            raw_html = XPath(old.source_data).execute(
                '//*[@id="ws"]/table')[0].to_html()
        except IndexError:
            print '[Error] Analyse: url = %s' % new.url

            # Re-fetch through each proxy until one answers successfully.  A
            # proxy that fails at the network level must not abort the loop,
            # and an empty PROXY list must end in the error below rather than
            # in a NameError.
            # NOTE(review): 'http:%s:59274' has no '//' after the scheme;
            # confirm the format of the PROXY entries.
            r = None
            for item in PROXY:
                try:
                    r = requests.get(
                        new.url, proxies={'http': 'http:%s:59274' % item}, timeout=30)
                except requests.RequestException:
                    continue
                if r.ok:
                    break
            if r is None or not r.ok:
                raise Exception('Get faild url = %s' % old.url)

            to = old.__class__()
            to.id = old.id
            to.source_data = r.text
            # Fail loudly (IndexError) if the fresh page still lacks the table.
            XPath(to.source_data).execute('//*[@id="ws"]/table')[0].to_html()
            # Persist the refreshed source row.  The row being updated is
            # ``to`` (id + new HTML), not the half-built ``new`` object.
            point = insert_database(
                'Judgment', tablename=to.__class__, editor=to)
            point.update()
            return
        text = html_to_text(HTML_PARSER.unescape(raw_html))
        try:
            # Strip the DOCTYPE remnant that survives the text conversion.
            text = re.sub('//W3C//DTD HTML 4.0 Transitional//EN\'>', '', text)
        except TypeError:
            # Best effort: leave ``text`` untouched when it is not a string.
            pass
        analy.text = text
        new.content_all = analy.text

        _header, part, content, case_sign = analy.split_to_four_parts()

        new.clients_attr, new.lawyers_attr = analy.guess_clients_lawyers(
            part.split('\n'))

        end_date = analy.guess_end_date(case_sign)
        new.end_date = end_date

        case_sign_key = analy.guess_case_sign(case_sign.split('\n'))
        head_key = analy.guess_header_types(_header.split('\n'))

        new.content = part + content

        new.case_sign = case_sign
        new.case_number = head_key['case_number']
        new.department = head_key['department']
        new.type = head_key['type']
        new.title = head_key['title']
        new.case_type = head_key['case_type']

        # Keep an already-set procedure; otherwise derive it from the case number.
        new.procedure = new.procedure or analy.guess_procedure(new.case_number)

        new.replace_data = json.dumps(analy._replace_data(part))

        new.chief_judge = ",".join(case_sign_key[u'审判长'])
        new.acting_judges = ",".join(case_sign_key[u'代理审判员'])
        new.judge = ",".join(case_sign_key[u'审判员'])
        # Clerks are de-duplicated; the other roles are joined as-is.
        new.clerk = ",".join(list(set(case_sign_key[u'书记员'])))

        new.input_time = arrow.now().timestamp

        new.parent_id = old.id
        print 'Runing String <ot_rawdata_judgement_court_gov_cn_old> parent_id = %s , url = %s' % (old.id, old.url)

        point = insert_database(
            'Judgment', tablename=ot_rawdata_judgement_court_gov_cn_old, editor=new)
        point.insert()
예제 #5
0
def insert_base(old, Update_id=0):
    area = Area_duct()
    actions_id, actions, replace, anyou_alias = anyou_replace("Judgment")
    new = ot_judge_base()
    for attr in ("content", "case_sign", "case_type", "department", "end_date"):
        if getattr(old, attr) is None:
            print >> __output__, u"【提示】%s 字段为空,请检查数据" % attr
            return
    # if old.case_type not in self.case_mode:
    #    return
    for mode in case_mode:
        if mode in old.case_type:
            new.case_type = mode
    if not new.case_type:
        print >> __output__, u"【提示】文书字号为空"
        return
    if old.content == "":
        print >> __output__, "u你所访问的数据为空"
    new.content = "<p>" + "</p><p>".join(old.content.split("\n")) + "</p>"
    new.content_md5 = md5(new.content.encode("utf8")).hexdigest()
    new.case_sign = "<p>" + "</p><p>".join(old.case_sign.split("\n")) + "</p>"
    new.case_number = old.case_number

    new.type = new.case_type[:-3]
    #: 如果是仲裁,那属于民事
    if new.type == u"仲裁":
        new.type = u"民事"
    new.title = old.title
    if not new.title:
        new.title = old.content_all.split("\n")[0]

    Pules = {}
    #: 更新案由信息

    if new.type == u"行政":
        anyou = filter(lambda x: x in new.title, actions)
        if not anyou:
            anyou = filter(lambda x: x in old.content.split("\n")[0], actions)
            if anyou:
                anyou = anyou[0]
        else:
            for item in anyou:
                Pules[len(item)] = item
    else:
        anyou = filter(lambda x: x in old.content.split("\n")[0], actions)
        if not anyou:
            anyou = filter(lambda x: x in new.title, actions)
            for item in anyou:
                Pules[len(item)] = item
        else:
            anyou = anyou[0]
    if Pules:
        anyou = Pules[max(Pules)]

    new.anyou_id = actions_id[anyou.strip()]
    new.anyou = anyou
    # new.anyou_id = actions_id[new.anyou]

    new.department = old.department

    new.chief_judge = old.chief_judge

    new.judge = old.judge

    if old.acting_judges:
        new.acting_judges = old.acting_judges
    else:
        new.acting_judges = "无"

    new.clerk = old.clerk

    new.plaintiff = ";".join(u"%s:%s:%s" % client for client in old.clients_attr[u"原告"])
    new.plaintiff_lawyers = ";".join(u"%s:%s" % lawyer for lawyer in old.lawyers_attr[u"原告"])
    print >> __plai_people__, u"原告:%s, 律师:%s" % (new.plaintiff, new.plaintiff_lawyers)
    new.defendant = ";".join(u"%s:%s:%s" % client for client in old.clients_attr[u"被告"])
    new.defendant_lawyers = ";".join(u"%s:%s" % lawyer for lawyer in old.lawyers_attr[u"被告"])
    print >> __defan_people__, u"被告:%s, 律师:%s" % (new.defendant, new.defendant_lawyers)
    new.procedure = old.procedure

    new.end_date = arrow.get(old.end_date, "Asia/Shanghai").timestamp

    # 分析地区
    area_item = area.ident(new.department.encode("gbk"))

    if area_item and area_item.get("staut") == "timed out":
        new.areacode = area_item["areano"]

    new.url = old.referer

    print >> __infomation__, old.case_sign
    new.replace_data = json.dumps(old.replace_data)
    # dic = {}
    # for k, v in old.replace_data.iteritems():
    #    if not re.match(ur".*(某|X|x|\*).*", k):
    #        dic.update({k:v})
    # new.replace_data = json.dumps(dic)
    new.input_time = arrow.now().timestamp
    if (not new.chief_judge and not new.judge and not new.acting_judges.strip()) or (
        u"事务所" not in new.plaintiff_lawyers and u"事务所" not in new.defendant_lawyers
    ):
        print >> __output__, u"不存在事务所或者署名信息"
        return
    print old.id
    if Update_id != 0:
        new.id = Update_id
        new.url = old.url
        point = insert_database("Judgment", tablename=ot_judge_base, editor=new)
        point.update()
    else:
        point = insert_database("Judgment", tablename=ot_judge_base, editor=new)
        point.insert()
예제 #6
0
def web_port(case_number="", title="", depart="", datasource=None):
    """Analyse a judgment text posted to the web API and store the result.

    According to the original note this backs
    ``POST http://192.168.1.118/api/v1.0/judgment/`` with the form fields
    ``case_number``, ``title``, ``depart`` (department) and ``datasource``
    (content).  Only ``datasource`` is used by this function.

    Args:
        case_number: accepted for interface compatibility; unused.
        title: accepted for interface compatibility; unused.
        depart: accepted for interface compatibility; unused.
        datasource: judgment text to analyse.

    Returns:
        ``None`` when ``datasource`` is empty.  ``{"error": datasource}``
        when the analysed parties cannot be formatted.  ``{"error": exc}``
        when the insert raises.  Otherwise a dict with the keys ``People``,
        ``lawyers``, ``case_number``, ``title``, ``sign``, ``type`` and
        ``error`` (0).
    """
    if not datasource:
        return None

    new = test_old()
    analy = JudgmentProcesser()

    analy.fuzzy_analyse(new, datasource)

    try:
        # De-duplicate both sides of the client and lawyer mappings.
        for mapping in (new.clients_attr, new.lawyers_attr):
            for side in (u"原告", u"被告"):
                mapping[side] = list(set(mapping[side]))
        plaintiff = ";".join(u"%s:%s:%s" % client for client in new.clients_attr[u"原告"])
        defendant = ";".join(u"%s:%s:%s" % client for client in new.clients_attr[u"被告"])
        plaintiff_lawyers = ";".join(u"%s:%s" % lawyer for lawyer in new.lawyers_attr[u"原告"])
        defendant_lawyers = ";".join(u"%s:%s" % lawyer for lawyer in new.lawyers_attr[u"被告"])
    except Exception:
        # The analysis did not yield usable party data; echo the input back.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        return {"error": datasource}

    # Bind the analysed row to the writer, then insert it.
    point = insert_database("Sqlextend", tablename=test_old, editor=new)
    try:
        point.insert()
    except Exception as e:
        return {"error": e}

    print(plaintiff)
    print(defendant)
    print(plaintiff_lawyers)
    print(defendant_lawyers)

    return {
        "People": {u"原告": plaintiff, u"被告": defendant},
        "lawyers": {u"原告": plaintiff_lawyers, u"被告": defendant_lawyers},
        "case_number": new.case_number,
        "title": new.title,
        "sign": new.case_sign,
        "type": new.case_type,
        "error": 0,
    }
예제 #7
0
    def to_ot_judgment_inspection(self, old, todat):
        """Extract parties, lawyers, firms and signers for the inspection table.

        Only the opening paragraphs of ``old.content`` are scanned: reading
        stops after the first paragraph containing one of the "case accepted"
        phrases.  Each party line arms a flag for its side; the next lawyer
        line is attributed to the armed side (plaintiff checked first) and
        disarms it.

        Args:
            old: judgment row; ``id``, ``content`` and ``case_sign`` (both
                ``<p>...</p>`` markup) are read.
            todat: table object the inspection row is inserted into.

        Returns:
            None.
        """
        new = ot_judge_inspection()
        new.judgmentid = old.id

        # A paragraph containing any of these phrases ends the party section.
        stop_markers = (u'一案', u'诉状', u'提起公诉', u'检察院指控', u'立案执行',
                        u'提起诉讼', u'提起上诉', u'诉至本院', u'本院受理')
        content = []
        for item in old.content.replace("<p>", '').split("</p>"):
            content.append(item)
            if any(marker in item for marker in stop_markers):
                break

        # ``(?<!被)`` keeps "被起诉人" out of the plaintiff pattern; without it
        # the "被起诉人" alternative of the defendant pattern is unreachable.
        plaintiff_pattern = (u'^原告|^上诉人|^申诉人|(?<!被)起诉人|^公诉机关|申请再审人'
                             u'|再审申请人|^申请人|^第.原告|^原告.:')
        defendant_pattern = u'被上诉人|被申诉人|被起诉人|^被告|被申请人|^原公诉机关|^第.被告|^被告.:'

        lawyer = {u'plai': [], u'defen': []}
        lawyer_per = {u'plai': [], u'defen': []}
        frims = []
        checking = {u'原告': 0, u'被告': 0}

        for item in content:
            if u'代理人' in item or u'辩护人' in item or u'律师' in item:
                frim = self._check_lawyer_in_line(item)
                # NOTE(review): when both flags are armed (plaintiff line, then
                # defendant line, then a lawyer line) the lawyer goes to the
                # plaintiff - confirm this is intended.
                if checking[u'原告'] == 1:
                    role, side = u'原告', 'plai'
                elif checking[u'被告'] == 1:
                    role, side = u'被告', 'defen'
                else:
                    continue
                for firm in frim:
                    lawyer[side].append(firm[0] + ':' + firm[1])
                    frims.append(firm[1])
                checking[role] = 0
                continue

            if re.search(plaintiff_pattern, item):
                role, side = u'原告', 'plai'
            elif re.search(defendant_pattern, item):
                role, side = u'被告', 'defen'
            else:
                continue

            checking[role] = 1
            law = self._check_client_in_line(item)
            # The helper yields either one client or a list of clients; treat
            # both alike so the organisation check runs per client instead of
            # indexing the list itself.
            clients = law if isinstance(law, list) else [law]
            for client in clients:
                lawyer_per[side].append(client[0])
                # presumably client[1] is the party kind; u'机构' marks an
                # organisation, which is also recorded as a firm
                if len(client) > 1 and client[1] == u'机构':
                    frims.append(client[0])

        lawyer_per['plai'] = list(set(lawyer_per['plai']))
        lawyer_per['defen'] = list(set(lawyer_per['defen']))
        lawyer['plai'] = list(set(lawyer['plai']))
        lawyer['defen'] = list(set(lawyer['defen']))
        frims = list(set(frims))
        new.people = json.dumps(lawyer_per)
        new.lawyers = json.dumps(lawyer)
        new.frims = json.dumps(frims)

        # Signature block: keep the names that follow a known role title.
        sp_people = [u'审判员', u'审判长', u'人民陪审员', u'书记员',
                     u'代理审判员', u'代书记员', u'见习书记员', u'代理书记员']
        sp_all = []
        for item in old.case_sign.replace('<p>', '').split('</p>'):
            for pl in sp_people:
                if item.startswith(pl):
                    sp_all.append(item.replace(pl, ''))
        new.case_sign = ",".join(sp_all)
        new.firm = ",".join(frims)
        # Bind the row to the writer, then insert it.
        point = insert_database(
            'Judgment', tablename=todat, editor=new)
        point.insert()
        return
예제 #8
0
    def Querst_request(self):
        """Run the search for every key and store each result's title and URL.

        For each key in ``self.keys`` result pages 0-19 are loaded in the
        Firefox driver.  A failed page load resets the driver and is retried
        until it succeeds.  Every result found on a page is inserted as an
        ``ot_baidu_search_info`` row.  The browser is closed when the method
        finishes, including when it fails.
        """

        point = insert_database('Sqlextend', tablename=ot_baidu_search_info)
        try:
            for key in self.keys:
                for i in range(0, 20):
                    self.set_pn(key, i)
                    while True:
                        try:
                            self.firefox.get(self.url)
                            break
                        except Exception:
                            # Restart the browser and retry.  ``Exception``
                            # (not a bare except) keeps Ctrl-C usable.
                            self.reset_firefox()

                    data = self.firefox.page_source

                    if data:
                        xhtml = html.document_fromstring(data)
                        tool_nodes = xhtml.xpath(
                            '//div[@id="content_left"]//div[@class="f13"]//div[@class="c-tools"]')
                        link_nodes = xhtml.xpath(
                            '//div[@id="content_left"]//div[@class="f13"]//span[@class="g"]')
                        for title, url in zip(tool_nodes, link_nodes):
                            db = ot_baidu_search_info()
                            raw_tools = title.get('data-tools')
                            try:
                                db.title = json.loads(
                                    raw_tools)['title'].encode('utf8')
                            except Exception:
                                # Not valid JSON: cut the title out of the raw
                                # attribute text instead.
                                try:
                                    db.title = raw_tools.split(':')[1].split(
                                        ',')[0].replace('"', '').encode('utf8')
                                except (IndexError, AttributeError):
                                    # Attribute missing or in an unexpected
                                    # shape: leave the title unset.
                                    pass

                            db.url = url.text_content().encode('utf8')
                            db.key = key
                            point.set_value(db)
                            point.insert()
                    time.sleep(2)
        finally:
            self.firefox.close()