Esempio n. 1
0
def getTotalPage(url):
    """Return the page count embedded in the page at *url*.

    The value returned by ``getHtml(url)`` is decoded as UTF-8 and searched
    for ``totalPage = '<1-5 digits>'``; the first occurrence is returned as
    an ``int``.

    Example url:
    http://www.datashanghai.gov.cn/query!queryProduct.action?currentPage=1

    Raises:
        IndexError: if the page contains no ``totalPage`` assignment.
    """
    page_text = getHtml(url).decode('utf-8')
    total_page_re = re.compile(r"totalPage = '(\d{1,5})'")
    matches = total_page_re.findall(page_text)
    # Only the first match is used; an empty list raises IndexError here.
    return int(matches[0])
Esempio n. 2
0
def getTotalPage(url):
    """Extract the ``totalPage`` value from the page at *url* as an int.

    The decoded page text is scanned for ``totalPage = '<digits>'`` (one to
    five digits) and the first hit is converted to ``int``.

    Example url:
    http://www.datashanghai.gov.cn/query!queryProduct.action?currentPage=1

    Raises:
        IndexError: when no ``totalPage`` assignment is present.
    """
    decoded = getHtml(url).decode('utf-8')
    found = re.findall(r"totalPage = '(\d{1,5})'", decoded)
    first = found[0]  # IndexError propagates if nothing matched
    return int(first)
Esempio n. 3
0
def getPageLinked(url):
    """Return the link entries found in the list section of the page at *url*.

    Only the text between the first ``class="list"`` and the first
    ``id="pageSpan"`` is scanned. Each match yields a 4-tuple of the
    pattern's capture groups: the href remainder after ``query!``, the
    title, the last repetition of the 12-line filler group, and the text
    inside ``<strong>``.

    Note: ``str.find`` returns -1 for a missing marker, and that value is
    used as a slice bound unchanged.
    """
    page = getHtml(url).decode('utf-8')
    listing = page[page.find('class="list"'):page.find('id="pageSpan"')]
    entry_re = re.compile(
        r'<a href="query!(.*)" title="(.*)" target=".*">(\s*.*){12}<strong class=".*">(.*)</strong></dt>'
    )
    return entry_re.findall(listing)
Esempio n. 4
0
def getPageLinked(url):
    """Return the ``query!`` link targets from the list section of *url*.

    The section scanned runs from the first ``class="list"`` up to the first
    ``id="pageSpan"``. The result is a list of strings: whatever follows
    ``<a href="query!`` up to the last ``" title=`` on the same line.

    Note: a marker that is not found makes ``str.find`` return -1, which is
    then used directly as a slice bound.
    """
    page = getHtml(url).decode('utf-8')
    begin = page.find('class="list"')
    stop = page.find('id="pageSpan"')
    return re.compile(r'<a href="query!(.*)" title=').findall(page[begin:stop])
Esempio n. 5
0
def getSHPageLinked(url):
    """Collect the link entries from the list section of the page at *url*.

    The decoded page is cut down to the span between ``class="list"`` and
    ``id="pageSpan"`` and matched against the entry pattern. Every match is
    a 4-tuple: href remainder after ``query!``, title, last repetition of
    the 12-line filler group, and the ``<strong>`` text.

    Note: ``str.find`` yields -1 when a marker is absent; that value is used
    as the slice bound as-is.
    """
    decoded = getHtml(url).decode('utf-8')
    first_idx = decoded.find('class="list"')
    last_idx = decoded.find('id="pageSpan"')
    section = decoded[first_idx:last_idx]
    return re.findall(
        r'<a href="query!(.*)" title="(.*)" target=".*">(\s*.*){12}<strong class=".*">(.*)</strong></dt>',
        section,
    )
Esempio n. 6
0
def getFromSubject(url):
    """Return the raw HTML fragment of the subject list on the page at *url*.

    The fragment starts at the first ``<ul`` and ends just before the first
    ``</div>``, both searched from the position of the
    ``ess_ctr473_contentpane`` marker. The fragment is returned as an
    unparsed string.

    The previous version also compiled an empty regex and ran ``findall``
    over the fragment, but discarded the result; that dead code is removed.

    Note: ``str.find`` returns -1 for a missing marker and that value is
    used unchanged, so a page without the markers typically yields ``''``.
    """
    html = getHtml(url).decode('utf-8')
    sign = html.find('ess_ctr473_contentpane')
    start = html.find('<ul', sign)
    end = html.find('</div>', sign)
    return html[start:end]
Esempio n. 7
0
def getFromGov(url):
    """Return the organisation links listed on the page at *url*.

    The text scanned runs from the first ``<ul>`` to the first ``</ul>``
    after the ``ess_ctr506_OrganizationsListTree_divDataOrg`` marker. Each
    match is a 4-tuple of the pattern's capture groups.

    NOTE(review): the two ``(\\d*)`` groups follow a greedy ``(.*)`` and can
    match empty, so they look like they always come back as ``''``; the
    nested parentheses may have been meant as literal ``\\(`` ``\\)`` --
    confirm before changing, since callers see the tuple shape.
    """
    page = getHtml(url).decode('utf-8')
    anchor = page.find('ess_ctr506_OrganizationsListTree_divDataOrg')
    tree = page[page.find('<ul>', anchor):page.find('</ul>', anchor)]
    link_re = re.compile(r'<a href="../(.*)">(.*)((\d*))</a>')
    return link_re.findall(tree)
Esempio n. 8
0
def getFromGov(url):
    """Extract organisation anchors from the page at *url*.

    Starting at the ``ess_ctr506_OrganizationsListTree_divDataOrg`` marker,
    the span from the next ``<ul>`` to the next ``</ul>`` is matched against
    the anchor pattern. The result is a list of 4-tuples, one per match.

    NOTE(review): because ``(.*)`` is greedy and ``(\\d*)`` may be empty,
    the last two tuple items appear to always be ``''`` -- verify whether
    literal parentheses around the digits were intended.
    """
    decoded = getHtml(url).decode('utf-8')
    marker_at = decoded.find('ess_ctr506_OrganizationsListTree_divDataOrg')
    list_open = decoded.find('<ul>', marker_at)
    list_close = decoded.find('</ul>', marker_at)
    return re.findall(r'<a href="../(.*)">(.*)((\d*))</a>', decoded[list_open:list_close])
Esempio n. 9
0
def getFromSubject(url):
    """Return the unparsed subject-list HTML from the page at *url*.

    The ``ess_ctr473_contentpane`` marker is located first; the returned
    string runs from the next ``<ul`` up to (not including) the next
    ``</div>`` after that marker.

    An earlier revision compiled an empty pattern and called ``findall`` on
    the fragment without using the result; those unused statements have
    been dropped and the return value is unchanged.

    Note: missing markers make ``str.find`` return -1, which is used as-is;
    in that case the result is typically an empty string.
    """
    html = getHtml(url).decode('utf-8')
    sign = html.find('ess_ctr473_contentpane')
    start = html.find('<ul', sign)
    end = html.find('</div>', sign)
    return html[start:end]
Esempio n. 10
0
def getSHInfo(pageUrl, dataType, title):
    """Return ``[title, dataType, <td cell>, ...]`` with spaces stripped.

    The page at ``url + pageUrl`` (``url`` is a module-level name defined
    outside this function) is decoded as UTF-8, and every ``<td>...</td>``
    cell before the first ``</table>`` is captured.

    Args:
        pageUrl: path appended to the module-level ``url``.
        dataType: value placed at index 1 of the result.
        title: value placed at index 0 of the result.

    Returns:
        A list whose string items have every ``' '`` removed. Non-string
        items (e.g. a numeric ``dataType``) are returned unchanged.

    The previous implementation removed spaces by evaluating
    ``str(res).replace(' ', '')`` with ``eval``. That executes text derived
    from a remote page and is replaced here by a direct per-item
    ``str.replace``, which gives the same result for string items.
    """
    html = getHtml(url + pageUrl).decode('utf-8')
    # Scan only the part of the page before the first '</table>'.
    sign = html.find('</table>')
    cells = re.findall(r'<td>(.*)\r*\s*</td>', html[:sign])
    res = [title, dataType] + cells
    return [
        item.replace(' ', '') if isinstance(item, str) else item
        for item in res
    ]
Esempio n. 11
0
def getPageLinked(url, jg):
    """Return ``[name, value, jg]`` rows for each ``hylName`` link at *url*.

    For every pattern match, the row holds the anchor text (group 1), the
    third capture group with all spaces removed, and the caller-supplied
    *jg* value unchanged. The middle capture group is ignored.
    """
    page = getHtml(url).decode('utf-8')
    link_re = re.compile(r'<a id=".*" class="hylName" href=".*">(.*)</a>(.*\s*){2}(.*)\r')
    return [
        [match[0], match[2].replace(' ', ''), jg]
        for match in link_re.findall(page)
    ]
Esempio n. 12
0
def getInfo(pageUrl, dataType, title):
    """Return ``[title, dataType, <td cell>, ...]`` with spaces stripped.

    The page at ``url + pageUrl`` is decoded as UTF-8 (``url`` is a
    module-level name defined outside this function). Each
    ``<td>...</td>`` cell located before the first ``</table>`` is captured
    and appended after *title* and *dataType*.

    Args:
        pageUrl: path appended to the module-level ``url``.
        dataType: value placed at index 1 of the result.
        title: value placed at index 0 of the result.

    Returns:
        A list in which every string item has all ``' '`` characters
        removed; non-string items are passed through unchanged.

    Spaces used to be removed via ``eval(str(res).replace(' ', ''))``.
    Calling ``eval`` on text built from a remote page is unsafe and
    unnecessary, so each string is now cleaned directly; the result for
    string items is the same.
    """
    html = getHtml(url + pageUrl).decode('utf-8')
    # Everything from the first '</table>' onwards is ignored.
    sign = html.find('</table>')
    cells = re.findall(r'<td>(.*)\r*\s*</td>', html[:sign])
    res = [title, dataType]
    res.extend(cells)
    return [
        item.replace(' ', '') if isinstance(item, str) else item
        for item in res
    ]
Esempio n. 13
0
def getPageLinked(url, jg):
    """Build one ``[name, value, jg]`` list per ``hylName`` anchor at *url*.

    ``name`` is the anchor text, ``value`` is the pattern's third capture
    group with spaces removed, and *jg* is appended to every row verbatim.
    """
    decoded = getHtml(url).decode('utf-8')
    matches = re.findall(
        r'<a id=".*" class="hylName" href=".*">(.*)</a>(.*\s*){2}(.*)\r',
        decoded,
    )
    # The second capture group is filler between the anchor and the value.
    return [[name, value.replace(' ', ''), jg] for name, _filler, value in matches]
Esempio n. 14
0
def getResourceId(url):
    """Return ``(orgResult, resResult)`` link lists from the page at *url*.

    Two sections are cut out of the decoded page, each running from the
    first ``<div`` to the first ``</div>`` after a fixed marker id. Both are
    matched with the same anchor pattern, which yields ``(id, text)``
    tuples where ``text`` consists of characters in U+4E00..U+9FA5.

    Returns:
        A 2-tuple: matches from the ``60d79024-...`` section first, then
        matches from the ``25bde262-...`` section.
    """
    page = getHtml(url).decode('utf-8')

    def section_after(marker):
        # Slice from the first "<div" to the first "</div>" after the marker.
        anchor = page.find(marker)
        return page[page.find("<div", anchor):page.find("</div>", anchor)]

    item_re = re.compile(
        r'<a href=".*" id="(.*)" class="list-group-item text-center" title=".*">([\u4e00-\u9fa5]*)</a>'
    )
    resource_items = item_re.findall(section_after('25bde262-31b4-4901-8d53-527631005f6a'))
    organization_items = item_re.findall(section_after('60d79024-a7f3-4c73-8b78-b7153fa1f1aa'))
    return organization_items, resource_items
Esempio n. 15
0
def getResourceId(url):
    """Extract organisation and resource link ids from the page at *url*.

    For each of two fixed marker ids, the text between the next ``<div`` and
    the next ``</div>`` is matched against the anchor pattern, producing
    ``(id, text)`` tuples.

    Returns:
        ``(orgResult, resResult)``: matches for the ``60d79024-...`` marker
        followed by matches for the ``25bde262-...`` marker.
    """
    decoded = getHtml(url).decode('utf-8')
    item_re = re.compile(
        r'<a href=".*" id="(.*)" class="list-group-item text-center" title=".*">([\u4e00-\u9fa5]*)</a>'
    )
    markers = (
        '60d79024-a7f3-4c73-8b78-b7153fa1f1aa',  # organisation section
        '25bde262-31b4-4901-8d53-527631005f6a',  # resource section
    )
    found = []
    for marker in markers:
        at = decoded.find(marker)
        div_open = decoded.find("<div", at)
        div_close = decoded.find("</div>", at)
        found.append(item_re.findall(decoded[div_open:div_close]))
    return found[0], found[1]
Esempio n. 16
0
def getWHInfo(pageUrl):
    """Return the word content of every ``<td>`` cell at ``url + pageUrl``.

    ``url`` is a module-level name defined outside this function. Each cell
    contributes the run of ``\\w`` characters that follows any leading
    whitespace; the result is a list of strings.
    """
    page = getHtml(url + pageUrl).decode('utf-8')
    cell_re = re.compile(r'<td>\s*(\w*)\r*\s*</td>')
    return cell_re.findall(page)
Esempio n. 17
0
def getWHAllPageLinked(url):
    """Return the JSON document at *url* as parsed Python data.

    The value returned by ``getHtml(url)`` is decoded as UTF-8 and passed to
    ``json.loads``.
    """
    payload = getHtml(url).decode('utf-8')
    return json.loads(payload)
Esempio n. 18
0
def getInfo(pageUrl):
    """Return the text of every ``<span ... class="indent">`` at *pageUrl*.

    The decoded page is matched line by line (``.`` does not cross
    newlines); the result is a list of the captured span contents.
    """
    page = getHtml(pageUrl).decode('utf-8')
    span_re = re.compile(r'<span id=".*" class="indent">(.*)</span>')
    return span_re.findall(page)
Esempio n. 19
0
def getAllPageLinked(url):
    """Parse the UTF-8 JSON body obtained for *url* and return the result."""
    return json.loads(getHtml(url).decode('utf-8'))
Esempio n. 20
0
def getInfo(pageUrl):
    """Return the word content of each ``<td>`` cell at ``url + pageUrl``.

    ``url`` is a module-level name defined outside this function. The
    result is a list with one string per cell: the ``\\w`` run after any
    leading whitespace inside the cell.
    """
    decoded = getHtml(url + pageUrl).decode('utf-8')
    return re.findall(r'<td>\s*(\w*)\r*\s*</td>', decoded)
Esempio n. 21
0
def getInfo(pageUrl):
    """Collect the contents of all ``class="indent"`` spans at *pageUrl*.

    Returns a list of strings, one per ``<span id="..." class="indent">``
    element matched in the decoded page.
    """
    decoded = getHtml(pageUrl).decode('utf-8')
    return re.findall(r'<span id=".*" class="indent">(.*)</span>', decoded)