Ejemplo n.º 1
0
def processInNewDriver(href):
    '''
    处理文章链接
    '''
    print "处理链接", href
    driverContent = webdriver.Chrome(executable_path=executable_path)
    driverContent.get(href)

    retry(lambda: driver.find_element_by_class_name("rprt_all"))
    contentEle = driverContent.find_element_by_class_name("rprt_all")
    content = contentEle.text
    for key in keys:
        if content.find(key) != -1:
            print "find"
            reusltData.append([href, key])
            write(reusltData, FILENAME_RESULT)
            break
    recoreDate.append([href])
    driverContent.quit()
Ejemplo n.º 2
0
#!/usr/bin/python
# -*-coding:UTF-8-*-
# encoding=utf8
from com.office.util.excelUtil import write
from com.office.util.wordUtil import readDocx

# Load the document through the project's readDocx helper.
content = readDocx("/Users/yangjie/Downloads/qq/(最完整版)胡希恕讲伤寒论.docx")
# Cut the text at every Chinese full stop.
lines = content.split(u"。")
# Wrap each piece in its own single-element row before handing it to write().
resultData = [[line] for line in lines]
write(resultData, "/Users/yangjie/Downloads/qq/(最完整版)胡希恕讲伤寒论.xls")
Ejemplo n.º 3
0
#!/usr/bin/python
# -*-coding:UTF-8-*-
# encoding=utf8
import matplotlib.pyplot as plt
import numpy as np
from com.office.util.excelUtil import write
plt.figure(figsize=(9, 6), dpi=100)
n = 100
#rand 均匀分布和 randn高斯分布
r = []
for i in range(6):
    x = np.random.randn(1, n)
    r.append(x[0])
write(r, "scatterplot.xls")

print x
y = np.random.randn(1, n)
print y
T = np.arctan2(x, y)
print T
plt.scatter(x, y, c=T, s=25, alpha=0.4, marker='o')
#T:散点的颜色
#s:散点的大小
#alpha:是透明程度
plt.show()
Ejemplo n.º 4
0
                        )
                        Molecular2 = Moleculars1[1].text
                        result.append([key, ID2, Molecular2])
                        pubChem.recorddata.append([key])
                        drivercontent.quit()
                        return
            except Exception, e:
                result.append([key, "NA", "NA"])
                pubChem.recorddata.append([key])
                break
        else:
            driver.get("http://www.ncbi.nlm.nih.gov/pccompound")


mylists.reverse()
for key in mylists:
    if key in pubChem.recorddata:
        print "chuliguo"
        continue
    else:
        try:
            key = key[0]
            print "处理", key
            getchemID(key)
            write(result, pubChem.resultPathname)
            write(pubChem.recorddata, pubChem.recordPathname)
        except Exception, e:
            import traceback
            print traceback.format_exc()
            print "3", e
Ejemplo n.º 5
0
    if rowDate[2] != "":
        cacheline = []
        cacheline.append(rowDate[2])
        if rowDate[3] != "":
            lastSrc = rowDate[3]
        cacheline.append(lastSrc)
        cache.append(cacheline)

resultDate = []
for rowDate in srcDate:
    newLine = []
    newLine.append(rowDate[0])
    if rowDate[1] != "":
        newLine.append(rowDate[1])
        newLine.append("TCMID")
    else:
        if len(cache) != 0:
            takeCache = cache[0]
            cache.remove(takeCache)
            newLine.extend(takeCache)
    resultDate.append(newLine)

for cacheData in cache:
    newLine = []
    newLine.append("")
    newLine.extend(cacheData)
    resultDate.append(newLine)

print resultDate
write(resultDate, "六神曲_res.xls")
Ejemplo n.º 6
0
#!/usr/bin/python
#-*-coding:UTF-8-*-
# encoding=utf8
from com.office.util.excelUtil import write

resultDate = []
for i in range(3):
    resultDate.append(range(3))

print resultDate

write(resultDate, "num.xls")
Ejemplo n.º 7
0
    recoreDate.append([href])
    driverContent.quit()


while True:
    retry(lambda: driver.find_elements_by_xpath("//p[@class='title']/a"))
    aEles = driver.find_elements_by_xpath("//p[@class='title']/a")
    hrefs = []
    for aEle in aEles:
        href = aEle.get_attribute("href")
        if href in matchRecordData:
            print "processd ", href
        else:
            hrefs.append(href)
    if 0 == len(hrefs):
        print "no new href"
    else:
        doInThread(processInNewDriver, hrefs, poolNum=4)
        write(recoreDate, FILENAME_RECORD)

    try:
        driver.find_element_by_xpath(
            "//a[@id='EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.Page' and @sid='3']"
        ).send_keys(Keys.ENTER)
        print "下一页"
        import time
        time.sleep(1)
    except Exception, e:
        print "已到尾页"
        break