def scrape_list_page(url): print "Scraping %s" % url req = requests.get(url) doc = lxml.html.fromstring(req.text) items = [] for item in doc.xpath("//*[@class='newspanel clearDiv']"): full_title = etree.tostring(item.xpath("./h2")[0], method="text", encoding='utf8') date, title = [x.strip() for x in full_title.split("|")] attachments = [] raw_links = item.xpath(".//a/@href") for raw_link in raw_links: attachment = urlparse.urljoin(url, raw_link) attachment_title = item.xpath(".//a/text()")[0] attachments.append({"link": attachment, "title": attachment_title}) try: summary = item.xpath("./p")[0].text except IndexError: summary = "" items.append({"Title": title, "Publication date": datetool.parsedate(date), "Old URL": url, "Summary": summary, "Attachments": json.dumps(attachments), "Associated organisations": "Scotland Office"}) return items
# NOTE(review): this chunk begins mid-statement -- the fragment below is the
# tail of a truncated table-creation call (presumably
# dt.create_table({... "Associated Document Series": ""}, "statistics"))
# whose opening is missing from this view.  Code tokens left untouched.
"Associated Document Series": ""}, "statistics")
# Enforce one record per (Title, Old URL) pair so re-runs upsert, not duplicate.
dt.create_index(["Title", "Old URL"], "statistics", unique=True)
# Each link in the wrapper list points at one document-series page.
for link in doc.xpath("//div[@class='wrapper']/ul/li/a"):
    series_title, series_url = link.text, urlparse.urljoin(URL, link.attrib["href"])
    print series_title
    series_req = requests.get(series_url)
    series_doc = lxml.html.fromstring(series_req.text)
    # Rows carrying bgcolor #004093 are skipped -- presumably header rows;
    # verify against the live page markup.
    for table_line in series_doc.xpath("//tr[not(@bgcolor) or @bgcolor!='#004093']"):
        # td[3] holds the publication date; td[2] holds the file link(s).
        file_pub_date = table_line.xpath("./td[3]")[0].text
        for file_node in table_line.xpath("./td[2]//a"):
            # tostring(method="text") flattens any markup inside the anchor.
            file_title = etree.tostring(file_node, method="text", encoding="utf8")
            file_link = file_node.attrib["href"]
            # Resolve relative hrefs against the site base URL.
            if not file_link.startswith("http"):
                file_link = urlparse.urljoin(URL, file_link)
            file_data = {"Old URL": series_url,
                         "Title": file_title,
                         "Body": file_title,
                         "Publication date": datetool.parsedate(file_pub_date),
                         "Attachment": file_link,
                         "Attachment title": file_title,
                         "Associated organisations": "Scotland Office",
                         "Associated Document Series": series_title}
            # Upsert keyed on the unique (Title, Old URL) index created above.
            dt.upsert(file_data, "statistics")
# NOTE(review): the source is collapsed onto one line, so the nesting of this
# final call is reconstructed -- it looks like a one-time export after the
# series loop completes; confirm against the original file's indentation.
dumptruck_to_csv(dt, "statistics", "/home/http/scotland/stats.csv")