def fetch_latest_document(symbol, form_type):
    """Return the complete-submission text of the latest filing, or None.

    Looks up the filing detail pages for ``symbol`` and ``form_type`` with
    ``fetch_document_links``, loads the first one (the first link is treated
    as the latest filing), and follows the first anchor on that page whose
    href has a ``.txt`` extension -- the complete submission (sgml) text.

    Returns whatever ``load_url`` returns for that link. Returns None when
    there are no detail links, when the detail page could not be loaded
    (``load_url`` returned None), or when the page has no ``.txt`` link.
    """
    document_links = fetch_document_links(symbol, form_type)
    if not document_links:
        return None

    # the first link is the latest one
    document_detail_page = load_url(document_links[0], USER_AGENT)
    if document_detail_page is None:
        return None

    # find the sgml text link containing the full submission
    tree = parse_html(document_detail_page)
    for node in tree.xpath('//a'):
        href = node.attrib.get('href')
        if href is None:
            # anchor without a target (e.g. a named anchor); nothing to follow
            continue

        # the first url pointing to a '.txt' file is the complete submission;
        # splitext always yields a (root, ext) pair, so no IndexError guard
        # is needed around the [1] lookup
        if path.splitext(href)[1] == '.txt':
            sgml_link = ''.join([SEC_ROOT_URL, href])
            # with the complete submission link in hand, load it and return it
            return load_url(sgml_link, USER_AGENT)

    return None
Exemple #2
0
    def run(self):
        """Auto-reply to the sender with the current NYC weather report.

        Loads the National Weather Service dwml feed for a fixed NYC
        latitude/longitude. If ``load_url`` returns None, a "temporarily
        unavailable" message is sent instead. Otherwise the text of every
        ``text`` child of each ``wordedForecast`` element is joined with
        single spaces and sent as the message body.
        """
        # get the current weather for NYC from the National Weather Service feed
        weather_xml = load_url(
            'http://forecast.weather.gov/MapClick.php?lat=40.71980&lon=-73.99300&FcstType=dwml'
        )

        if weather_xml is None:
            # there was an error getting the weather data
            send('NYC Weather',
                 'Sorry, this service is temporarily unavailable',
                 recipient_list=[self.sender],
                 sender=server_auto_email)
            return

        # parse the report from the xml and auto-reply with it as the message body
        doc = etree.fromstring(weather_xml)

        # find the human-readable text report in the xml; iterate the element
        # directly rather than via the deprecated getchildren(), and skip
        # empty <text/> elements whose .text is None (join() would raise
        # TypeError on them)
        report = [
            subelem.text
            for elem in doc.xpath('//wordedForecast')
            for subelem in elem
            if subelem.tag == 'text' and subelem.text is not None
        ]

        # send it back to the sender
        send('NYC Weather',
             ' '.join(report),
             recipient_list=[self.sender],
             sender=server_auto_email)
Exemple #3
0
    def run(self):
        """Auto-reply to the sender with the current time.

        Loads the US Naval Observatory Master Clock page. If ``load_url``
        returns None, a "temporarily unavailable" message is sent instead.
        Otherwise the reply carries the page twice: as plain text extracted
        by ``get_text_from_html`` (with the DOCTYPE declaration removed
        first) and as the unmodified html.
        """
        subject = 'The Current Time'
        time_html = load_url('http://tycho.usno.navy.mil/cgi-bin/timer.pl')

        # guard clause: the clock page could not be loaded
        if time_html is None:
            send(
                subject,
                'Sorry, this service is temporarily unavailable',
                recipient_list=[self.sender],
                sender=server_auto_email,
            )
            return

        # drop the DOCTYPE declaration before extracting the text version;
        # the html part of the reply keeps the page exactly as loaded
        doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final"//EN>'
        html_without_doctype = time_html.replace(doctype, '')
        time_txt = get_text_from_html(html_without_doctype)

        send(
            subject,
            time_txt,
            recipient_list=[self.sender],
            html=time_html,
            sender=server_auto_email,
        )
def fetch_document_links(symbol, form_type):
    """Return links to the filing detail pages for symbol and form_type.

    Loads the search results page built from ``SEARCH_URL`` and collects the
    href of every anchor whose id is ``documentsbutton``, each prefixed with
    ``SEC_ROOT_URL``. The links point to document filing detail pages (the
    actual document will be a single link within the detail page).

    Returns a list of url strings in page order; the list is empty when the
    search page could not be loaded (``load_url`` returned None) or when it
    has no matching anchors.
    """
    search_url = SEARCH_URL.substitute(symbol=symbol, form=form_type, root=SEC_ROOT_URL)
    matches = load_url(search_url, USER_AGENT)
    if matches is None:
        return []

    tree = parse_html(matches)
    # membership test instead of the Python 2-only has_key() idiom; anchors
    # without an href are skipped
    return [
        ''.join([SEC_ROOT_URL, node.attrib['href']])
        for node in tree.xpath('//a[@id="documentsbutton"]')
        if 'href' in node.attrib
    ]
Exemple #5
0
    def run(self):
        """Auto-reply to the sender with the current NYC weather report.

        Loads the National Weather Service dwml feed for a fixed NYC
        latitude/longitude. If ``load_url`` returns None, a "temporarily
        unavailable" message is sent instead. Otherwise the text of every
        ``text`` child of each ``wordedForecast`` element is joined with
        single spaces and sent as the message body.
        """
        # get the current weather for NYC from the National Weather Service feed
        weather_xml = load_url('http://forecast.weather.gov/MapClick.php?lat=40.71980&lon=-73.99300&FcstType=dwml')

        if weather_xml is None:
            # there was an error getting the weather data
            send('NYC Weather', 'Sorry, this service is temporarily unavailable', recipient_list=[self.sender], sender=server_auto_email)
            return

        # parse the report from the xml and auto-reply with it as the message body
        doc = etree.fromstring(weather_xml)

        # find the human-readable text report in the xml; iterate the element
        # directly rather than via the deprecated getchildren(), and skip
        # empty <text/> elements whose .text is None (join() would raise
        # TypeError on them)
        report = [
            subelem.text
            for elem in doc.xpath('//wordedForecast')
            for subelem in elem
            if subelem.tag == 'text' and subelem.text is not None
        ]

        # send it back to the sender
        send('NYC Weather', ' '.join(report), recipient_list=[self.sender], sender=server_auto_email)
Exemple #6
0
    def run(self):
        """Auto-reply to the sender with the current time.

        Loads the US Naval Observatory Master Clock page. If ``load_url``
        returns None, a "temporarily unavailable" message is sent instead.
        Otherwise the reply carries the page twice: as plain text extracted
        by ``get_text_from_html`` (with the DOCTYPE declaration removed
        first) and as the unmodified html.
        """
        subject = 'The Current Time'
        time_html = load_url('http://tycho.usno.navy.mil/cgi-bin/timer.pl')

        # guard clause: the clock page could not be loaded
        if time_html is None:
            apology = 'Sorry, this service is temporarily unavailable'
            send(subject, apology, recipient_list=[self.sender], sender=server_auto_email)
            return

        # the text version is extracted from the page minus its DOCTYPE
        # declaration; the html version is sent exactly as loaded
        doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final"//EN>'
        time_txt = get_text_from_html(time_html.replace(doctype, ''))
        send(subject, time_txt, recipient_list=[self.sender], html=time_html, sender=server_auto_email)