Ejemplo n.º 1
0
def extract_raw_strokes(xml_path, removal_threshold=100.0):
    """Read pen strokes from an XML file, dropping suspected noise points.

    The document root must contain a ``StrokeSet`` element. Each ``Stroke``
    found under it is converted, via ``make_tuple``, into a sequence of
    ``(xy, t)`` samples taken from its ``Point`` elements.

    A point is discarded when it lies more than ``removal_threshold``
    (Euclidean norm) away from the last kept point, or when its timestamp
    equals the timestamp of the last kept point.

    Args:
        xml_path: Source handed straight to ``parse``.
        removal_threshold: Largest allowed distance between two consecutive
            kept points; anything farther is treated as noise.

    Returns:
        A list with one ``(ts, xys)`` tuple per retained stroke. ``ts`` is
        an array of the kept timestamps and ``xys`` is the transposed array
        of the kept coordinates. Strokes with fewer than two kept points
        are omitted because they cannot be interpolated.
    """
    root = parse(xml_path).getroot()
    stroke_set = root.find("StrokeSet")
    result = []
    for stroke in stroke_set.iter("Stroke"):
        first = stroke.find("Point")
        if first is None:  # stroke without any point: nothing to extract
            continue
        p_xy, p_t = make_tuple(first)
        # Seed the output with the first point. It is the reference for the
        # noise filter below, and the loop revisits it: comparing it against
        # itself always matches on the timestamp, so without this seeding
        # the first sample of every stroke would be silently dropped.
        xys = [p_xy]
        ts = [p_t]
        for point in stroke.iter("Point"):
            xy, t = make_tuple(point)
            if numpy.linalg.norm(xy - p_xy) > removal_threshold or p_t == t:  # maybe noise
                continue
            p_xy = xy
            p_t = t
            xys.append(xy)
            ts.append(t)
        if len(ts) < 2:  # unable to interpolate
            continue
        result.append((numpy.array(ts), numpy.array(xys).transpose()))
    return result
Ejemplo n.º 2
0
                for element in xml:

                    if element.tag == "DOCUMENT":
                        doc = document.Document(sla_parent=self)
                        success = doc.fromxml(element)

                        if success:
                            obj.document = doc

                return True, obj

            else:
                return False, obj

        # Outcome reported to the caller; stays False unless read_xml succeeds.
        success = False

        # Two accepted inputs: an already-parsed lxml element, or anything
        # that resolves to an existing filesystem path.
        if isinstance(xml, lxml.etree._Element):
            # NOTE(review): rebinding `self` only changes the local name;
            # any effect on the instance must come from read_xml mutating
            # the object it is given.
            success, self = read_xml(self, xml)
        else:
            if os.path.exists(os.path.realpath(xml)):
                # NOTE(review): `filepath` is not defined anywhere in the
                # visible code, and the path tested above is `xml`. Also,
                # the parser normally lives at `lxml.etree.parse`, not
                # `lxml.parse`. This line probably should read
                # `lxml.etree.parse(xml).getroot()` - confirm against the
                # rest of the file before changing.
                xml = lxml.parse(filepath).getroot()
                success, self = read_xml(self, xml)

            else:
                # Reached when `xml` is not an element and does not name an
                # existing path.
                # NOTE(review): the message mentions only the element type,
                # although a valid path is accepted too.
                raise TypeError("fromxml requires lxml.etree._Element.")

        return success

# vim:set shiftwidth=4 softtabstop=4 spl=en:
Ejemplo n.º 3
0
def parse_options_data(table):
    """Turn an HTML table element into parsed tabular data.

    The first ``<tr>`` found under ``table`` supplies the column names
    (unpacked from its ``th`` cells); every following ``<tr>`` supplies one
    data record. The records are fed to ``TextParser`` and the chunk it
    produces is returned.

    Args:
        table: Element supporting ``findall`` with an XPath-like pattern.

    Returns:
        Whatever ``TextParser(...).get_chunk()`` yields for the table rows.

    Raises:
        IndexError: If the table contains no ``<tr>`` element at all.
    """
    all_rows = table.findall('.//tr')
    column_names = _unpack(all_rows[0], kind='th')

    records = []
    for body_row in all_rows[1:]:
        records.append(_unpack(body_row))

    parser = TextParser(records, names=column_names)
    return parser.get_chunk()


# Exploratory script: fetch one web page, parse it, and pull out a few
# anchors and all tables for manual inspection. Every name bound below is a
# module-level global, so it stays available in an interactive session.
if __name__ == '__main__':

    # Alternative pages tried previously (kept for reference):
    #parsed = parse('http://finance.yahoo.com/q/op?s=AAPL+Options')
    #parsed = parse('http://www-rohan.sdsu.edu/~gawron')

    #parsed = parse('http://www.lajollasurf.org/cgi-bin/plottide.pl')
    url = 'http://www.caferouge.com/menus/mainmenu'
    # Passing a URL means this line performs network access.
    parsed = parse(url)
    # Element id noted during exploration (not used by the code below):
    #id="ctl00_ctl00_Content_MCC_RadDatePicker_calendar_Top"

    # Root element of the parsed document tree.
    doc = parsed.getroot()

    # Every anchor element anywhere in the document.
    links = doc.findall('.//a')

    # Arbitrary sample window of anchors 15..19; indexing [0] below raises
    # IndexError when the page has fewer than 16 anchors.
    links_sub_list = links[15:20]
    lnk = links_sub_list[0]

    # Target of the sampled anchor (None when it has no href attribute).
    sample_url = lnk.get('href')

    # NOTE(review): text_content() is an lxml.html element method, so
    # `parse` is presumably lxml.html.parse - confirm against the imports.
    sample_display_text = lnk.text_content()

    # Every table element anywhere in the document.
    tables = doc.findall('.//table')
    # Next step (manual): look through `tables` and pick the table of interest.
Ejemplo n.º 4
0
def extract_raw_text(xml_path):
    """Collect the transcribed text lines stored in an XML file.

    Looks up the ``Transcription`` element directly under the document root
    and reads the ``text`` attribute of every ``TextLine`` element beneath it.

    Args:
        xml_path: Source handed straight to ``parse``.

    Returns:
        A list of the ``text`` attribute values, in document order.

    Raises:
        AttributeError: If the root has no ``Transcription`` child.
        KeyError: If a ``TextLine`` element lacks a ``text`` attribute.
    """
    document_root = parse(xml_path).getroot()
    transcription_node = document_root.find("Transcription")

    text_lines = []
    for line_node in transcription_node.iter("TextLine"):
        text_lines.append(line_node.attrib["text"])
    return text_lines