예제 #1
0
def populate(options):
    """Parse the archive listing at ``options.url`` and commit the results.

    Calls ``start_engine(options)``, collects the text of every ``<a>``
    nested in a ``<td>`` of the document returned by ``html_parse``, feeds
    those strings to a fresh ``ChatState`` parser, then adds the objects the
    state reports to ``session`` and commits.

    Args:
        options: Object passed to ``start_engine`` and to the parser; its
            ``url`` attribute is the document handed to ``html_parse``.
    """
    start_engine(options)

    listing = html_parse(options.url)
    archives = listing.xpath('//td/a/text()')

    state = ChatState()
    state.parser.parse(archives, options)

    pending = state.objects()
    session.add_all(pending)
    session.commit()
예제 #2
0
def parse_playlist_form(fp):
    """Extract playlist rows from the text inputs of an HTML form.

    Every ``<input type="text">`` in the document is inspected. The input
    named ``date`` supplies the show date. Inputs whose name matches
    ``PATTERN`` are split into ``(field, num)`` and grouped by ``int(num)``:
    a ``timestamp`` field is combined with the show date and stored as
    ``unix_time``, ``date_str`` and ``time_str``; any other field is stored
    unchanged under its own name.

    Args:
        fp: Source handed to ``html_parse``.

    Returns:
        A ``(date_str, show)`` tuple. ``show`` is a list of dicts, one per
        row number in ascending order, restricted to ``KEEPER_KEYS`` (missing
        keys map to ``None``); rows without a single truthy value are
        dropped. ``date_str`` is the ``date_str`` entry of the first row.

    Raises:
        IndexError: If no row survives the filtering.
        KeyError: If a text input has no ``name`` attribute, or the ``date``
            input has no ``value`` attribute.
    """
    show_date = None
    doc = html_parse(fp)
    inputs = {}
    for text_input in doc.getroot().body.xpath('//input[@type="text"]'):
        name = text_input.attrib['name']
        if name == 'date':
            show_date = date_parse(text_input.attrib['value'])
            continue

        matched = PATTERN.match(name)
        if not matched:
            continue
        field, num = matched.groups()

        if field == 'timestamp':
            try:
                clock = date_parse(text_input.attrib['value'])
                real_date = date_parse(
                    show_date.strftime('%Y-%m-%d') + ' '
                    + clock.strftime('%H:%M:%S'))
                row = inputs.setdefault(int(num), {})
                row['unix_time'] = mktime(real_date.timetuple())
                row['date_str'] = real_date.strftime('%Y-%m-%d')
                row['time_str'] = real_date.strftime('%H:%M:%S')
            except Exception:
                # Deliberate best effort: unparsable timestamps (the
                # original author notes "'auto' probably") or a timestamp
                # seen before the show date are skipped. Unlike a bare
                # ``except``, this lets KeyboardInterrupt/SystemExit through.
                continue
        else:
            try:
                value = text_input.attrib['value']
            except KeyError:
                # Inputs without a value attribute contribute nothing.
                continue
            inputs.setdefault(int(num), {})[field] = value

    show = []
    for k in sorted(inputs):
        d = {key: inputs[k].get(key) for key in KEEPER_KEYS}
        # ``any`` rather than ``filter``: on Python 3 ``filter`` returns a
        # lazy iterator that is always truthy, so empty rows were kept.
        if any(d.values()):
            show.append(d)
    return (show[0]['date_str'], show)
예제 #3
0
def show_links():
    """Yield the archive links found on the page at ``DJ_SEARCH_URL``.

    The page is fetched with ``urlopen`` and parsed with ``html_parse``.
    Every ``<a>`` element whose ``href`` starts with ``../archive/?date=``
    is yielded after being joined onto ``DJ_SEARCH_URL`` with ``urljoin``.
    """
    anchor_xpath = CSSSelector('a').path
    page = html_parse(urlopen(DJ_SEARCH_URL))
    for anchor in page.getroot().xpath(anchor_xpath):
        target = anchor.attrib.get('href')
        # Skip anchors without an href and links that are not archive links.
        if not target or not target.startswith('../archive/?date='):
            continue
        yield urljoin(DJ_SEARCH_URL, target)
예제 #4
0
    def _iter(self, fp, *args, **kwargs):
        """Yield the ``record`` entry of an XML file converted to a dict.

        Args:
            fp: Source handed to ``html_parse``.
            *args: Accepted but not used here.
            **kwargs: Accepted but not used here.

        Yields:
            The value stored under ``html -> body -> record`` in the dict
            produced by ``self._etree_to_dict``.

        Raises:
            ReaderError: If that value is missing or falsy.
        """
        # NOTE: The HTML parser is used on purpose, to skip XML validation
        # and strip XML namespaces.
        root = html_parse(fp).getroot()
        as_dict = self._etree_to_dict(root)
        record = as_dict["html"]["body"].get("record")

        if record:
            yield record
            return

        raise ReaderError("Record not found in XML entry.")
예제 #5
0
 def from_path(cls, path: Union[Path, str]) -> Topic:
     """Build an instance from a file whose name encodes its classification.

     The file stem is split on ``" 分 "``; the pieces are used, in order, as
     ``area``, ``theme`` and ``topic``. The file itself is parsed with
     ``html_parse`` and passed to ``cls.from_etree``.
     """
     # Split the stem before parsing, as the original expression did.
     parts = Path(path).stem.split(" 分 ")
     return cls.from_etree(
         html_parse(str(path)),
         topic=parts[2],
         area=parts[0],
         theme=parts[1],
     )
예제 #6
0
def html_tree():
    """Return ``fixtures/article.html`` (beside this module) parsed by ``html_parse``."""
    here = os.path.dirname(os.path.realpath(__file__))
    fixture = os.path.join(here, 'fixtures', 'article.html')
    return html_parse(fixture)
예제 #7
0
def html_gallery_tree():
    """Return ``fixtures/gallery.html`` (beside this module) parsed by ``html_parse``."""
    module_dir = os.path.dirname(os.path.realpath(__file__))
    gallery_file = os.path.join(module_dir, "fixtures", "gallery.html")
    return html_parse(gallery_file)
예제 #8
0
def html_article_tree():
    """Return ``fixtures/article.html`` (beside this module) parsed by ``html_parse``."""
    module_dir = os.path.dirname(os.path.realpath(__file__))
    article_file = os.path.join(module_dir, "fixtures", "article.html")
    return html_parse(article_file)