def main():
    """Run the OSM 'places' pipeline: fetch place data daily and export it as CSV."""
    # Columns written to the CSV export, in output order.
    export_columns = [
        'osm_id',
        'type',
        'place',
        'population',
        'wikipedia_title',
        'wikipedia_lang',
        'lon',
        'lat',
        'admin_level_6_osm_id',
        'admin_level_6',
        'admin_level_5_osm_id',
        'admin_level_5',
        'admin_level_4_osm_id',
        'admin_level_4',
    ]
    botlib.runbot({
        'pipes': [
            define('places'),
        ],
        'tasks': [
            # Refresh the 'places' pipe every day from the query results.
            task('places').daily().clean().append(query_places(), progress='places'),
            task('places').export('data/osm/places.csv', include=export_columns),
        ],
    })
# NOTE(review): incomplete fragment — the enclosing function's opening lines (the
# dict/select head that these 'question-url'/'date'/'status' entries belong to) are
# missing from this file; recover the full definition from the original source before
# editing. Left byte-identical below.
'question-url': row.key, 'date': 'xpath:./ancestor::table//tr[contains(th/text(), "Posėdžio data")]/td/text()', 'status': 'xpath:./ancestor::table//tr[contains(th/text(), "Būsena")]/td/text()', }) ]) with bot.pipe('attachment preview links'): with bot.pipe('attachment preview').download(update={'source': row.value}): key = call(clean_redirect_url, 'xpath:/html/head/meta[@http-equiv="refresh"]/@content?') with bot.pipe('attachment preview links').select([(key, row.value['source'])]): bot.pipe('attachment preview').download(update={'source': row.value}) with bot.pipe('attachment links'): bot.pipe('attachments').download(update={ 'question-url': row.value['question-url'], 'date': row.value['date'], 'status': row.value['status'], }) bot.pipe('attachment preview').export('data/vilnius/vtaryba/attachment-previews.csv', include=['key', 'size', 'source'], update={ 'size': row.value['content'].length, }) bot.pipe('attachments').export('data/vilnius/vtaryba/attachments.csv', update=attachment_export) bot.compact() if __name__ == '__main__': botlib.runbot(define, run)
def define(bot):
    """Declare the pipes this bot uses."""
    bot.define('street suggestions')
    bot.define('streets')
    bot.define('pages')
    bot.define('extract search results')
    bot.define('osm addresses')
    bot.define('extract osm addresses')


def run(bot):
    """Scrape street-based search results from manogyvunai.lt and export to CSV.

    Seeds the 'street suggestions' pipe (one AJAX request per ASCII letter) only
    when it is empty, then walks suggestions -> streets -> pages -> search
    results, exporting the final pipe to data/gyvunai.csv.
    """
    bot.compact()
    start_url = 'http://www.manogyvunai.lt/m/m_animalproblems/files/ajax_workaround.php'
    if bot.pipe('street suggestions').data.count() == 0:
        # Seed once per letter; a str is already iterable, no list() needed.
        for letter in string.ascii_lowercase:
            query = urlencode({'getStreetsByLetters': '1', 'letters': letter})
            # NOTE(review): the endpoint appears to expect the query in the URL
            # even for POST — preserved as-is.
            resp = requests.post('%s?%s' % (start_url, query))
            bot.pipe('street suggestions').append(letter, resp.text)
    with bot.pipe('street suggestions'):
        with bot.pipe('streets').call(extract_streets).dedup():
            with bot.pipe('pages').call(download_page):
                bot.pipe('extract search results').call(extract_search_results)
    bot.pipe('extract search results').export('data/gyvunai.csv')


if __name__ == '__main__':
    botlib.runbot(define, run)
# NOTE(review): incomplete fragment — because the original file was collapsed onto
# one line, everything after '# Vardai' below is now inside a comment; the enclosing
# 'pipeline' structure (task list head) that these entries belong to is missing from
# this file. Recover the full definition from the original source. Left byte-identical.
# Vardai task('vardai-puslapiai', 'vardai').select( this.key.urlparse().path, { 'lytis': select('#page-left xpath:.//h1[1]/@class'), 'vardas': select('#page-left xpath:.//h1[1]/strong/text()'), 'kilmė': select( '#name-info xpath:./p[strong/text() = "Vardo kilmė:"]/text()?' ).null().strip(), 'vardadienis': select( '#name-info xpath:./p[strong/text() = "Vardadienis:"]/text()?' ).null().replace('\xa0', ' ').strip(), 'reikšmė': select( '#name-info xpath:./p[strong/text() = "Vardo reikšmė:"]?'). null().text(exclude=['xpath:./strong[1]']), 'panašūs vardai': [ '#name-info xpath:./p[strong/text() = "Panašūs ir giminingi vardai:"]/a/text()' ], 'populiarumas': this.value.content.apply(populiarumas), }), ], } if __name__ == '__main__': botlib.runbot(pipeline)