self.mongo = pymongodb.MongoDB('btt') self.documents = self.mongo.find({}, 'en_altcoins') self.form_url = '' self.client = requests.sessions.Session() @decorators.log def send_data(self, *args): time.sleep(2) params = { 'entry.150819782': args[0], 'entry.1803166929': args[1], 'entry.750777297': args[2], } r = self.client.post(self.form_url, params) template = f'[STATUS CODE] {r.status_code} [POST TITLE] {args[0]}' return template def run(self): for document in self.documents: self.send_data(document['title'], document['link'], document['topic_started_date']) if __name__ == '__main__': try: AddNewPosts().run() except: utils.logger('Success status: %s' % 'ERROR', 'script.log')
# Parse every diapason of pages in own process. for range_ in ranges: Process(target=self.parse_range, args=(url, range_)).start() @decorators.log def run(self): """ Method which start parsing all ico categories and then data from all categories. :return: """ self.mongo.drop_database() # Drop db for parse new data. self.parse_cats() # Parse all ico categories. self.parse_cats_data( self.get_cats_documents()) # Parse data from all categories. # Parse specific urls like ...ongoing , ...upcoming, ...ended. for url in self.SPECIFIC_URLS_LIST: ps = Process(target=self.parse_specific_ulrs, args=(url, )) ps.start() ps.join() # Sort collections ended, upcoming, ongoing in mongo. utils.sort_col_docs() if __name__ == '__main__': try: Parser().run() except: utils.logger('Success status: %s' % 'ERROR', 'moonwalker.log')
self.parse_markets(self.get_html(url)) for url in full_desc_links ] for i in range(len(currencies_names)): self.write_data(currency=currencies_names[i], markets=markets[i].markets, markets_count=markets[i].amount) def run(self): """ 1. Get html with currencies on one page. 2. Find current currencies amount , then split this number on rages (need for multiprocessing). 3. Start processes. :return: """ html = self.get_html(self.url_with_all_crypto) current_crypto_amount = self.parse_current_amount(html) ranges = utils.split_on_ranges( (lambda x: (x / 100).__round__())(int(current_crypto_amount)), self.processes_num) for range_ in ranges: Process(target=self.parse_range, args=(range_, )).start() if __name__ == '__main__': try: Parser().run() except: utils.logger('Success status: %s' % 'ERROR', 'aki_adagaki.log')
self.write_data(img=data_lst[0][i], full_description=data_lst[1][i], name=data_lst[2][i], date=data_lst[3][i]) @decorators.log def run(self): """ Try to get last new from db and parse data. :return: """ self.get_last_new() # Parse data. page_count = 1 while True: self.parse(page_count) if self.next is False: break page_count += 1 if __name__ == '__main__': try: Parser().run() except: utils.logger('Success status: %s' % 'ERROR', 'ailachi.log')
for i in range(len(titles)): self.write_data((lambda x: (x / 40).__round__())(page_num), title=titles[i], link=links[i], topic_started_date=topic_started_dates[0]) def run(self): """ Method which run parser. First - parse last page number , then split it on ranges. Second - create processes which parse received ranges. :return: """ self.parse_last_page_num() # Find last page number. self.last_page_num = 100 if self.mongo_ else self.last_page_num # Set 100 pages for not clear db. ranges = utils.split_on_ranges(self.last_page_num, self.processes_num, 40) # Split LPN on ranges. # Parse pages by ranges in own process. [ Process(target=self.parse_range, args=(range_, )).start() for range_ in ranges ] if __name__ == '__main__': try: Parser().run() except: utils.logger('Success status: %s' % 'ERROR', 'tajga.log')
self.write_data(title=data_lst[0][i], full_desc_link=data_lst[1][i], img_src=data_lst[2][i], date=data_lst[3][i]) @decorators.log def run(self): """ Try to get last new from db and parse data. :return: """ self.get_last_new() # Get last new from db. # Parse data. page_count = 0 while True: self.parse(page_count) if self.next is False: break page_count += 1 if __name__ == '__main__': try: Parser().run() except: utils.logger('Success status: %s' % 'ERROR', 'jiraya.log')