class Applications(object):
    """Retrieves the application list from the remote REST endpoint."""

    def __init__(self, user, password):
        self.logger = Logger()
        self.xml_parser = xml_parser.XmlParser()
        self.user = user
        self.password = password

    def get_apps(self):
        """Fetch all applications.

        Returns:
            list[dict]: one dict per unique app with keys 'app_id',
            'app_name' and an empty 'sandboxes' list; [] on any error.
        """
        apps = []
        # BUG FIX: the original tested `app_id in apps`, but `apps` holds
        # dicts, so the membership test was always False and duplicates
        # were never filtered. Track seen ids in a set instead.
        seen_ids = set()
        try:
            self.logger.info('Attempting to get applications!!!')
            resp = requests.post(
                endpoints.REQUEST_APPS,
                auth=HTTPBasicAuth(self.user, self.password))
            if resp.status_code == 200:
                response = self.xml_parser.parse_xml(resp.text)
                for item in response:
                    app_id = item.attrib['app_id']
                    if app_id is not None and app_id not in seen_ids:
                        seen_ids.add(app_id)
                        apps.append({
                            'app_id': app_id,
                            'app_name': item.attrib['app_name'],
                            'sandboxes': []
                        })
                self.logger.info('Successfully retrieved app list!')
            else:
                self.logger.error('Authentication Issue: {}'.format(
                    resp.status_code))
        except Exception as e:
            self.logger.exception('Error retrieving app list: {}'.format(e))
        return apps
def create_file(filename, content, mode='a+', encoding='utf-8', format=None):
    """Write `content` to `filename` (paths resolved relative to the script dir).

    When format == 'json', `content` is first serialized with json.dumps
    (indented, keys sorted). Errors are logged and swallowed — this is a
    best-effort write.
    """
    os.chdir(sys.path[0])  # relative paths are anchored at the script location
    try:
        if format == 'json':
            content = json.dumps(content, indent=4, sort_keys=True)
        # BUG FIX: the original leaked the file handle when write() raised;
        # the context manager guarantees the handle is closed.
        with open(filename, mode=mode, encoding=encoding) as _file:
            _file.write(content)
    except Exception as error:
        log.error(error)
def download(type, filename, nid, url):
    """Download `url` to `filename`. Only type == 'image' is supported.

    Returns:
        True on success; None (falsy) on unsupported type or any error.
    """
    if type == 'image':
        try:
            response = requests.get(url, stream=True)
            # BUG FIX: the original wrote the body regardless of HTTP status,
            # so an error page could be saved to disk as an "image". A non-2xx
            # status now raises and is handled by the existing except/log path.
            response.raise_for_status()
            with open(filename, 'wb') as image:
                shutil.copyfileobj(response.raw, image)
            log.success('Imagem baixada com sucesso [{url}]'.format(url=url))
            return True
        except Exception as error:
            log.error(error)
class MonthToIntegerConverter(object):
    """Converts three-letter English month abbreviations ('Jan'..'Dec') to 1-12."""

    def __init__(self):
        self.logger = Logger()
        # Index 0 is a placeholder so list positions line up with calendar
        # month numbers (self.months.index('Jan') == 1).
        self.months = [
            'NoneToMakeNumberNotStupid', 'Jan', 'Feb', 'Mar', 'Apr', 'May',
            'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
        ]

    def get_month(self, month):
        """Return the 1-based month number for `month`, or None if unknown."""
        try:
            return self.months.index(month)
        # Narrowed from the original bare `except Exception`: list.index only
        # raises ValueError when the value is absent.
        except ValueError as e:
            self.logger.error(
                'Error converting month string to integer value: {}'.format(e))
def set_image( news, index, link ):
    """Append one image of a news item to the data/images.json download queue."""
    images_file = 'data/images.json'
    images = helper.read_file( images_file, format='json' ) if os.path.isfile( images_file ) else []
    try:
        # Hoisted: the original computed set_image_link(news, index, link)
        # twice (once for the record, once for the log message).
        new_path = set_image_link( news, index, link )
        images.append({
            'catalog': news['catalog'],
            'notice': news['id'],
            'downloaded': False,
            'original_path': link,
            'new_path': new_path
        })
        helper.create_file(images_file, images, mode='w', format='json')
        log.success('Imagem adicionada para a lista de downloads [ {image_link} ]'.format(image_link=new_path))
    except Exception as error:
        log.error( error )
def read_file(filename, format=None, mode='r', encoding='utf-8'):
    """Read `filename` (paths resolved relative to the script directory).

    Returns:
        The parsed object when format == 'json', the raw text otherwise,
        or None when the file is missing or unreadable.
    """
    os.chdir(sys.path[0])  # relative paths are anchored at the script location
    if not os.path.isfile(filename):
        return None
    try:
        # Context manager closes the handle even if read() raises
        # (the original leaked it on error).
        with open(filename, mode=mode, encoding=encoding) as _file:
            _content = _file.read()
        if format == 'json':
            return json.loads(_content)
        return _content
    except Exception as error:
        # BUG FIX: `error.args[0]` raised IndexError for exceptions created
        # with no arguments; log the exception object itself.
        log.error(error)
def download_news( self, news ):
    """Fetch one news page and parse it.

    Returns the parsed content on success, or None after recording the
    HTTP error in self.errors.
    """
    init_crawling = '= Iniciando crawling, alvo: [ {nid} ] {link}'.format(nid=news['id'], link=os.path.basename( news['link'] ))
    print()
    log.success( '=' * len( init_crawling ) )
    log.success( init_crawling )
    log.success( '=' * len( init_crawling ) )
    print()
    request = requests.get( news['link'] )
    if request.status_code == 200:
        # Only build the BeautifulSoup tree on success — the original parsed
        # the body even for error responses, wasting work.
        document = BeautifulSoup( request.text, 'html.parser' )
        return parser.parse_news( news, document )
    error_message = 'Erro ao acessar a página: Status {status_code}'.format(status_code=request.status_code)
    self.errors.append( error_message )
    log.error( error_message )
def start( self ):
    """Process every pending item in self.news_list.

    Progress (both the parsed dump and the updated status list) is
    persisted after every iteration, so a crash loses at most one item.
    """
    for position, entry in enumerate( self.news_list ):
        try:
            # Guard clause: skip items already acquired.
            if entry['status'] != 'pending':
                log.warning('Dados já adquiridos [ {nid} ]'.format(nid=entry['id']))
                continue
            content = self.download_news( entry )
            if content:
                self.news_list[ position ]['status'] = 'completed'
                self.news.append( content )
                log.success('[ {nid} ] Dados salvos com sucesso!'.format(nid=entry['id']))
                print()
                print()
            else:
                failure = 'Não foi possível fazer o parse dos dados.'
                log.error( failure )
                self.errors.append( failure )
                self.news_list[ position ]['errors'].append( failure )
        except Exception as error:
            log.error('Erro ao baixar a notícia [ {nid} ]'.format(nid=entry['id']))
            log.error(error)
        finally:
            # Checkpoint both files every iteration (runs even on `continue`).
            helper.create_file( filename=self.dump_file, content=self.news, format='json', mode='w')
            helper.create_file( filename=self.news_json_file, content=self.news_list, format='json', mode='w')
def __init__( self ):
    # Downloads every image queued in data/images.json that is not yet
    # flagged as downloaded, rewriting the manifest after each attempt.
    super( Images, self ).__init__()
    self.images_file = 'data/images.json'   # manifest produced by the scraper
    self.images_folder = 'data/news/'
    self.dump_file = 'data/news/dump.json'
    if os.path.isfile( self.images_file ):
        images = helper.read_file( self.images_file, format='json' )
        for index, image in enumerate(images, start=0):
            try:
                if not image['downloaded']:
                    # Local path mirrors the remote layout with the CDN
                    # prefix stripped off.
                    path = 'data/{image_path}'.format(image_path=image['new_path'].replace('https://static.weg.net/', ''))
                    filename = os.path.basename( path )
                    # Derive the containing folder by dropping the last path segment.
                    folder = path.split('/')
                    folder.pop()
                    folder = '/'.join( folder )
                    base_url = 'http://www.weg.net'
                    download_url = image['original_path']
                    if not os.path.isdir( folder ):
                        os.makedirs(folder, exist_ok=True)
                    if not download_url.startswith('http'):
                        # Relative source paths are resolved against the site root.
                        download_url = '{base_url}/{path}'.format(base_url=base_url, path=download_url)
                    if helper.download(type='image', filename=path, nid=index, url=download_url):
                        images[ index ]['downloaded'] = True
                        log.success('Imagem baixada com sucesso [ {path} ]'.format(path=path))
                else:
                    log.warning('Imagem já baixada [ {url} ]'.format(url=image['new_path']))
            except Exception as error:
                log.error( error )
            finally:
                # Persist the manifest after every image so a crash loses at
                # most one item's progress.
                helper.create_file(self.images_file, images, mode='w', format='json')
    else:
        log.error('[!] Dump de imagens não existe')
class App(object):
    """Application entry point.

    On construction it validates configuration, gathers applications,
    sandboxes and builds, writes the report spreadsheet, and mails the
    result.
    """

    def __init__(self):
        self.logger = Logger()
        user = os.getenv('VC_USER')
        password = os.getenv('VC_PASS')
        if not self.should_run(user, password):
            self.logger.error('User or password is not defined, exiting...')
            sys.exit()
        self.sandboxes = Sandboxes(user, password)
        self.applications = Applications(user, password)
        self.builds = Builds(user, password)
        self.spreadsheet_creator = SpreadsheetCreator()
        apps = self.applications.get_apps()
        self.get_app_sandboxes(apps)
        self.get_app_sandbox_builds(apps)
        self.get_builds(apps)
        self.create_spreadsheet(apps)
        self.send_message()

    def should_run(self, user, password):
        """Return True when credentials and every required env var are set."""
        required_env = ('VC_ATTACHMENT_BASE_PATH', 'VC_ADMIN_ADDRESS',
                        'VC_SMTP', 'VC_SEND_ADDRESS', 'VC_RECIPIENTS')
        # `is not None` replaces the original `!= None` chain (PEP 8:
        # comparisons to singletons use identity).
        return (user is not None and password is not None
                and all(os.getenv(name) is not None for name in required_env))

    def get_app_sandboxes(self, apps):
        """Attach each app's sandbox list in place."""
        for app in apps:
            app['sandboxes'] = self.sandboxes.get_sandboxes(app['app_id'])

    def get_app_sandbox_builds(self, apps):
        """Attach the build list to every sandbox of every app."""
        for app in apps:
            for sandbox in app['sandboxes']:
                sandbox['builds'] = self.builds.get_builds(
                    app['app_id'], sandbox['sandbox_id'])

    def get_builds(self, apps):
        """Resolve and record the submitter for every build."""
        for app in apps:
            for sandbox in app['sandboxes']:
                for build in sandbox['builds']:
                    build['submitter'] = self.builds.get_build(
                        app['app_id'], build['build_id'])

    def create_spreadsheet(self, apps):
        """Write the collected data to the report spreadsheet."""
        self.spreadsheet_creator.write_spreadsheet(apps)

    def send_message(self):
        """Mail the spreadsheet if data was found, otherwise a not-found notice."""
        if self.spreadsheet_creator.have_data:
            Mailer(
                os.getenv('VC_SEND_ADDRESS'), os.getenv('VC_RECIPIENTS'),
                constants.MESSAGE_SUBJECT, os.getenv('VC_SMTP'), 25,
                constants.BUILDS_FOUND_MESSAGE_TEXT.format(
                    os.getenv('VC_ADMIN_ADDRESS')),
                self.spreadsheet_creator.filename)
        else:
            Mailer(
                os.getenv('VC_SEND_ADDRESS'), os.getenv('VC_RECIPIENTS'),
                constants.MESSAGE_SUBJECT, os.getenv('VC_SMTP'), 25,
                constants.BUILDS_NOT_FOUND_MESSAGE_TEXT.format(
                    os.getenv('VC_ADMIN_ADDRESS')))
log.error(error) pass finally: helper.create_file( filename=self.dump_file, content=self.news, format='json', mode='w') helper.create_file( filename=self.news_json_file, content=self.news_list, format='json', mode='w') if __name__ == '__main__': scrapper = Scrapper() try: scrapper.start() except Exception as error: print() error_message = 'Erro ao iniciar processo: {proccess}'.format(proccess=scrapper.proccess) log.error('=' * len( error_message )) log.error( error_message ) log.error(error) log.error('=' * len( error_message )) print() finally: finished_with_errors = 'Finalizado com {errors} erro{suffix}'.format(errors=len( scrapper.errors ), suffix='s' if len( scrapper.errors ) > 1 else '') finished_without_errors = 'Finalizado sem erros' if scrapper.errors: print() log.warning( '=' * len( finished_with_errors ) ) log.warning( finished_with_errors ) else: print() log.success( '=' * len( finished_without_errors ) )