try: from banner import Banner from colors import Colors from bs4 import BeautifulSoup import re import requests import requests.exceptions from collections import deque except ImportError: print 'Install all required libraries' sys.exit(1) colors = Colors() RED = colors.red() GREEN = colors.green() BLUE = colors.blue() RESET = colors.reset() class EmailHarvester: def __init__(self, urls): self.urls = urls self.completed_urls = set() self.scrapped_emails = set() self.tmp_emails = set() def scrape_emails(self): try: while len(self.urls):
ERR_NUMBER_CONVERTION = "Error al convertir el número" ERR_REGX = "Error al ejecutar regex_compare" ERROR_BADCONFIG = "La linea '{0}' del archivo de configuraciones '{1}' no es valida" ERROR_BADINDEXCONFIG = "El índice seleccionado <{0}> no pertenece a las configuraciones cargadas" ERROR_BADLAUNCHBIN = "La clase debe ser importada desde bin" ERROR_BADPARAMETERTYPE = "Error en el tipo de un parámetro" ERROR_BADPARAMETERTYPE_MSG = "El parámetro {0} debe ser del tipo {1}" ERROR_BADSOURCEFOLDER = "La nueva dirección del 'Source Folder' no es una carpeta" ERROR_BADWD = "La nueva dirección del 'Working Directory' no es una carpeta" ERROR_CANTTRANSLATE = "El texto no se puede traducir" ERROR_CONFIGBADEXPORT = "No se pudo guardar el archivo de configuraciones" ERROR_CONFIGCORRUPT = "El archivo de configuraciones '{0}' está corrupto" ERROR_CONFIGNOTEXISTENT = "El parámetro <{0}> no existe en las configuraciones" ERROR_CREATE_MENU = "No se puede crear el menu inicial, posible error en archivo de configuraciones" ERROR_GETTING_OS = "Ocurrió un error al obtener el tipo de sistema operativo" ERROR_HEADER = COLOR.red() + "[ERROR] " + COLOR.end() ERROR_IMPORTERROREXTERNAL = "Ha ocurrido un error al importar las librerías de sistema externas" ERROR_IMPORTERRORINTERNAL = "Ha ocurrido un error al importar las librerías internas de la aplicación" ERROR_IMPORTERRORMECHANIZE = "Ha ocurrido un error al importar la librería mechanize" ERROR_IMPORTSYSTEMERROR = "Ha ocurrido un error al importar las librerías de sistema" ERROR_IMPORTWCONIO = "Error al importar WConio" ERROR_LANGBADINDEX = "El índice <{0}> debe ser un numero entero mayor o igual a 10" ERROR_LANGNOTEXIST = "ID[{0}] no existe en el archivo de idiomas <{1}>" ERROR_MATPLOTLIB_NOT_INSTALLED = "La librería gráfica matplotlib no esta instalada. 
Pruebe utilizando el comando 'pip install matplotlib' en la terminal" ERROR_NOCONFIGFILE = "No existe archivo de configuraciones '{0}'" ERROR_NOFILES = "No hay archivos" ERROR_NOLANGDEFINED = "El idioma no existe y/o no ha sido definido" ERROR_NOLANGFILE = "No existe el archivo de idiomas '{0}'" ERROR_NOTRANSLATECONECTION = "No se pudo establecer comunicación con el servidor de traducciones" ERROR_RARNOTINSTALLED_NOTWIN = "Se requieren de algunas librerías para poder descomprimir archivos RAR. Pruebe utilizando el comando 'pip install pyunpack, easyprocess, patool' en la terminal" ERROR_RARNOTINSTALLED_WIN = (
class Scanner:
    """
    Walks a source tree and reports lines that look like clear-text passwords.

    Findings are printed to stdout (unless ``quiet``) and accumulated into
    ``self.html`` by expanding the ``###OUTPUT###`` marker of the HTML
    template loaded in the constructor.
    """

    def __init__(self, project='Untitled', source='.', ignored_paths=None,
                 color=False, filter_obj=None, verbose=False, outfile=None,
                 quiet=False):
        """
        Constructor.

        :param project: str, substituted for ###PROJECT### in the template
        :param source: str, folder to walk; substituted for ###DIRECTORY###
        :param ignored_paths: str, colon-separated regular expressions that
            are compiled and appended to ``filter_obj.ignored_paths``
        :param color: bool, when true a Colors() helper is used for stdout
        :param filter_obj: lib.Filter()
        :param verbose: bool, print every scanned path instead of a dot
        :param outfile: str, path that is opened for writing
        :param quiet: bool, suppress all stdout output
        """
        self.project = project
        self.source = source
        # self.color doubles as the on/off flag and the colour helper.
        self.color = Colors() if color else False
        self.filter = filter_obj
        self.verbose = verbose
        self.outfile = outfile
        self.fp = None
        self.html = ''
        self.quiet = quiet

        if ignored_paths:
            for path in ignored_paths.split(':'):
                self.filter.ignored_paths.append(
                    re.compile(path, self.filter.REGEX_FLAGS))

        # Read the template through a context manager so the handle is
        # closed immediately instead of being left to the garbage collector.
        with open('resources/output_template.html', 'r') as template:
            self.html = template.read()
        self.fp = open(self.outfile, 'w')
        self.html = self.html.replace('###DIRECTORY###', self.source)
        self.html = self.html.replace('###PROJECT###', self.project)

    def scan(self):
        """
        Crawls all eligible files in a folder and scans each line of
        eligible files for potential clear-text passwords.

        Installs a SIGINT handler that calls ``self.cleanup(interrupted=True)``
        and calls ``self.cleanup()`` once the walk has finished. Any exception
        raised while processing a file is reported on stdout (path, then
        exception class and message) and re-raised.
        """
        def signal_handler(sig, frame):
            """
            Handles Ctrl+C being pressed (SIGINT)
            :param sig: Unused
            :param frame: Unused
            :return: void
            """
            self.cleanup(interrupted=True)

        signal.signal(signal.SIGINT, signal_handler)

        # Start walking the directories...
        for root, sub_folders, files in os.walk(self.source):
            for filename in files:
                if (self.filter.is_ignored_file(filename)
                        or self.filter.is_ignored_file(root)):
                    continue
                # Skip files beginning with a period.
                if filename.startswith('.'):
                    continue
                full_path = os.path.join(root, filename)
                try:
                    self._scan_file(full_path)
                except Exception as e:
                    print(full_path)
                    print('{0}: {1}'.format(str(e.__class__), str(e)))
                    raise

        self.cleanup()

    def _scan_file(self, full_path):
        """
        Scan a single file line by line with the pattern(s) registered for
        its extension in ``self.filter.patterns_by_filetype``.

        :param full_path: str, path of the file to scan
        :return: void
        """
        # Extension without the leading dot; '' when the file has none.
        extension = os.path.splitext(full_path)[1][1:]
        try:
            pattern = self.filter.patterns_by_filetype[extension]
        except KeyError:
            # Key not found in the filter's lookup table.
            return
        if not pattern:
            # File doesn't match filter criteria.
            return

        if not self.quiet:
            if self.verbose:
                sys.stdout.write("\nScanning {0}".format(full_path))
            else:
                sys.stdout.write('.')
            sys.stdout.flush()

        patterns = pattern if isinstance(pattern, list) else [pattern]
        random.seed(time.time())
        # The context manager closes the file on every exit path, including
        # the early return for vendor files and any exception.
        with open(full_path, 'r') as filep:
            if filep.read(3) == '/*!':
                # Ignore vendor JavaScript files which commonly begin with
                # '/*!' to tell YUI compressor not to remove their header
                # comment.
                return
            # Rewind: the probe above consumed the first three characters,
            # which would otherwise be missing from line 1.
            filep.seek(0)
            for line_number, line in enumerate(filep, 1):
                # Random suffix used to build the element ids in the report.
                rnum = random.randint(1, 1000000)
                if self.filter.is_ignored_pattern(line):
                    continue
                for candidate in patterns:
                    self._search_in_line(candidate, line, full_path,
                                         line_number, rnum)

    def _search_in_line(self, pattern, line, full_path, line_number, rnum):
        """
        Apply one compiled pattern to one line and record a finding.

        :param pattern: compiled regular expression
        :param line: str, raw line as read from the file
        :param full_path: str, path of the file being scanned
        :param line_number: int, 1-based line number
        :param rnum: int, suffix for the expand/hidden element ids
        :return: void
        """
        match = pattern.search(line)
        if not match:
            return
        line = line.strip()

        # Prefer group 2, fall back to group 1 only when the pattern has no
        # second group, and finally to the whole match. A group that did not
        # participate yields None and is treated like an empty one.
        try:
            password = match.group(2)
        except IndexError:
            try:
                password = match.group(1)
            except IndexError:
                password = None
        password = (password or '').strip() or match.group(0).strip()
        if not password:
            return

        if not self.quiet:
            if self.color:
                print("\n{0}:{1}: {2}".format(
                    self.color.light_gray(full_path),
                    self.color.light_blue(str(line_number)),
                    line.replace(password, self.color.red(password))))
            else:
                print("\n{0}:{1}: {2}".format(
                    full_path, str(line_number), line))

        # Output to HTML file. Everything taken from the scanned tree is
        # escaped first so it cannot inject markup into the report; the
        # password is escaped the same way so it is still found in the line.
        safe_line = self._escape_html(line)
        safe_password = self._escape_html(password)
        highlight = safe_line.replace(
            safe_password,
            '<span class="highlight">{0}</span>'.format(safe_password))
        self.html = self.html.replace(
            '###OUTPUT###',
            '<tr>'
            '<td>{0}:<span class="line-number">{1}</span></td><td><b>{2}</b>'
            '<span class="expand" id="expand-{3}">[+]</span>'
            '<div class="hidden" id="hidden-{4}"><code>{5}</code></div></td>'
            '</tr>###OUTPUT###'.format(
                self._escape_html(full_path),
                str(line_number),
                safe_password,
                str(rnum),
                str(rnum),
                highlight))

    @staticmethod
    def _escape_html(text):
        """
        Escape the characters that are significant in HTML text content.

        :param text: str
        :return: str with &, < and > replaced by their entities
        """
        # '&' must be handled first so the entities added below survive.
        return (text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;'))