def __init__(self, trace_path): """ <Purpose> Creates an StraceParser object containing all the information needed to extract data from a trace file generated by the strace utility. <Arguments> trace_path: The path to the trace file containing the traced system calls. This file should contain the output of the strace utility. <Exceptions> IOError: If the pickle file containing the system call definitions is not found. (this file should come as part of this program) <Side Effects> None <Returns> None """ Parser.__init__(self, trace_path) # regex compiled for _parse_line # # Example unfinished syscall. # accept(3, <unfinished ...> # # Example complete syscall. # socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5 <0.000066> # # Example resumed syscall. # <... accept resumed> {sa_family=AF_INET, sin_port=htons(44289), sin_addr=inet_addr("127.0.0.1")}, [16]) = 4 <0.002020> # # Example unfinished syscall due to end of program. # nanosleep({...}, <unfinished ... exit status 0> self._re_unfinished_syscall = re.compile( r"([^(]+)\((.*)\<unfinished .*") self._re_resumed_syscall = re.compile( r"\<\.\.\. ([^ ]+) resumed\> (.*)\)[ ]+=[ ]+([a-fx\d\-?]+)(.*)") self._re_complete_syscall = re.compile( r"([^(]+)\((.*)\)[ ]+=[ ]+([a-fx\d\-?]+)(.*)")
def __init__(self, trace_path): """ <Purpose> Creates an StraceParser object containing all the information needed to extract data from a trace file generated by the strace utility. <Arguments> trace_path: The path to the trace file containing the traced system calls. This file should contain the output of the strace utility. <Exceptions> IOError: If the pickle file containing the system call definitions is not found. (this file should come as part of this program) <Side Effects> None <Returns> None """ Parser.__init__(self, trace_path) # regex compiled for _parse_line # # Example unfinished syscall. # accept(3, <unfinished ...> # # Example complete syscall. # socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5 <0.000066> # # Example resumed syscall. # <... accept resumed> {sa_family=AF_INET, sin_port=htons(44289), sin_addr=inet_addr("127.0.0.1")}, [16]) = 4 <0.002020> # # Example unfinished syscall due to end of program. # nanosleep({...}, <unfinished ... exit status 0> self._re_unfinished_syscall = re.compile(r"([^(]+)\((.*)\<unfinished .*") self._re_resumed_syscall = re.compile(r"\<\.\.\. ([^ ]+) resumed\> (.*)\)[ ]+=[ ]+([a-fx\d\-?]+)(.*)") self._re_complete_syscall = re.compile(r"([^(]+)\((.*)\)[ ]+=[ ]+([a-fx\d\-?]+)(.*)")
def run(self): """This method run the parser with the needed arguments """ arguments = self._run_arg_parse() resource_parser = Parser(**vars(arguments)) resource_parser.parse()
def run(self): try: self.updateProgressObservable.start() session = self.db.session() self.tsLog("Parsing nmap xml file: " + self.filename) startTime = time() try: parser = Parser(self.filename) except: self.tsLog('Giving up on import due to previous errors.') self.tsLog("Unexpected error: {0}".format(sys.exc_info()[0])) self.done.emit() return self.db.dbsemaphore.acquire( ) # ensure that while this thread is running, no one else can write to the DB s = parser.getSession() # nmap session info if s: n = nmapSessionObj(self.filename, s.startTime, s.finish_time, s.nmapVersion, s.scanArgs, s.totalHosts, s.upHosts, s.downHosts) session.add(n) allHosts = parser.getAllHosts() hostCount = len(allHosts) if hostCount == 0: # to fix a division by zero if we ran nmap on one host hostCount = 1 totalprogress = 0 self.updateProgressObservable.updateProgress(totalprogress) createProgress = 0 createOsNodesProgress = 0 createPortsProgress = 0 for h in allHosts: # create all the hosts that need to be created db_host = self.hostRepository.getHostInformation(h.ip) if not db_host: # if host doesn't exist in DB, create it first hid = hostObj(osMatch='', osAccuracy='', ip=h.ip, ipv4=h.ipv4, ipv6=h.ipv6, macaddr=h.macaddr, status=h.status, hostname=h.hostname, vendor=h.vendor, uptime=h.uptime, lastboot=h.lastboot, distance=h.distance, state=h.state, count=h.count) self.tsLog("Adding db_host") session.add(hid) t_note = note(h.ip, 'Added by nmap') session.add(t_note) else: self.tsLog("Found db_host already in db") createProgress = createProgress + ((100.0 / hostCount) / 5) totalprogress = totalprogress + createProgress self.updateProgressObservable.updateProgress( int(totalprogress)) session.commit() for h in allHosts: # create all OS, service and port objects that need to be created self.tsLog("Processing h {ip}".format(ip=h.ip)) db_host = self.hostRepository.getHostInformation(h.ip) if db_host: self.tsLog( "Found db_host during os/ports/service processing") else: self.log( "Did not find db_host during os/ports/service processing" ) os_nodes = h.getOs() # parse and store all the OS nodes self.tsLog(" 'os_nodes' to process: {os_nodes}".format( os_nodes=str(len(os_nodes)))) for os in os_nodes: self.tsLog( " Processing os obj {os}".format(os=str(os.name))) db_os = session.query(osObj).filter_by( hostId=db_host.id).filter_by(name=os.name).filter_by( family=os.family).filter_by( generation=os.generation).filter_by( osType=os.osType).filter_by( vendor=os.vendor).first() if not db_os: t_osObj = osObj(os.name, os.family, os.generation, os.osType, os.vendor, os.accuracy, db_host.id) session.add(t_osObj) createOsNodesProgress = createOsNodesProgress + ( (100.0 / hostCount) / 5) totalprogress = totalprogress + createOsNodesProgress self.updateProgressObservable.updateProgress( int(totalprogress)) session.commit() all_ports = h.all_ports() self.tsLog(" 'ports' to process: {all_ports}".format( all_ports=str(len(all_ports)))) for p in all_ports: # parse the ports self.tsLog(" Processing port obj {port}".format( port=str(p.portId))) s = p.getService() if not ( s is None ): # check if service already exists to avoid adding duplicates # print(" Found service {service} for port {port}".format(service=str(s.name),port=str(p.portId))) # db_service = session.query(serviceObj).filter_by(name=s.name).filter_by(product=s.product).filter_by(version=s.version).filter_by(extrainfo=s.extrainfo).filter_by(fingerprint=s.fingerprint).first() db_service = session.query(serviceObj).filter_by( name=s.name).first() if not db_service: # print("Did not find service *********** name={0} prod={1} ver={2} extra={3} fing={4}".format(s.name, s.product, s.version, s.extrainfo, s.fingerprint)) db_service = serviceObj(s.name, s.product, s.version, s.extrainfo, s.fingerprint) session.add(db_service) # else: # print("FOUND service *************** name={0}".format(db_service.name)) else: # else, there is no service info to parse db_service = None # fetch the port db_port = session.query(portObj).filter_by( hostId=db_host.id).filter_by( portId=p.portId).filter_by( protocol=p.protocol).first() if not db_port: # print("Did not find port *********** portid={0} proto={1}".format(p.portId, p.protocol)) if db_service: db_port = portObj(p.portId, p.protocol, p.state, db_host.id, db_service.id) else: db_port = portObj(p.portId, p.protocol, p.state, db_host.id, '') session.add(db_port) # else: # print('FOUND port *************** portid={0}'.format(db_port.portId)) createPortsProgress = createPortsProgress + ( (100.0 / hostCount) / 5) totalprogress = totalprogress + createPortsProgress self.updateProgressObservable.updateProgress(totalprogress) session.commit() # totalprogress += progress # self.tick.emit(int(totalprogress)) for h in allHosts: # create all script objects that need to be created db_host = self.hostRepository.getHostInformation(h.ip) for p in h.all_ports(): for scr in p.getScripts(): self.tsLog( " Processing script obj {scr}".format( scr=str(scr))) print(" Processing script obj {scr}".format( scr=str(scr))) db_port = session.query(portObj).filter_by( hostId=db_host.id).filter_by( portId=p.portId).filter_by( protocol=p.protocol).first() db_script = session.query(l1ScriptObj).filter_by( scriptId=scr.scriptId).filter_by( portId=db_port.id).first() if not db_script: # if this script object doesn't exist, create it t_l1ScriptObj = l1ScriptObj( scr.scriptId, scr.output, db_port.id, db_host.id) self.tsLog( " Adding l1ScriptObj obj {script}". format(script=scr.scriptId)) session.add(t_l1ScriptObj) for hs in h.getHostScripts(): db_script = session.query(l1ScriptObj).filter_by( scriptId=hs.scriptId).filter_by( hostId=db_host.id).first() if not db_script: t_l1ScriptObj = l1ScriptObj(hs.scriptId, hs.output, None, db_host.id) session.add(t_l1ScriptObj) session.commit() for h in allHosts: # update everything db_host = self.hostRepository.getHostInformation(h.ip) if db_host.ipv4 == '' and not h.ipv4 == '': db_host.ipv4 = h.ipv4 if db_host.ipv6 == '' and not h.ipv6 == '': db_host.ipv6 = h.ipv6 if db_host.macaddr == '' and not h.macaddr == '': db_host.macaddr = h.macaddr if not h.status == '': db_host.status = h.status if db_host.hostname == '' and not h.hostname == '': db_host.hostname = h.hostname if db_host.vendor == '' and not h.vendor == '': db_host.vendor = h.vendor if db_host.uptime == '' and not h.uptime == '': db_host.uptime = h.uptime if db_host.lastboot == '' and not h.lastboot == '': db_host.lastboot = h.lastboot if db_host.distance == '' and not h.distance == '': db_host.distance = h.distance if db_host.state == '' and not h.state == '': db_host.state = h.state if db_host.count == '' and not h.count == '': db_host.count = h.count session.add(db_host) tmp_name = '' tmp_accuracy = '0' # TODO: check if better to convert to int for comparison os_nodes = h.getOs() for os in os_nodes: db_os = session.query(osObj).filter_by( hostId=db_host.id).filter_by(name=os.name).filter_by( family=os.family).filter_by( generation=os.generation).filter_by( osType=os.osType).filter_by( vendor=os.vendor).first() db_os.osAccuracy = os.accuracy # update the accuracy if not os.name == '': # get the most accurate OS match/accuracy to store it in the host table for easier access if os.accuracy > tmp_accuracy: tmp_name = os.name tmp_accuracy = os.accuracy if os_nodes: # if there was operating system info to parse if not tmp_name == '' and not tmp_accuracy == '0': # update the current host with the most accurate OS match db_host.osMatch = tmp_name db_host.osAccuracy = tmp_accuracy session.add(db_host) for scr in h.getHostScripts(): print("-----------------------Host SCR: {0}".format( scr.scriptId)) db_host = self.hostRepository.getHostInformation(h.ip) scrProcessorResults = scr.scriptSelector(db_host) for scrProcessorResult in scrProcessorResults: session.add(scrProcessorResult) for scr in h.getScripts(): print("-----------------------SCR: {0}".format( scr.scriptId)) db_host = self.hostRepository.getHostInformation(h.ip) scrProcessorResults = scr.scriptSelector(db_host) for scrProcessorResult in scrProcessorResults: session.add(scrProcessorResult) for p in h.all_ports(): s = p.getService() if not (s is None): # db_service = session.query(serviceObj).filter_by(name=s.name).filter_by(product=s.product).filter_by(version=s.version).filter_by(extrainfo=s.extrainfo).filter_by(fingerprint=s.fingerprint).first() db_service = session.query(serviceObj).filter_by( name=s.name).first() else: db_service = None # fetch the port db_port = session.query(portObj).filter_by( hostId=db_host.id).filter_by( portId=p.portId).filter_by( protocol=p.protocol).first() if db_port: # print("************************ Found {0}".format(db_port)) if db_port.state != p.state: db_port.state = p.state session.add(db_port) if not ( db_service is None ) and db_port.serviceId != db_service.id: # if there is some new service information, update it db_port.serviceId = db_service.id session.add(db_port) for scr in p.getScripts( ): # store the script results (note that existing script outputs are also kept) db_script = session.query(l1ScriptObj).filter_by( scriptId=scr.scriptId).filter_by( portId=db_port.id).first() if not scr.output == '' and scr.output is not None: db_script.output = scr.output session.add(db_script) self.updateProgressObservable.updateProgress(100) session.commit() self.db.dbsemaphore.release() # we are done with the DB self.tsLog(f"Finished in {str(time() - startTime)} seconds.") self.done.emit() self.updateProgressObservable.finished() # call the scheduler (if there is no terminal output it means we imported nmap) self.schedule.emit(parser, self.output == '') except Exception as e: self.tsLog('Something went wrong when parsing the nmap file..') self.tsLog("Unexpected error: {0}".format(sys.exc_info()[0])) self.tsLog(e) raise self.done.emit()
def __init__(self, html='', arrayList=[]): Parser.__init__(self, html, arrayList) self._json = None self._ceps = [] self._address = []
from parsers.Parser import Parser p = Parser() p.create_json('./scraper/data/')
def get_parser(): return Parser()
License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. """ from app.auxiliary import log from parsers.Parser import Parser if __name__ == '__main__': parser = Parser('a-full.xml') log.info('\nscan session:') session = parser.getSession() log.info("\tstart time:\t" + session.startTime) log.info("\tstop time:\t" + session.finish_time) log.info("\tnmap version:\t" + session.nmapVersion) log.info("\tnmap args:\t" + session.scanArgs) log.info("\ttotal hosts:\t" + session.totalHosts) log.info("\tup hosts:\t" + session.upHosts) log.info("\tdown hosts:\t" + session.downHosts) for h in parser.getAllHosts(): log.info('host ' + h.ip + ' is ' + h.status)
samplerate=samplerate, channels=2, blocking=True) print("* done recording") sf.write(filename, mydata, samplerate) CHUNK = 1024 FORMAT = pyaudio.paInt16 # paInt8 CHANNELS = 2 RATE = 44100 # sample rate RECORD_SECONDS = 5 # WAVE_OUTPUT_FILENAME = "pedido_usuario.wav" ######### parser = Parser() gateway = OmdbGateway() service = OmdbService(gateway) INFORMACION_DISPONIBLE = [ "actores", "género", "año", "duración", "director", "idioma", "productor" ] ######### stt = SpeechToTextV1(username='******', password='******') tts = TextToSpeechV1(username='******', password='******') # # def record_old(output): # p = pyaudio.PyAudio()
def __init__(self, html = '', arrayList = []): Parser.__init__(self, html, arrayList) self._imgs = []
logger = logging.getLogger(__name__) logger_formatter = logging.Formatter( fmt='%(asctime)-15s %(name)s %(levelname)s: %(message)s') logger_handler = logging.StreamHandler() logger_handler.setFormatter(logger_formatter) logger.addHandler(logger_handler) logger.setLevel(logging.INFO) if __name__ == '__main__': scraper = Scraper() parser = Parser() path = get_day_folder_path(days_diff=-1) + "/raw_upcoming_matches.txt" matches_lst_raw_text = open(path, 'r').read() prs = Parser() matches_lst = parser.parse_upcoming_matches_ids(matches_lst_raw_text) for i, match_title in enumerate(matches_lst): try: logger.info(msg=f'Scraping {i} out of {len(matches_lst)} match.') scraper.scrape_match_info(match_title, info_types=[ MatchInfoType.RESULT,
def __init__(self, html='', lista=[]): Parser.__init__(self, html, lista) self._emails = []
def __init__(self, html='', arrayList=[]): Parser.__init__(self, html, arrayList) self._list = [] self._externals = []