コード例 #1
0
def generate(strategy, source, target, time, test, name):
    """Mine *source* with the requested strategy and export a microTOSCA model.

    Args:
        strategy: name of the miner strategy, resolved via Parser.
        source: path/identifier of the system to mine.
        target: destination for the exported microTOSCA file.
        time: time budget forwarded to the dynamic miner.
        test: test specification forwarded to the dynamic miner.
        name: name given to the exported model.
    """
    nodes = {}
    # Look up the configuration of the requested mining strategy.
    strategyConfig = Parser.searchMinerStrategy(strategy)

    # Run the static miner when one is configured.
    if 'static' in strategyConfig:
        print('Executing static mining...')
        StaticMinerContext.doStaticMining(
            strategyConfig['static']['class'],
            source,
            strategyConfig['static'].get('args', {}),
            nodes)

    # Run the dynamic miner when one is configured.
    if 'dynamic' in strategyConfig:
        print('Executing dynamic mining...')
        # BUG FIX: when no 'args' key existed, the original assigned
        # {'time': time} and then immediately replaced it with
        # {'test': test}, silently dropping the time parameter.
        args = strategyConfig['dynamic'].setdefault('args', {})
        args['time'] = time
        args['test'] = test
        DynamicMinerContext.doDynamicMining(
            strategyConfig['dynamic']['class'], source, args, nodes)

    # Load and run every configured refinement strategy.
    refinerStrategies = Parser.getRefinerStrategies()
    if refinerStrategies:
        print('Executing Refinement...')
        RefinerContext.doRefinement(refinerStrategies, nodes)

    # Export the resulting microTOSCA model.
    print('Exporting microTOSCA...')
    YMLExporter.export(nodes, target, name)
コード例 #2
0
	def extract(self, source):
		"""Extract an image from *source*.

		If the image is supported an instance of PIL's Image is returned, otherwise None.
		The parsed PDS labels are returned alongside in both cases.
		"""
		# NOTE(review): legacy Python 2 code ('print readSize' and the
		# old-style raise below) — this block will not run under Python 3.
		p = Parser()
		f = open_pds(source)
		if self.log: self.log.debug("Parsing '%s'" % (source))
		self.labels = p.parse(f)
		if self.log: self.log.debug("Found %d labels" % (len(self.labels)))
		if self._check_image_is_supported():
			if self.log: self.log.debug("Image in '%s' is supported" % (source))
			dim = self._get_image_dimensions()
			loc = self._get_image_location()
			# Sample size (bits) and type drive both the number of bytes
			# read and the PIL decoder mode chosen further down.
			imageSampleBits = int(self.labels['IMAGE']['SAMPLE_BITS'])
			imageSampleType = self.labels['IMAGE']['SAMPLE_TYPE']
			md5Checksum = self._get_image_checksum()
			if self.log: self.log.debug("Image dimensions should be %s" % (str(dim)))
			if self.log: self.log.debug("Seeking to image data at %d" % (loc))
			f.seek(loc)
			# 8-bit: one byte per pixel; 16-bit: two bytes per pixel.
			# NOTE(review): readSize stays unbound for any other
			# SAMPLE_BITS value and would raise NameError below — confirm
			# that inputs are restricted to 8/16 bits.
			if imageSampleBits == 8:
				readSize = dim[0] * dim[1]
			elif imageSampleBits == 16:
				readSize = dim[0] * dim[1] * 2
			print readSize  # NOTE(review): leftover debug output?
			if self.log: self.log.debug("Seek successful, reading data (%s)" % (readSize))
			# rawImageData = f.readline()
			# f.seek(-int(self.labels["RECORD_BYTES"]), os.SEEK_CUR)
			rawImageData = f.read(readSize)
			if md5Checksum:
				# Verify the raw image bytes against the checksum stored in
				# the PDS label; optionally raise on mismatch.
				rawImageChecksum = hashlib.md5(rawImageData).hexdigest()
				checksumVerificationPassed = rawImageChecksum == md5Checksum and True or False
				if not checksumVerificationPassed:
					if self.log: self.log.debug("Secure hash verification failed")
					if self.raisesChecksumError:
						errorMessage = "Verification failed! Expected '%s' but got '%s'." % (md5Checksum, rawImageChecksum)
						raise ChecksumError, errorMessage
				else:
					if self.log: self.log.debug("Secure hash verification passed")
			if self.log: self.log.debug("Read successful (len: %d), creating Image object" % (len(rawImageData)))
			# The frombuffer defaults may change in a future release;
			# for portability, change the call to read:
			# frombuffer(mode, size, data, 'raw', mode, 0, 1).
			if (imageSampleBits == 16) and imageSampleType == ('MSB_INTEGER'):
				# 16-bit big-endian samples: decode as float, scale down by
				# 16, then convert to an 8-bit luminance image.
				#img = Image.frombuffer('I', dim, rawImageData, 'raw', 'I;16BS', 0, 1)
				img = Image.frombuffer('F', dim, rawImageData, 'raw', 'F;16B', 0, 1)
				img = ImageMath.eval("convert(a/16.0, 'L')", a=img)
			else:
				# Default path: 8-bit luminance image.
				img = Image.frombuffer('L', dim, rawImageData, 'raw', 'L', 0, 1)
			if self.log:
				self.log.debug("Image result: %s" % (str(img)))
				self.log.debug("Image info: %s" % (str(img.info)))
				self.log.debug("Image mode: %s" % (str(img.mode)))
				self.log.debug("Image size: %s" % (str(img.size)))
		else:
			if self.log: self.log.error("Image is not supported '%s'" % (source))
			img = None
		f.close()

		return img, self.labels
コード例 #3
0
ファイル: imageextractor.py プロジェクト: afrigeri/PyPDS
	def extract(self, source):
		"""Extract an image from *source*.

		Returns a PIL Image when the PDS image is supported, otherwise
		None; the parsed labels are returned in both cases.
		"""
		pds_parser = Parser()
		pds_file = open_pds(source)
		if self.log:
			self.log.debug("Parsing '%s'" % (source))
		self.labels = pds_parser.parse(pds_file)
		if self.log:
			self.log.debug("Found %d labels" % (len(self.labels)))
		if self._check_image_is_supported():
			if self.log:
				self.log.debug("Image in '%s' is supported" % (source))
			dimensions = self._get_image_dimensions()
			offset = self._get_image_location()
			if self.log:
				self.log.debug("Image dimensions should be %s" % (str(dimensions)))
				self.log.debug("Seeking to image data at %d" % (offset))
			pds_file.seek(offset)
			if self.log:
				self.log.debug("Seek successful, reading data")
			# One byte per pixel: width * height bytes of raw image data.
			raw_data = pds_file.read(dimensions[0] * dimensions[1])
			if self.log:
				self.log.debug("Read successful (len: %d), creating Image object" % (len(raw_data)))
			# The frombuffer defaults may change in a future release; the
			# explicit 'raw' decoder arguments keep this call portable.
			image = Image.frombuffer('L', dimensions, raw_data, 'raw', 'L', 0, 1)
			if self.log:
				self.log.debug("Image result: %s" % (str(image)))
				self.log.debug("Image info: %s" % (str(image.info)))
				self.log.debug("Image size: %s" % (str(image.size)))
		else:
			if self.log:
				self.log.error("Image is not supported '%s'" % (source))
			image = None
		pds_file.close()

		return image, self.labels
コード例 #4
0
 def __init__(self, domain, display=None):
     """Collect search-engine results for *domain* and build a Parser.

     Note: the constructor itself triggers the (slow) web searches via
     self.gather() before wrapping the raw results in a Parser.
     """
     self.domain = domain
     self.display = display
     self.results = ""
     # Fixed desktop user agent — presumably to look like a regular
     # browser to the search engines; confirm before changing.
     self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
     self.p = ProgressBar(display=self.display)
     # Populates self.results, which the Parser below consumes.
     self.gather()
     self.parser = Parser(self.results, self.domain)
コード例 #5
0
def invitees():
    """Fixture-style generator: yield the invite result for the test data."""
    estimator = DistanceStrategy(GLOBAL_CIRCLE_DISTANCE)
    invite_service = InviteService(distance_estimator=estimator)
    customer_data = Parser().parsing(
        file_path='./test_customer.json', decoder=CustomerDecoder)
    yield invite_service.calculate(customer_data)
    def init(self):
        """Parse the input file, build the grid and the constraint service."""
        file_parser = Parser(self.file_path)
        file_parser.parse()

        # The factory turns the parsed data into the working grid.
        factory = GridFactory(file_parser.get_parsed_data())
        factory.create_grid()
        self.grid = factory.get_grid()
        self.constraint_service = ConstraintService(self.grid)
コード例 #7
0
 def input_cmd(self):
     """Read the next MiniDFS command from stdin and queue the request.

     Does nothing until responses from every peer have been received
     (recv_count tracks them); then parses the typed command, publishes
     it in self.request_buffer and resets the response counter.
     """
     if self.recv_count < len(self.peer_address_list):
         return
     parser = Parser()
     cmd_str = input("MiniDFS> ")
     # BUG FIX: a leftover debug line ('cmd_str = "put ptb.wrd"') used to
     # overwrite the user's input here; the typed command is now honored.
     parser.judge_cmd(cmd_str)
     self.request_buffer = parser.data
     self.recv_count = 0
コード例 #8
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
    def test_resolvePath_srcAttrElements_returnFullPath(self):
        """src attributes with absolute paths get prefixed with the host."""
        cases = [
            ('<img src="/testing"/>',
             f'<img src="{config.HOST}/testing"/>'),
            ('<iframe src="/testing"></iframe>',
             f'<iframe src="{config.HOST}/testing"></iframe>'),
        ]
        for html, expected in cases:
            res = Parser.resolve_path(html, config.HOST)
            print(res)
            self.assertEqual(res, expected)
コード例 #9
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
    def test_resolvePath_hrefAttrElements_returnFullPath(self):
        """href attributes with absolute paths get prefixed with the host."""
        cases = [
            ('<a href="/testing"></a>',
             f'<a href="{config.HOST}/testing"></a>'),
            ('<link href="/testing"/>',
             f'<link href="{config.HOST}/testing"/>'),
        ]
        for html, expected in cases:
            res = Parser.resolve_path(html, config.HOST)
            print(res)
            self.assertEqual(res, expected)
コード例 #10
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
    def test_resolvePath_attrMissing_returnOrigin(self):
        """Elements without src/href attributes are returned unchanged."""
        for html in ('<img/>', '<a></a>'):
            res = Parser.resolve_path(html, config.HOST)
            print(res)
            self.assertEqual(res, html)
コード例 #11
0
ファイル: environment.py プロジェクト: smaass/monito
 def add_primitives(self, bindings):
     """Wrap each (name, (impl, type-string)) entry in a Primitive and
     register all of them in this environment."""
     primitives = {}
     for name, spec in bindings.items():
         # spec[0] is the implementation, spec[1] its type expression.
         parsed_type = Parser.parse_type(Parser.string_to_sexpr(spec[1]))
         primitives[name] = Primitive(name, spec[0], parsed_type, self)
     return self.update(primitives)
コード例 #12
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_parse_args(self):
        """parse_args handles typed, parameterized and untyped arguments."""
        args_str = "([x: (Num -> Str)] [y: (List Num)] z)"
        parsed = Parser.parse_args(Parser.string_to_sexpr(args_str))

        self.assertEqual(len(parsed), 3)

        expected = [
            ("x", FunType([NumType()], StringType())),
            ("y", ListType(NumType())),
            ("z", DynamicType()),
        ]
        for arg, (identifier, arg_type) in zip(parsed, expected):
            self.assertEqual(arg.type, arg_type)
            self.assertEqual(arg.identifier, identifier)
コード例 #13
0
ファイル: gather.py プロジェクト: DarthRa/SPF
class Gather():
    """Scrape several public search engines for pages mentioning a domain.

    Raw response bodies accumulate in self.results, which a Parser then
    mines for host names and e-mail addresses.

    NOTE(review): this is Python 2 code (urllib2, 'print e' below).
    """

    def __init__(self, domain, display=None):
        self.domain = domain
        self.display = display
        self.results = ""
        # Fixed desktop user agent — presumably to look like a regular
        # browser to the search engines; confirm before changing.
        self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
        self.p = ProgressBar(display=self.display)
        # Runs all searches now so self.results is populated for the Parser.
        self.gather()
        self.parser = Parser(self.results, self.domain)

    def hosts(self):
        # Host names extracted from the collected search results.
        return self.parser.hosts()

    def emails(self):
        # E-mail addresses extracted from the collected search results.
        return self.parser.emails()

    @staticmethod
    def get_sources():
        return "Currently searching [google, bing, ask, dogpile, yandex, baidu, yahoo, duckduckgo]"

    def search(self, url, offset=1, maxoffset=0, title=""):
        """Page through *url*, replacing [[OFFSET]] with increasing offsets.

        Returns the concatenated response bodies; stops early (returning
        whatever was collected) on the first URL error.
        """
        current_offset = 0
        data = ""
        self.p.reset(title=title)
        while current_offset <= maxoffset:
            self.p.rotate()
            # Inject the current page offset into the URL template.
            temp_url = re.sub(r'\[\[OFFSET\]\]', str(current_offset), url)
            try:
                headers = { 'User-Agent' : self.user_agent }
                req = urllib2.Request(temp_url, None, headers)
                data += urllib2.urlopen(req).read()
            except urllib2.URLError as e:
                self.display.error("Could not access [%s]" % (title))
                return data
            except Exception as e:
                print e
            current_offset += offset
        self.p.done()
        return data
    
    def gather(self, maxoffset=500):
        """Query every supported engine for '@<domain>' mentions."""
        self.results += self.search(title="Google",     url="http://www.google.com/search?num=100&start=[[OFFSET]]&hl=en&meta=&q=%40\"" + self.domain + "\"", offset=100, maxoffset=maxoffset)
        self.results += self.search(title="Bing",       url="http://www.bing.com/search?q=%40" + self.domain + "&count=50&first=[[OFFSET]]", offset=50, maxoffset=maxoffset)
        self.results += self.search(title="Ask",        url="http://www.ask.com/web?q=%40" + self.domain + "&pu=100&page=[[OFFSET]]", offset=100, maxoffset=maxoffset)
        self.results += self.search(title="Dogpile",    url="http://www.dogpile.com/search/web?qsi=[[OFFSET]]&q=\"%40" + self.domain + "\"", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="Yandex",     url="http://www.yandex.com/search?text=%40" + self.domain + "&numdoc=50&lr=[[OFFSET]]", offset=50, maxoffset=maxoffset)
        self.results += self.search(title="Baidu",      url="http://www.baidu.com/s?wd=%40" + self.domain + "&pn=[[OFFSET]]", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="Yahoo",      url="https://search.yahoo.com/search?p=\"%40" + self.domain + "\"&b=[[OFFSET]]&pz=10", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="DuckDuckGo", url="https://duckduckgo.com/lite?q=\"%40" + self.domain + "\"" )
コード例 #14
0
class Gather():
    """Scrape several public search engines for pages mentioning a domain.

    Python 3 variant: raw response bodies accumulate in self.results,
    which a Parser then mines for host names and e-mail addresses.
    """

    def __init__(self, domain, display=None):
        self.domain = domain
        self.display = display
        self.results = ""
        # Fixed desktop user agent — presumably to look like a regular
        # browser to the search engines; confirm before changing.
        self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
        self.p = ProgressBar(display=self.display)
        # Runs all searches now so self.results is populated for the Parser.
        self.gather()
        self.parser = Parser(self.results, self.domain)

    def hosts(self):
        # Host names extracted from the collected search results.
        return self.parser.hosts()

    def emails(self):
        # E-mail addresses extracted from the collected search results.
        return self.parser.emails()

    @staticmethod
    def get_sources():
        return "Currently searching [google, bing, ask, dogpile, yandex, baidu, yahoo, duckduckgo]"

    def search(self, url, offset=1, maxoffset=0, title=""):
        """Page through *url*, replacing [[OFFSET]] with increasing offsets.

        Returns the concatenated response bodies; stops early (returning
        whatever was collected) on the first URL error.
        """
        current_offset = 0
        data = ""
        self.p.reset(title=title)
        while current_offset <= maxoffset:
            self.p.rotate()
            # Inject the current page offset into the URL template.
            temp_url = re.sub(r'\[\[OFFSET\]\]', str(current_offset), url)
            try:
                headers = { 'User-Agent' : self.user_agent }
                req = urllib.request.Request(str(temp_url), None, headers)
                # NOTE(review): read() returns bytes, so str() yields the
                # "b'...'" repr — the downstream Parser presumably copes
                # with that; verify before changing.
                data += str(urllib.request.urlopen(req).read())
            except urllib.error.URLError as e:
                self.display.error("Could not access [%s]" % (title))
                return data
            except Exception as e:
                print(e)
            current_offset += offset
        self.p.done()
        return data
    
    def gather(self, maxoffset=500):
        """Query every supported engine for '@<domain>' mentions."""
        self.results += self.search(title="Google",     url="http://www.google.com/search?num=100&start=[[OFFSET]]&hl=en&meta=&q=%40\"" + self.domain + "\"", offset=100, maxoffset=maxoffset)
        self.results += self.search(title="Bing",       url="http://www.bing.com/search?q=%40" + self.domain + "&count=50&first=[[OFFSET]]", offset=50, maxoffset=maxoffset)
        self.results += self.search(title="Ask",        url="http://www.ask.com/web?q=%40" + self.domain + "&pu=100&page=[[OFFSET]]", offset=100, maxoffset=maxoffset)
        self.results += self.search(title="Dogpile",    url="http://www.dogpile.com/search/web?qsi=[[OFFSET]]&q=\"%40" + self.domain + "\"", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="Yandex",     url="http://www.yandex.com/search?text=%40" + self.domain + "&numdoc=50&lr=[[OFFSET]]", offset=50, maxoffset=maxoffset)
        self.results += self.search(title="Baidu",      url="http://www.baidu.com/s?wd=%40" + self.domain + "&pn=[[OFFSET]]", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="Yahoo",      url="https://search.yahoo.com/search?p=\"%40" + self.domain + "\"&b=[[OFFSET]]&pz=10", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="DuckDuckGo", url="https://duckduckgo.com/lite?q=\"%40" + self.domain + "\"" )
コード例 #15
0
def main():
    """Entry point: look up routes between the given stops and print XML.

    Returns 1 on invalid input or when no route is found.
    """
    start, dest = process_args()
    parser = Parser(start, dest)

    try:
        routes = parser.get_routes()
    except (InvalidInput, NotFound) as e:
        # Both error cases are reported the same way.
        print(e)
        return 1

    ConstructXML().print_xml(routes)
コード例 #16
0
ファイル: main.py プロジェクト: philipmoniaga/hackathon2
def main():
    """Compute the invitees from FILE_NAME and print the formatted result."""
    service = InviteService(
        distance_estimator=DistanceStrategy(GLOBAL_CIRCLE_DISTANCE))
    customers = Parser.parsing(
        file_path=os.path.abspath(FILE_NAME), decoder=CustomerDecoder)
    FormatterOutput.output(service.calculate(customers))
コード例 #17
0
    def render(self, tutorial):
        """ Render a single HTML document for *tutorial*.

            Args:
                tutorial (document.Tutorial): tutorial object
            Returns:
                (str): Html document, or None for non-Tutorial input
        """
        if not isinstance(tutorial, Tutorial):
            return

        rendered = Renderer.render(document=tutorial)
        filtered = Parser.filter(rendered)
        return Parser.resolve_path(filtered, config.HOST)
コード例 #18
0
    def execute(cls, entrypoint, dest, ext=config.DOCEXTS[0], debug=False):
        """ Factory Method: run the parse -> extract -> render -> write pipeline.

            Args:
                entrypoint (str): url of the tutorial entry point
                dest (str): destination directory for the generated document
                ext (str): output document extension
                debug (bool): kept for interface compatibility
            Returns:
                (int): 0 on success, 1 on a handled error
        """

        err = 0
        try:
            ttp = PyTTP()
            print(f'- Parsing the entry point: {entrypoint}')
            tutorial = ttp.parse(entrypoint)

            print(f'- Extracting content from host for {tutorial}')
            urls = Parser.extract_href(tutorial.table_contents)
            ttp.extract(tutorial, urls[:2])

            print(f'- Rendering html')
            html = ttp.render(tutorial)

            print(f'- Writting ({ext}) document on disk')
            ttp.write(filename=tutorial.name, data=html, dest=dest, ext=ext)
        except HostNameError as e:
            err = 1
            print('error:', e)
        except EntryPointError as e:
            err = 1
            print(f'error:{entrypoint} is not a valid entry point')
        except (NotADirectoryError, FileTypeError) as e:
            err = 1
            print('error:', e)
        # BUG FIX: 'return err' previously sat in a finally block, which
        # silently swallowed any in-flight exception not handled above
        # (including KeyboardInterrupt/SystemExit).
        return err
コード例 #19
0
	def start(self):
		"""Bring the crawler up: load seeds, start the worker threads and
		notify MySQL; logs and re-raises any startup failure."""
		try:
			self.wait_for_start()

			self._istart = True

			"""load seed """
			self.load_seeds()	# load the initial seed URLs (from a Google search)


			"""show welcome info"""
			self.show_welcome()
			self._status._sys_start	= time()

			"""start threads"""
			# Downloader/Parser pools plus the checker and status threads.
			self._downloader = Downloader( self._config._down_num, self._status)
			self._downloader.start()
			self._parser     = Parser(self._config._parser_num, self._status )
			self._parser.start()
			self._downloader_pool_checker.start()
			self._parse_pool_checker.start()
			self._status_update.start()


			"""notify mysql, i am started"""
			self.sqlex.write_if_start()

		except (Exception) as e:
			Log().debug("start failed")
			raise(e)
			return False	# NOTE(review): unreachable — the raise above always fires
コード例 #20
0
ファイル: monito.py プロジェクト: smaass/monito
 def eval(self, code_string):
     """Parse and interpret *code_string*.

     On any failure, return a 'ExceptionName: message' string instead of
     propagating the exception.
     """
     try:
         return self.interpret(Parser.parse(code_string))
     except Exception as error:
         return '{0}: {1}'.format(error.__class__.__name__, error.args[0])
コード例 #21
0
ファイル: gather.py プロジェクト: BahtiyarB/SPF
 def __init__(self, domain, display=None):
     """Collect search-engine results for *domain* and build a Parser.

     Note: the constructor itself triggers the (slow) web searches via
     self.gather() before wrapping the raw results in a Parser.
     """
     self.domain = domain
     self.display = display
     self.results = ""
     # Fixed desktop user agent — presumably to look like a regular
     # browser to the search engines; confirm before changing.
     self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
     self.p = ProgressBar(display=self.display)
     # Populates self.results, which the Parser below consumes.
     self.gather()
     self.parser = Parser(self.results, self.domain)
コード例 #22
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_extractHref_anchorTagHrefAttrMissing_returnEmptyList(self):
     """Anchors without href attributes yield no extracted links."""
     html = '''<html>
                 <a>link1</a>
                 <a>link2</a>
             </html>'''
     links = Parser.extract_href(html)
     print(links)
     self.assertEqual(links, [])
コード例 #23
0
ファイル: controller.py プロジェクト: litchi125/tools
    def prepare(self):
        """ preparation/initialization of opts and env: parsing & checks """

        # declare nullscan options
        self.opt = Option(sys.argv)

        # check argc and argv (usage)
        self.check.check_argc(len(sys.argv))
        self.check.check_argv(sys.argv)

        # check for missing libraries / deps / python modules
        self.check.check_deps(self.file.read_file(PYDEPS))

        # parse cmdline and config options, update final options dictionary
        try:
            self.parser = Parser(self.opt.opts)
            self.parser.parse_cmdline()
            self.parser.parse_config()
            self.opt.opts = self.parser.opts
        except Exception:
            # BUG FIX: this was a bare 'except:', which also swallowed
            # SystemExit/KeyboardInterrupt and made ctrl-c unusable here.
            self.log('usage', _type='err', end='\n')

        # update final options dictionary
        self.opt.update_opts()

        # further checks for usage, options, env, etc.
        self.check.check_opts(self.opt.opts)

        # collect all py-files and grep the tools out of the py-files
        # (flattened in a single comprehension instead of append + flatten)
        py_files = self.misc.find_py_files(MOD_PATH)
        tools = [tool for py in py_files for tool in self.misc.grep_tools(py)]

        # create the locks for each tool except for excluded ones
        with ThreadPoolExecutor(50) as exe:
            for tool in tools:
                if tool not in self.opt.opts['tools']['ex_tools']:
                    exe.submit(self.file.create_lock, tool)

        # copy debug flag to target_opts (for nullscan tools)
        self.opt.opts['targets_opts']['debug'] = self.opt.opts['debug']

        return
コード例 #24
0
    def extract(self, source):
        """Extract a table from *source*.

        Returns a csv.DictReader over the table data (or None when the
        table is unsupported or not in ASCII interchange format) together
        with the parsed PDS labels. (Docstring fixed: it previously
        described image extraction, copy-pasted from ImageExtractor.)
        """
        p = Parser()
        f = open_pds(source)
        pdsdatadir, pdsfile = os.path.split(source)
        if self.log: self.log.debug("Parsing '%s'" % (source))
        self.labels = p.parse(f)
        if self.log: self.log.debug("Found %d labels" % (len(self.labels)))
        # BUG FIX: tbl was only assigned inside the ASCII branch, so a
        # supported table in any other interchange format crashed with an
        # unbound-local error at the return statement below.
        tbl = None
        if self._check_table_is_supported():
            if self.log:
                self.log.debug("Table in '%s' is supported" % (source))
            # Currently unused, but kept since the helper reads the labels.
            dim = self._get_table_dimensions()

            # Get the location of the table (strip label quoting).
            location = self._get_table_location().strip().replace("\"", "")

            # Get the structure of the table from the pointer
            struct_fname = self._get_table_structure().strip().replace(
                "\"", "")
            structurefile = getPdsFileName(struct_fname, pdsdatadir)

            # Parse the structure file to recover the column names.
            sp = ColumnParser()
            s = open_pds(structurefile)
            slabels = sp.parse(s)
            # BUG FIX: the structure file handle was leaked.
            s.close()
            columns = [l['COLUMN']['NAME'].strip().replace("\"", "")
                       for l in slabels]
            if self.log: self.log.debug("Found %d columns" % (len(columns)))
            if self.labels['TABLE']['INTERCHANGE_FORMAT'] == 'ASCII':
                locationfile = getPdsFileName(location, pdsdatadir)
                # The DictReader reads lazily, so its file handle is
                # intentionally left open for the caller.
                tbl = csv.DictReader(open(locationfile),
                                     fieldnames=columns,
                                     delimiter=' ')

        else:
            if self.log:
                self.log.error("Table is not supported '%s'" % (source))
        f.close()

        return tbl, self.labels
コード例 #25
0
ファイル: tableextractor.py プロジェクト: afrigeri/PyPDS
    def extract(self, source):
        """Extract a table from *source*.

        Returns a csv.DictReader over the table data (or None when the
        table is unsupported or not in ASCII interchange format) together
        with the parsed PDS labels. (Docstring fixed: it previously
        described image extraction, copy-pasted from ImageExtractor.)
        """
        p = Parser()
        f = open_pds(source)
        pdsdatadir, pdsfile = os.path.split(source)
        if self.log:
            self.log.debug("Parsing '%s'" % (source))
        self.labels = p.parse(f)
        if self.log:
            self.log.debug("Found %d labels" % (len(self.labels)))
        # BUG FIX: tbl was only assigned inside the ASCII branch, so a
        # supported table in any other interchange format crashed with an
        # unbound-local error at the return statement below.
        tbl = None
        if self._check_table_is_supported():
            if self.log:
                self.log.debug("Table in '%s' is supported" % (source))
            # Currently unused, but kept since the helper reads the labels.
            dim = self._get_table_dimensions()

            # Get the location of the table (strip label quoting).
            location = self._get_table_location().strip().replace('"', "")

            # Get the structure of the table from the pointer
            struct_fname = self._get_table_structure().strip().replace('"', "")
            structurefile = getPdsFileName(struct_fname, pdsdatadir)

            # Parse the structure file to recover the column names.
            sp = ColumnParser()
            s = open_pds(structurefile)
            slabels = sp.parse(s)
            # BUG FIX: the structure file handle was leaked.
            s.close()
            columns = [l["COLUMN"]["NAME"].strip().replace('"', "") for l in slabels]
            if self.log:
                self.log.debug("Found %d columns" % (len(columns)))
            if self.labels["TABLE"]["INTERCHANGE_FORMAT"] == "ASCII":
                locationfile = getPdsFileName(location, pdsdatadir)
                # The DictReader reads lazily, so its file handle is
                # intentionally left open for the caller.
                tbl = csv.DictReader(open(locationfile), fieldnames=columns, delimiter=" ")

        else:
            if self.log:
                self.log.error("Table is not supported '%s'" % (source))
        f.close()

        return tbl, self.labels
コード例 #26
0
ファイル: test_evaluation.py プロジェクト: smaass/monito
    def test_environment(self):
        """Bindings passed to new_environment shadow and extend the runtime env."""
        runtime = Monito()
        hola_value = Monito.run('(max (list 1 3 2))')
        # Deliberately rebinds '+' to multiplication to prove shadowing works.
        times_primitive = Primitive(
            '+',
            lambda x, y: x * y,
            Parser.parse_type(Parser.string_to_sexpr('Num Num -> Num')),
            runtime.environment
        )
        new_env = runtime.environment.new_environment({
            'x': 4,
            'hola': hola_value,
            '+': times_primitive,
        })

        for code, expected in (
            ('(- 3 x)', -1),
            ('(- 10 hola)', 7),
            ('(+ 2 3)', 6),
        ):
            self.assertEqual(Monito.run(code, new_env), expected)
コード例 #27
0
ファイル: main.py プロジェクト: Alireza-/data-code-kata
    def run(self):
        """
        Entry point for the program.

        Generates a fixed width file from the spec, then converts it to CSV.

        :return: none
        """
        file_cfg = self.config['FILE']

        self.logger.info("Fixed width file generator is starting ...")
        Generator().generate_fixed_width_file(
            file_cfg['SPEC_FILE'],
            int(file_cfg['NO_OF_RECORDS']),
            file_cfg['FIXED_WIDTH_FILE'])

        self.logger.info("Fixed width file parser is starting ...")
        Parser().convert_fixed_width_to_csv(
            file_cfg['SPEC_FILE'],
            file_cfg['FIXED_WIDTH_FILE'],
            file_cfg['CSV_FILE'],
            file_cfg['DELIMITER'])
コード例 #28
0
    def parse(self, entrypoint):
        """ Parse the entry point

            Args:
                entrypoint (str): url of any readable tutorial from HOST
            Returns:
                (Tutorial): tutorial built from the meta and TOC sections
            Raises:
                HostNameError: when entrypoint is not a valid host name
        """
        if not is_valid_hostname(entrypoint):
            raise HostNameError(f'{entrypoint} is not a valid host name')

        def _resolved(section):
            # Parse one section and rewrite its relative paths to HOST.
            return Parser.resolve_path(
                Parser.parse(url=entrypoint, section=section), config.HOST)

        meta = _resolved(Section.META)
        table_contents = _resolved(Section.TABLE_CONTENTS)
        name = self.__parse_tutorial_name(entrypoint)

        return Tutorial(name, meta, table_contents)
コード例 #29
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_parse_type(self):
        """Atomic and function type expressions map to the right type objects."""
        cases = [
            ("Num", NumType()),
            ("Str", StringType()),
            ("Bool", BoolType()),
            ("Void", UnitType()),
            ("Dyn", DynamicType()),
            (["Num", "->", "Num"], FunType([NumType()], NumType())),
            (["->", "Void"], FunType([], UnitType())),
            ([["Str", "->", "Str"], "->", "Num"],
             FunType([FunType([StringType()], StringType())], NumType())),
        ]
        for type_expr, expected in cases:
            self.assertEqual(Parser.parse_type(type_expr), expected)
コード例 #30
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_separate_sexpr_strings(self):
        """Top-level s-expressions are split out of a multi-form code string."""
        code = """
            (define x 3)
            (define f (x) (+ x 4))
            (f x)
        """
        sexpr_strings = Parser.separate_sexpr_strings(code)

        expected = ["(define x 3)", "(define f (x) (+ x 4))", "(f x)"]
        self.assertEqual(len(sexpr_strings), 3)
        for actual, wanted in zip(sexpr_strings, expected):
            self.assertEqual(actual, wanted)
コード例 #31
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_string_to_sexpr(self):
        """Literals, nesting and alternative bracket styles all parse."""
        self.assertEqual(Parser.string_to_sexpr("true"), True)

        self.assertEqual(
            Parser.string_to_sexpr("(and true false)"),
            ["and", True, False])

        self.assertEqual(
            Parser.string_to_sexpr("(+ (- 3 2) (sum 1 2 3 4))"),
            ["+", ["-", 3, 2], ["sum", 1, 2, 3, 4]])

        self.assertEqual(
            Parser.string_to_sexpr('(f (g (h 2 3 4) "hola") i)'),
            ["f", ["g", ["h", 2, 3, 4], '"hola"'], "i"])

        # Curly and square brackets behave like parentheses.
        nested = Parser.string_to_sexpr(
            """
            {local
                [ (a 3) (b 2) ]
                (f a b)
            }
        """
        )
        self.assertEqual(nested, ["local", [["a", 3], ["b", 2]], ["f", "a", "b"]])
コード例 #32
0
ファイル: dataCollector.py プロジェクト: orf53975/SPF-1
 def run(self):
     """Run the configured collector tool and parse its results.

     Returns None on success, or an error string when the configured
     path is missing or does not point to a file.
     """
     # verify that self.config["XXXXXXXXXX_path"] exists
     if not (self.path):
         return "ERROR: " + self.name + "_path is not configured"
     if not (os.path.isfile(self.path)):
         return "ERROR: " + self.name + "_path does not point to a valid file"
     # Start process
     process = self.run_command()
     self.results = self.load_results()
     self.parser = Parser(self.results, self.domain)
     self.cleanup()
     return None
コード例 #33
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_parse_arg(self):
        """Single argument forms parse into (identifier, type) pairs."""
        # Whitespace around the colon must not matter.
        num_arg1 = Parser.string_to_sexpr("[x : Num]")
        num_arg2 = Parser.string_to_sexpr("[x: Num]")
        self.assertEqual(num_arg1, num_arg2)

        parsed = Parser.parse_arg(num_arg1)
        self.assertEqual(parsed.identifier, "x")
        self.assertEqual(parsed.type, NumType())

        cases = [
            ("[s: Str]", "s", StringType()),
            ("[x: Bool]", "x", BoolType()),
            ("[l: (List Str)]", "l", ListType(StringType())),
            ("d", "d", DynamicType()),
            ("[f: (Str -> Num)]", "f", FunType([StringType()], NumType())),
            ("[g: (Num Str -> Bool)]", "g",
             FunType([NumType(), StringType()], BoolType())),
            ("[h: ((Num -> Num) -> (Str -> Num))]", "h",
             FunType([FunType([NumType()], NumType())],
                     FunType([StringType()], NumType()))),
        ]
        for source, identifier, arg_type in cases:
            arg = self.to_arg(source)
            self.assertEqual(arg.identifier, identifier)
            self.assertEqual(arg.type, arg_type)
コード例 #34
0
ファイル: gather.py プロジェクト: BahtiyarB/SPF
class Gather():
    """Scrape several public search engines for data about *domain*.

    NOTE(review): Python 2 code (``urllib2``, ``except Exception, e``) --
    porting to Python 3 would touch every network call; left as-is here.
    """
    def __init__(self, domain, display=None):
        self.domain = domain
        self.display = display
        self.results = ""  # raw HTML accumulated from every search() call
        # Spoof a common browser UA so the engines serve normal result pages.
        self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
        self.p = ProgressBar(display=self.display)
        # gather() is presumably provided by a subclass/mixin -- TODO confirm.
        self.gather()
        self.parser = Parser(self.results, self.domain)

    def hosts(self):
        """Return hostnames extracted from the gathered results."""
        return self.parser.hosts()

    def emails(self):
        """Return email addresses extracted from the gathered results."""
        return self.parser.emails()

    @staticmethod
    def get_sources():
        """Describe which search engines this gatherer queries."""
        return "Currently searching [google, bing, ask, dogpile, yandex, baidu, yahoo, duckduckgo]"

    def search(self, url, offset=1, maxoffset=0, title=""):
        """Fetch *url* repeatedly, substituting ``[[OFFSET]]`` with 0..maxoffset.

        Returns the concatenated response bodies. Individual fetch errors
        are printed and skipped so one bad page does not abort the search.
        """
        current_offset = 0
        data = ""
        self.p.reset(title=title)
        while current_offset <= maxoffset:
            self.p.rotate()
            # Inject the current result-page offset into the URL template.
            temp_url = re.sub(r'\[\[OFFSET\]\]', str(current_offset), url)
            try:
                headers = { 'User-Agent' : self.user_agent }
                req = urllib2.Request(temp_url, None, headers)
                data += urllib2.urlopen(req).read()
            except Exception, e:
                print e
            current_offset += offset
        self.p.done()
        return data
コード例 #35
0
    def extract(self, tutorial, urls=None, trace=True):
        """ Extracting content section from each given url

            Args:
                tutorial (document.Tutorial): tutorial object collecting contents
                urls (list): urls to parse (defaults to no urls)
                trace (boolean): print the current url that is being parsed
        """
        # Fix: the original used a mutable default argument (urls=[]), which
        # is shared across calls; a None sentinel is the safe equivalent.
        if urls is None:
            urls = []

        # Silently ignore anything that is not a Tutorial.
        if not isinstance(tutorial, Tutorial):
            return

        for url in urls:
            if trace: print(f'\t. {url}....................')
            content = Parser.parse(url=url, section=Section.CONTENT)
            tutorial.contents.append(content)
コード例 #36
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_parse_function_with_types(self):
        """A typed two-argument function definition parses into an App body."""

        source = "(fun ([x: Num] [y: Num]) (+ x y))"
        parsed = Parser.parse(source)
        arguments = parsed.args

        self.assertEqual(len(arguments), 2)
        self.assertTrue(isinstance(arguments[0], Argument))

        # Both parameters are Num-typed; names come back in order.
        for argument, name in zip(arguments, ("x", "y")):
            self.assertEqual(argument.type, NumType())
            self.assertEqual(argument.identifier, name)

        self.assertTrue(isinstance(parsed.body, App))
コード例 #37
0
ファイル: engine.py プロジェクト: derrick0714/course_code
	def __init__( self):
		"""Initialise the crawler engine: config, worker pools, checker
		threads, URL-filtering strategies, the download folder and the
		MySQL manager. Nothing starts running until start() is called."""
		self._istart		= False
		self._status		= Status()

		"""--- load config file----"""
		self._config 		= Configuration();
	
		"""--- core object ----"""
		self._downloader	= None
		# Parser pool is created eagerly here, sized from the config.
		self._parser		= Parser( self._config._down_num, self._status )

		"""--- memory models --- """
		self._download_pool	= SafeQueue() #Store the html objects to be downloaded by the downloader
		self._parse_pool	= SafeQueue() #Store the html objects to be parsed by the parser
		
		"""--- checker threads --- """
		"""The target is the function passed in to 
		run in the thread. Those two threads keep checking 
		and assigning jobs to the two thread pools"""
		self._downloader_pool_checker = Thread( target=self.download_pool_checker)
		self._parse_pool_checker = Thread( target=self.parse_pool_checker)
		
		"""---  threads --- """
		self._status_update = Thread( target=self.status_update) #every second, this thread post runtime info to remote mysql

		""" ---strategies--- """
		# Each handler filters or rewrites candidate URLs before download.
		self._earlyvisithandler	=	EarlyVisitHandler()
		self._robothandler  	=	RobotHandler()
		self._cgihandler		=	CGIHandler()
		self._nestlevelhandler 	=	NestLevelHandler()
		self._schemehandler    	=	SchemeHandler()
		self._filetypehandler	=	FileTypeHandler()
		self._bookmarkhandler	=	BookMarkHandler()
		self._omitindex			=	OmitIndex()
		self._urlextender		=	URLExtender()			
	
		""" ---init the path for saving data, if the folder don't exist, create it ---"""
		# Per-run folder: <down_path>/YYYY-MM-DD/HH-MM-SS/
		self._path			= self._config._down_path+"/"+ strftime('%Y-%m-%d', localtime())+"/"+ strftime('%H-%M-%S', localtime())+"/"
		if not os.path.exists(self._path):
			os.makedirs(self._path)

		self._config._down_path = self._path
		
		self._keywords_links= []

		""" ---Mysql Manager--- """
		self.sqlex      = DatabseManager(self._config)
コード例 #38
0
class TestParser(unittest.TestCase):
    """End-to-end OCR account tests: parse digits, validate, check the
    rendered account line (including ILL/ERR annotations)."""

    parser = Parser()
    validator = Validator()

    def _assert_account(self, raw_input, expected_output):
        """Parse *raw_input*, validate it, and compare the account line.

        Extracted helper: the original four tests repeated this
        parse/validate/assert sequence verbatim.
        """
        account_number = self.parser.parse_an_account_number(raw_input)
        account = self.validator.validate_account(account_number)
        self.assertEqual(account.account_data, expected_output)

    def test_read_invalid_account_number_ill(self):
        # One unreadable digit -> '?' placeholder plus ILL marker.
        self._assert_account(data.INPUT123_INVALID, "12345678? ILL")

    def test_read_invalid_digits_ill_all(self):
        # Every digit unreadable.
        self._assert_account(data.INPUT_WITH_ILLS_ALL, "????????? ILL")

    def test_read_checksumm_valid_one(self):
        self._assert_account(data.INPUT_VALID_CHECKSUM1, "000000051")

    def test_read_checksumm_valid_two(self):
        self._assert_account(data.INPUT_VALID_CHECKSUM2, "345882865")

    def test_read_checksumm_invalid(self):
        # Readable digits but failing checksum -> ERR marker.
        self._assert_account(data.INPUT_INVALID_CHECKSUM, "664371495 ERR")
コード例 #39
0
    def start(self):
        """Parse CLI options, validate them, then run the chosen installer."""

        # Banner, argument parsing and sanity checks.
        Help.banner()
        self.opts = vars(Parser.parseArgs())
        checker = Check(self.opts)
        checker.checkArgc()
        checker.checkArgs()
        checker.checkInstallType()

        # Dispatch to the text-mode or curses installer.
        if self.opts['type'] == 'text':
            installer = TextInstaller(self.opts['verbose'])
        else:
            installer = CursesInstaller(self.opts['verbose'])
        installer.run()

        return
コード例 #40
0
    def start(self):
        """ do first needed things """

        # init, usage, checks, etc.
        Help.banner()
        # Parse command-line args into a plain dict of options.
        self.opts = vars(Parser.parseArgs())
        c = Check(self.opts)
        c.checkArgc()
        c.checkArgs()
        c.checkInstallType()

        # run installer here
        # Dispatch on the requested installer type ('text' vs curses UI).
        if self.opts['type'] == 'text':
            t = TextInstaller(self.opts['verbose'])
            t.run()
        else:
            c = CursesInstaller(self.opts['verbose'])
            c.run()

        return
コード例 #41
0
ファイル: monito.py プロジェクト: smaass/monito
    def repl(cls):
        """Interactive read-eval-print loop.

        Input lines are accumulated until their parentheses balance, so
        multi-line expressions can be typed naturally.
        """
        print('Welcome to the Monito REPL\n')
        runtime = Monito()
        pending_lines = 0
        buffered = ''

        while runtime.active:

            prompt = '>> ' if pending_lines == 0 else '\t'

            buffered += cls.input(prompt)
            balanced, fail_index = Parser.balanced_parens(buffered)
            # Unbalanced only at the very end means the expression is merely
            # incomplete: keep the buffer and read another line.
            if not balanced and fail_index == len(buffered):
                pending_lines += 1
                continue

            value = runtime.eval(buffered)
            if value is not None:
                print(value)
            pending_lines = 0
            buffered = ''
コード例 #42
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_extractHref_anchorTag_returnList(self):
     """extract_href on HTML containing anchors yields a non-None result."""
     markup = '<html><a href="/link1"></a><a href="/link 2"></a></html>'
     hrefs = Parser.extract_href(markup)
     print(hrefs)
     self.assertIsNotNone(hrefs)
コード例 #43
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_extractHref_anchorTagMissing_returnEmptyList(self):
     """extract_href returns an empty list when the HTML has no anchors."""
     html = '<html></html>'
     # Fix: the original passed a hard-coded '' and left `html` unused,
     # so the anchor-free markup named in the test was never exercised.
     res = Parser.extract_href(html)
     print(res)
     self.assertEqual(res, [])
コード例 #44
0
 def start():
     """Boot sequence: configuration first, then the parser."""
     # Order matters: Parser presumably reads values Config sets up -- TODO confirm.
     Config.start()
     Parser.start()
コード例 #45
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_ast_generation(self):
        """Literals parse into the expected AST node classes."""

        for source, node_class in (("false", Boolean),
                                   ("2", Number),
                                   ("0.2", Number)):
            self.assertTrue(isinstance(Parser.parse(source), node_class))
コード例 #46
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_parseTableContents_validEntryPoint_returnStr(self):
     """Parsing the table-of-contents section of the entry point yields a str."""
     parsed = Parser.parse(url=config.ENTRYPOINT,
                           section=Section.TABLE_CONTENTS)
     print('')
     self.assertIsInstance(parsed, str)
コード例 #47
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_resolvePath_emptyArg_returnOrigin(self):
     """resolve_path with empty input returns the input unchanged."""
     markup = ''
     resolved = Parser.resolve_path(markup, '')
     print(resolved)
     self.assertEqual(resolved, markup)
コード例 #48
0
class Engine(object):
	"""Crawler engine: seeds from Google search results, fans work out to
	downloader/parser thread pools, filters URLs through a chain of
	handler strategies, and reports status to MySQL once per second.

	NOTE(review): Python 2 code (print statements); left as-is.
	"""
	def __init__( self):
		"""Set up config, pools, checker threads, strategies, the per-run
		download folder and the MySQL manager. start() begins crawling."""
		self._istart		= False
		self._status		= Status()

		"""--- load config file----"""
		self._config 		= Configuration();
	
		"""--- core object ----"""
		# Both are created lazily in start(), sized from the config.
		self._downloader	= None
		self._parser		= None

		"""--- memory models --- """
		self._download_pool	= SafeQueue() #Store the html objects to be downloaded by the downloader
		self._parse_pool	= SafeQueue() #Store the html objects to be parsed by the parser
		
		"""--- checker threads --- """
		"""The target is the function passed in to 
		run in the thread. Those two threads keep checking 
		and assigning jobs to the two thread pools"""
		self._downloader_pool_checker = Thread( target=self.download_pool_checker)
		self._parse_pool_checker = Thread( target=self.parse_pool_checker)
		
		"""---  threads --- """
		self._status_update = Thread( target=self.status_update) #every second, this thread post runtime info to remote mysql

		""" ---strategies--- """
		# Each handler filters or rewrites candidate URLs before download.
		self._earlyvisithandler	=	EarlyVisitHandler()
		self._robothandler  	=	RobotHandler()
		self._cgihandler		=	CGIHandler()
		self._nestlevelhandler 	=	NestLevelHandler()
		self._schemehandler    	=	SchemeHandler()
		self._filetypehandler	=	FileTypeHandler()
		self._bookmarkhandler	=	BookMarkHandler()
		self._omitindex			=	OmitIndex()
		self._urlextender		=	URLExtender()			
	
		""" ---init the path for saving data, if the folder don't exist, create it ---"""
		# Per-run folder: <down_path>/YYYY-MM-DD/HH-MM-SS/
		self._path			= self._config._down_path+"/"+ strftime('%Y-%m-%d', localtime())+"/"+ strftime('%H-%M-%S', localtime())+"/"
		if not os.path.exists(self._path):
			os.makedirs(self._path)

		self._config._down_path = self._path
		
		self._keywords_links= []

		""" ---Mysql Manager--- """
		self.sqlex      = DatabseManager(self._config)

		#self.f= open("data.txt", 'w')

	def load_seeds(self):
		"""Seed the download pool from a Google search on the configured
		keywords, dropping URLs rejected by any filter strategy."""
		#load seed info from config file	
		#print "load_seeds 1"
		#load seed from 
		contacter = SearchGoogle(self._config._keywords, self._config._result_num)
		self._keywords_links = contacter.getURLs()
		#append seeds, which from google search result, into download pool
		#print "load_seeds 2"
		#self._keywords_links.insert(0, "https://twitter.com/")
		#self._keywords_links.insert(0, "https://twitter.com/signup?context=login")
		
		i = 0
		for url in self._keywords_links:
			if i < self._config._result_num:
				#print "@@{0}".format(url)
				html_task = Html(url)

				#print "@@1"
				# Filter chain: scheme, bookmark (#fragment), CGI, nesting
				# depth, file type. Each rejection bumps a status counter.
				if(self._schemehandler.SchemeChecker(html_task)==False):
					#print("Ingore the wrong scheme, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					#print "@@2"
					self._status._scheme+=1
					continue
				if(self._bookmarkhandler.BookMarkChecker(html_task)==True):
					#print("Ingore bookmark link, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					#print "@@3"
					self._status._bookmark+=1
					continue
				if(self._cgihandler.FindCGI(html_task)==True):
					#print("Ingore the link contain cgi, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					#print "@@4"
					self._status._cgi+=1
					continue
				if(self._nestlevelhandler.checknestlevel(html_task,self._config._parser_nlv)==True):
					self._status._nestlv +=1
					#print "@@5"
					#print("Ingore the link nested too much, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					continue
				if(self._filetypehandler.FileTypeChecker(html_task)==False):
					#print "@@6"
					self._status._file_type +=1
					continue
				#print "@@7"
				'''
				if(self._earlyvisithandler.check_visited(html_task) == True):
					self._status._early_visit +=1
					#print("Ingore the link visited before, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					continue
				'''
				self._omitindex.Omit(html_task)
				"""
				print "@@8"
				if(self._robothandler.is_allowed(html_task) == False):
					print "@@9"
					self._status._robot +=1
					#print("Blocked by the Robot.txt, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					continue
				print "@@10"
				"""
				# Record as visited (keyed by URL md5) and queue for download.
				self._earlyvisithandler.add_entry(html_task._md5, html_task)
				self._download_pool.append(html_task)
				'''If use the following two line of code, then the program won't run, which means checking for revisit works'''
				'''however, the dic should be safe with a lock'''
				#self._visited_dic[html_task._md5] = html_task._url 
				#print(len(self._visited_dic))
				#print "@@11"
			else:

				break
			i+=1
		#print "load_seeds 3"
	def show_welcome(self):
		"""Print the run configuration and the seed URLs to the console."""
		print("download folder:"+self._path)
		print "key words:"+self._config._keywords
		print "donload thread num: {0}".format(self._config._down_num)
		print "parse thread num: {0}".format(self._config._parser_num)
		print "Load " +str(self._config._result_num)+" results from google search:"
		
		i = 0
		for url in self._keywords_links:
			if i < self._config._result_num:
				print ("[{0}]".format(i)+url)
			i+=1
		print "\n------------------------------------------------------------------------\n"

		#raw_input("press any key to start crawling, press second key to stop")
	
	def wait_for_start(self):
		"""Block (polling MySQL once per second) until the remote web UI
		flags that crawling should begin."""
		print "ready for start....."
		print "go to http://dengxu.me/crawling/ to input some key words & see the result "

		while( self.sqlex.read_if_start(self._config)!= True):
			sleep(1)
		print "\n------------------------------------------------------------------------\n"
		print "starting crawling engine...."


	def start(self):
		"""Wait for the go signal, load seeds, then start all worker and
		checker threads. Returns False is unreachable after the re-raise."""
		try:
			self.wait_for_start()

			self._istart = True
			
			"""load seed """
			self.load_seeds()	#load seeds from google search 

			
			"""show welcome info"""
			self.show_welcome()
			self._status._sys_start	= time()

			"""start threads"""
			self._downloader = Downloader( self._config._down_num, self._status)
			self._downloader.start()
			self._parser     = Parser(self._config._parser_num, self._status )
			self._parser.start()
			self._downloader_pool_checker.start()
			self._parse_pool_checker.start()
			self._status_update.start()


			"""notify mysql, i am started"""
			self.sqlex.write_if_start()
			
		except (Exception) as e:
			Log().debug("start failed")
			raise(e)
			return False

		
		
	def stop(self):
		"""Stop crawling: clear both pools, stop the worker pools, and join
		the checker/status threads (their loops exit once _istart is False)."""
		self._istart = False
		""""clear download and parse popl"""
		self._download_pool.clear()
		self._parse_pool.clear()

		"""stop downloader and parser threads"""
		self._downloader.stop()
		self._parser.stop()
		""""Those two checker threads will end when the thread who calls them ends"""
		self._downloader_pool_checker.join()
		self._parse_pool_checker.join()
		self._status_update.join()
		print ("Engine is stopping")

	def pause(self):
		"""Not implemented."""
		pass

	def finish_download(self, html_task):
		"""Downloader callback: hand the fetched page over to the parse pool."""
			
		
		
		
		sentence = "Downloaded:[No.{0}] time:{1:0.1f} page:depth_parent {2}_{3} http-code: {4} data-size: {5}byes url: {6}"\
			.format(self._status._download_times,time()-self._status._sys_start,html_task._depth,\
		html_task._parent,html_task._return_code, html_task._data_size, html_task._url )

		#if self._status._download_times <= 500 :
		#	self.f.write(sentence+"\n")
			


		"""caculate the path for saving files"""
		full_path = self._path+"[No.{0}]_".format(self._status._download_times)+".html"

		"""save html data to files"""
		#f= open(full_path, 'w')
		#f.write(html_task._data)
		#f.close()


		"""After downloading, pass the data(still using the html objects) to the parse pool"""
		self._parse_pool.append(html_task)




	def finish_parse(self, html_task):
		"""Parser callback: requeue the task for download unless it was
		already visited or is disallowed by robots.txt."""
		'''
		print("parsed:[No.{0}] time:{1:0.1f} page:depth_parent {2}_{3} http-status: {4} data-size: {5}byes url:{6}"\
			.format(self._status._download_times,time()-self._status._sys_start,html_task._depth,\
		html_task._parent,html_task._return_code, html_task._data_size, html_task._url))
		'''
		"""After parsing, pass the urls to be downloaded to the download pool"""
		if(self._earlyvisithandler.check_visited(html_task) == True):
			#print("Ingore the link visited before, this link is within page {0} , so don't put it in queue".format(html_task._parent), html_task._url)
			self._status._early_visit +=1
			return
		if(self._robothandler.is_allowed(html_task) == False):
			#print("Blocked by the Robot.txt, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
			self._status._robot +=1
			return
		
		self._earlyvisithandler.add_entry(html_task._md5, html_task)
		self._download_pool.append(html_task)
		




	def download_pool_checker(self):
		"""Checker-thread loop: feed tasks from the download pool to the
		downloader, sleeping briefly when the pool is empty."""
		while (self._istart == True):
			new_download_task = self._download_pool.pop_left()
			"""If there is no task remain in the download pool, put the thread into sleep"""
			"""else pop the new task, and download it"""
			"""for the engine to get the result to put into the parse pool, we need to pass the function finish_download down as a callback"""
			
			if (new_download_task == None):
				#print("No task remaining in download_pool")
				sleep(0.1)
			else:
				self._downloader.queue_download_task(new_download_task , self.finish_download)


	def parse_pool_checker(self):
		"""Checker-thread loop: feed tasks from the parse pool to the
		parser, sleeping briefly when the pool is empty."""
		while (self._istart == True):
			new_parse_task = self._parse_pool.pop_left()
			if (new_parse_task == None):
				#print("sleeping")
				sleep(0.1)				
			else:

				self._parser.queue_parse_task(new_parse_task, self.finish_parse)





	#~~~see result at http://dengxu.me/crawling/
	def status_update(self):
		"""Status-thread loop: once per second, print a summary line and
		push counters plus recent downloads to MySQL."""
		while (self._istart == True):

			self._status._download_queue = self._downloader.len()
			self._status._parse_queue = self._parser.len()
			
			
			sentence = "[time: {0:0.1f}],queue:{8}, down: {1}, total: {2:0.1f}MB | queue:{9}, parsed: {3},scheme:{10}, cig: {4}, bookmark: {11} type {12} visited: {5}, robot: {6},nestlv: {7} | error: 404: {13} , timeout: {14}"\
			.format( time()-self._status._sys_start,\
		 	self._status._download_times, float(self._status._download_size)/1024/1024, self._status._parse_times\
		 	,self._status._cgi, self._status._early_visit, self._status._robot, self._status._nestlv\
		 	,self._downloader.len(), self._parser.len(),self._status._scheme_type, self._status._bookmark, self._status._file_type\
		 	,self._status._404,self._status._socket_timeout)
			
			print sentence

			#if( self._status._download_times > 500):
			#	self.f.write( sentence+"\n")
			

			"""update status tp mysql"""
			self.sqlex.write_status(self._status)
			
			"""update recent download url"""
			self.sqlex.write_recent_download(self._status)
			
			sleep(1)
コード例 #49
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_resolvePath_emptyAttr_returnOrigin(self):
     """resolve_path leaves markup with an empty href attribute untouched."""
     markup = '<a href=""></a>'
     resolved = Parser.resolve_path(markup, config.HOST)
     print(resolved)
     self.assertEqual(resolved, markup)
コード例 #50
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_balanced_parens(self):
        """balanced_parens classifies bracket strings correctly."""

        balanced_sources = ("()", "(a)", "[][]", "(a [b] (c {d}))",
                            "(ab [c e (e) {a}] [d])")
        unbalanced_sources = ("(", ")", "(a))", "([][]}", "{[[]}", ")ab(")

        for source in balanced_sources:
            self.assertTrue(Parser.balanced_parens(source)[0])
        for source in unbalanced_sources:
            self.assertFalse(Parser.balanced_parens(source)[0])
コード例 #51
0
# Author: Abubakar Nur Khalil
# License: MIT
# Purpose: Appropriate parsed output from Parser

from utils.tokens import TokenType

from core.scanner import Scanner
from core.parser import Parser

from tools.custom_syntax import Scanner as _Virgil
from tools.custom_syntax import Parser  as _Dante

# Remember we always need to generate the KSL first and pass it over
# KSL[0] is consumed by the Scanner, KSL[1] by the Parser (see below).
KSL = _Dante(_Virgil('').scan()).parse()

# Small demo program used as scanner/parser input.
source = """
var name = "ank";
77.67 * (8 // 2);
"""

print('Source code:')
print(source)

# Tokenise the demo program with the scanner half of the KSL.
tks = Scanner(source, KSL[0]).scan()

# Build a parser over the token stream with the parser half of the KSL.
pr = Parser(tks, KSL[1])

print("\nFirst Token is variable (VAR):", pr.check(TokenType.VAR)) # True
コード例 #52
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def to_arg(self, arg_string):
        """Test helper: parse *arg_string* into an argument node."""
        return Parser.parse_arg(Parser.string_to_sexpr(arg_string))