コード例 #1
0
def generate(strategy, source, target, time, test, name):
    """Mine *source* with the requested strategy and export a microTOSCA model.

    Args:
        strategy: name of the miner strategy, resolved via Parser.
        source: path/identifier of the system to mine.
        target: destination for the exported microTOSCA file.
        time: time budget forwarded to the dynamic miner.
        test: test specification forwarded to the dynamic miner.
        name: name given to the exported model.
    """
    nodes = {}
    # Look up the configuration of the requested mining strategy.
    strategyConfig = Parser.searchMinerStrategy(strategy)

    # Run the static miner when one is configured.
    if 'static' in strategyConfig:
        print('Executing static mining...')
        StaticMinerContext.doStaticMining(
            strategyConfig['static']['class'],
            source,
            strategyConfig['static'].get('args', {}),
            nodes)

    # Run the dynamic miner when one is configured.
    if 'dynamic' in strategyConfig:
        print('Executing dynamic mining...')
        # BUG FIX: when no 'args' key existed, the original assigned
        # {'time': time} and then immediately replaced it with
        # {'test': test}, silently dropping the time parameter.
        args = strategyConfig['dynamic'].setdefault('args', {})
        args['time'] = time
        args['test'] = test
        DynamicMinerContext.doDynamicMining(
            strategyConfig['dynamic']['class'], source, args, nodes)

    # Load and run every configured refinement strategy.
    refinerStrategies = Parser.getRefinerStrategies()
    if refinerStrategies:
        print('Executing Refinement...')
        RefinerContext.doRefinement(refinerStrategies, nodes)

    # Export the resulting microTOSCA model.
    print('Exporting microTOSCA...')
    YMLExporter.export(nodes, target, name)
コード例 #2
0
	def extract(self, source):
		"""Extract an image from *source*.

		If the image is supported an instance of PIL's Image is returned, otherwise None.
		The parsed PDS labels are returned alongside in both cases.
		"""
		# NOTE(review): legacy Python 2 code ('print readSize' and the
		# old-style raise below) — this block will not run under Python 3.
		p = Parser()
		f = open_pds(source)
		if self.log: self.log.debug("Parsing '%s'" % (source))
		self.labels = p.parse(f)
		if self.log: self.log.debug("Found %d labels" % (len(self.labels)))
		if self._check_image_is_supported():
			if self.log: self.log.debug("Image in '%s' is supported" % (source))
			dim = self._get_image_dimensions()
			loc = self._get_image_location()
			# Sample size (bits) and type drive both the number of bytes
			# read and the PIL decoder mode chosen further down.
			imageSampleBits = int(self.labels['IMAGE']['SAMPLE_BITS'])
			imageSampleType = self.labels['IMAGE']['SAMPLE_TYPE']
			md5Checksum = self._get_image_checksum()
			if self.log: self.log.debug("Image dimensions should be %s" % (str(dim)))
			if self.log: self.log.debug("Seeking to image data at %d" % (loc))
			f.seek(loc)
			# 8-bit: one byte per pixel; 16-bit: two bytes per pixel.
			# NOTE(review): readSize stays unbound for any other
			# SAMPLE_BITS value and would raise NameError below — confirm
			# that inputs are restricted to 8/16 bits.
			if imageSampleBits == 8:
				readSize = dim[0] * dim[1]
			elif imageSampleBits == 16:
				readSize = dim[0] * dim[1] * 2
			print readSize  # NOTE(review): leftover debug output?
			if self.log: self.log.debug("Seek successful, reading data (%s)" % (readSize))
			# rawImageData = f.readline()
			# f.seek(-int(self.labels["RECORD_BYTES"]), os.SEEK_CUR)
			rawImageData = f.read(readSize)
			if md5Checksum:
				# Verify the raw image bytes against the checksum stored in
				# the PDS label; optionally raise on mismatch.
				rawImageChecksum = hashlib.md5(rawImageData).hexdigest()
				checksumVerificationPassed = rawImageChecksum == md5Checksum and True or False
				if not checksumVerificationPassed:
					if self.log: self.log.debug("Secure hash verification failed")
					if self.raisesChecksumError:
						errorMessage = "Verification failed! Expected '%s' but got '%s'." % (md5Checksum, rawImageChecksum)
						raise ChecksumError, errorMessage
				else:
					if self.log: self.log.debug("Secure hash verification passed")
			if self.log: self.log.debug("Read successful (len: %d), creating Image object" % (len(rawImageData)))
			# The frombuffer defaults may change in a future release;
			# for portability, change the call to read:
			# frombuffer(mode, size, data, 'raw', mode, 0, 1).
			if (imageSampleBits == 16) and imageSampleType == ('MSB_INTEGER'):
				# 16-bit big-endian samples: decode as float, scale down by
				# 16, then convert to an 8-bit luminance image.
				#img = Image.frombuffer('I', dim, rawImageData, 'raw', 'I;16BS', 0, 1)
				img = Image.frombuffer('F', dim, rawImageData, 'raw', 'F;16B', 0, 1)
				img = ImageMath.eval("convert(a/16.0, 'L')", a=img)
			else:
				# Default path: 8-bit luminance image.
				img = Image.frombuffer('L', dim, rawImageData, 'raw', 'L', 0, 1)
			if self.log:
				self.log.debug("Image result: %s" % (str(img)))
				self.log.debug("Image info: %s" % (str(img.info)))
				self.log.debug("Image mode: %s" % (str(img.mode)))
				self.log.debug("Image size: %s" % (str(img.size)))
		else:
			if self.log: self.log.error("Image is not supported '%s'" % (source))
			img = None
		f.close()

		return img, self.labels
コード例 #3
0
ファイル: imageextractor.py プロジェクト: afrigeri/PyPDS
	def extract(self, source):
		"""Extract an image from *source*.

		Returns a PIL Image when the PDS image is supported, otherwise
		None; the parsed labels are returned in both cases.
		"""
		pds_parser = Parser()
		pds_file = open_pds(source)
		if self.log:
			self.log.debug("Parsing '%s'" % (source))
		self.labels = pds_parser.parse(pds_file)
		if self.log:
			self.log.debug("Found %d labels" % (len(self.labels)))
		if self._check_image_is_supported():
			if self.log:
				self.log.debug("Image in '%s' is supported" % (source))
			dimensions = self._get_image_dimensions()
			offset = self._get_image_location()
			if self.log:
				self.log.debug("Image dimensions should be %s" % (str(dimensions)))
				self.log.debug("Seeking to image data at %d" % (offset))
			pds_file.seek(offset)
			if self.log:
				self.log.debug("Seek successful, reading data")
			# One byte per pixel: width * height bytes of raw image data.
			raw_data = pds_file.read(dimensions[0] * dimensions[1])
			if self.log:
				self.log.debug("Read successful (len: %d), creating Image object" % (len(raw_data)))
			# The frombuffer defaults may change in a future release; the
			# explicit 'raw' decoder arguments keep this call portable.
			image = Image.frombuffer('L', dimensions, raw_data, 'raw', 'L', 0, 1)
			if self.log:
				self.log.debug("Image result: %s" % (str(image)))
				self.log.debug("Image info: %s" % (str(image.info)))
				self.log.debug("Image size: %s" % (str(image.size)))
		else:
			if self.log:
				self.log.error("Image is not supported '%s'" % (source))
			image = None
		pds_file.close()

		return image, self.labels
コード例 #4
0
 def __init__(self, domain, display=None):
     """Collect search-engine results for *domain* and build a Parser.

     Note: the constructor itself triggers the (slow) web searches via
     self.gather() before wrapping the raw results in a Parser.
     """
     self.domain = domain
     self.display = display
     self.results = ""
     # Fixed desktop user agent — presumably to look like a regular
     # browser to the search engines; confirm before changing.
     self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
     self.p = ProgressBar(display=self.display)
     # Populates self.results, which the Parser below consumes.
     self.gather()
     self.parser = Parser(self.results, self.domain)
コード例 #5
0
def invitees():
    """Fixture-style generator: yield the invite result for the test data."""
    estimator = DistanceStrategy(GLOBAL_CIRCLE_DISTANCE)
    invite_service = InviteService(distance_estimator=estimator)
    customer_data = Parser().parsing(
        file_path='./test_customer.json', decoder=CustomerDecoder)
    yield invite_service.calculate(customer_data)
    def init(self):
        """Parse the input file, build the grid and the constraint service."""
        file_parser = Parser(self.file_path)
        file_parser.parse()

        # The factory turns the parsed data into the working grid.
        factory = GridFactory(file_parser.get_parsed_data())
        factory.create_grid()
        self.grid = factory.get_grid()
        self.constraint_service = ConstraintService(self.grid)
コード例 #7
0
 def input_cmd(self):
     """Read the next MiniDFS command from stdin and queue the request.

     Does nothing until responses from every peer have been received
     (recv_count tracks them); then parses the typed command, publishes
     it in self.request_buffer and resets the response counter.
     """
     if self.recv_count < len(self.peer_address_list):
         return
     parser = Parser()
     cmd_str = input("MiniDFS> ")
     # BUG FIX: a leftover debug line ('cmd_str = "put ptb.wrd"') used to
     # overwrite the user's input here; the typed command is now honored.
     parser.judge_cmd(cmd_str)
     self.request_buffer = parser.data
     self.recv_count = 0
コード例 #8
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
    def test_resolvePath_srcAttrElements_returnFullPath(self):
        """src attributes with absolute paths get prefixed with the host."""
        cases = [
            ('<img src="/testing"/>',
             f'<img src="{config.HOST}/testing"/>'),
            ('<iframe src="/testing"></iframe>',
             f'<iframe src="{config.HOST}/testing"></iframe>'),
        ]
        for html, expected in cases:
            res = Parser.resolve_path(html, config.HOST)
            print(res)
            self.assertEqual(res, expected)
コード例 #9
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
    def test_resolvePath_hrefAttrElements_returnFullPath(self):
        """href attributes with absolute paths get prefixed with the host."""
        cases = [
            ('<a href="/testing"></a>',
             f'<a href="{config.HOST}/testing"></a>'),
            ('<link href="/testing"/>',
             f'<link href="{config.HOST}/testing"/>'),
        ]
        for html, expected in cases:
            res = Parser.resolve_path(html, config.HOST)
            print(res)
            self.assertEqual(res, expected)
コード例 #10
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
    def test_resolvePath_attrMissing_returnOrigin(self):
        """Elements without src/href attributes are returned unchanged."""
        for html in ('<img/>', '<a></a>'):
            res = Parser.resolve_path(html, config.HOST)
            print(res)
            self.assertEqual(res, html)
コード例 #11
0
ファイル: environment.py プロジェクト: smaass/monito
 def add_primitives(self, bindings):
     """Wrap each (name, (impl, type-string)) entry in a Primitive and
     register all of them in this environment."""
     primitives = {}
     for name, spec in bindings.items():
         # spec[0] is the implementation, spec[1] its type expression.
         parsed_type = Parser.parse_type(Parser.string_to_sexpr(spec[1]))
         primitives[name] = Primitive(name, spec[0], parsed_type, self)
     return self.update(primitives)
コード例 #12
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_parse_args(self):
        """parse_args handles typed, parameterized and untyped arguments."""
        args_str = "([x: (Num -> Str)] [y: (List Num)] z)"
        parsed = Parser.parse_args(Parser.string_to_sexpr(args_str))

        self.assertEqual(len(parsed), 3)

        expected = [
            ("x", FunType([NumType()], StringType())),
            ("y", ListType(NumType())),
            ("z", DynamicType()),
        ]
        for arg, (identifier, arg_type) in zip(parsed, expected):
            self.assertEqual(arg.type, arg_type)
            self.assertEqual(arg.identifier, identifier)
コード例 #13
0
ファイル: gather.py プロジェクト: DarthRa/SPF
class Gather():
    """Scrape several public search engines for pages mentioning a domain.

    Raw response bodies accumulate in self.results, which a Parser then
    mines for host names and e-mail addresses.

    NOTE(review): this is Python 2 code (urllib2, 'print e' below).
    """

    def __init__(self, domain, display=None):
        self.domain = domain
        self.display = display
        self.results = ""
        # Fixed desktop user agent — presumably to look like a regular
        # browser to the search engines; confirm before changing.
        self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
        self.p = ProgressBar(display=self.display)
        # Runs all searches now so self.results is populated for the Parser.
        self.gather()
        self.parser = Parser(self.results, self.domain)

    def hosts(self):
        # Host names extracted from the collected search results.
        return self.parser.hosts()

    def emails(self):
        # E-mail addresses extracted from the collected search results.
        return self.parser.emails()

    @staticmethod
    def get_sources():
        return "Currently searching [google, bing, ask, dogpile, yandex, baidu, yahoo, duckduckgo]"

    def search(self, url, offset=1, maxoffset=0, title=""):
        """Page through *url*, replacing [[OFFSET]] with increasing offsets.

        Returns the concatenated response bodies; stops early (returning
        whatever was collected) on the first URL error.
        """
        current_offset = 0
        data = ""
        self.p.reset(title=title)
        while current_offset <= maxoffset:
            self.p.rotate()
            # Inject the current page offset into the URL template.
            temp_url = re.sub(r'\[\[OFFSET\]\]', str(current_offset), url)
            try:
                headers = { 'User-Agent' : self.user_agent }
                req = urllib2.Request(temp_url, None, headers)
                data += urllib2.urlopen(req).read()
            except urllib2.URLError as e:
                self.display.error("Could not access [%s]" % (title))
                return data
            except Exception as e:
                print e
            current_offset += offset
        self.p.done()
        return data
    
    def gather(self, maxoffset=500):
        """Query every supported engine for '@<domain>' mentions."""
        self.results += self.search(title="Google",     url="http://www.google.com/search?num=100&start=[[OFFSET]]&hl=en&meta=&q=%40\"" + self.domain + "\"", offset=100, maxoffset=maxoffset)
        self.results += self.search(title="Bing",       url="http://www.bing.com/search?q=%40" + self.domain + "&count=50&first=[[OFFSET]]", offset=50, maxoffset=maxoffset)
        self.results += self.search(title="Ask",        url="http://www.ask.com/web?q=%40" + self.domain + "&pu=100&page=[[OFFSET]]", offset=100, maxoffset=maxoffset)
        self.results += self.search(title="Dogpile",    url="http://www.dogpile.com/search/web?qsi=[[OFFSET]]&q=\"%40" + self.domain + "\"", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="Yandex",     url="http://www.yandex.com/search?text=%40" + self.domain + "&numdoc=50&lr=[[OFFSET]]", offset=50, maxoffset=maxoffset)
        self.results += self.search(title="Baidu",      url="http://www.baidu.com/s?wd=%40" + self.domain + "&pn=[[OFFSET]]", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="Yahoo",      url="https://search.yahoo.com/search?p=\"%40" + self.domain + "\"&b=[[OFFSET]]&pz=10", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="DuckDuckGo", url="https://duckduckgo.com/lite?q=\"%40" + self.domain + "\"" )
コード例 #14
0
class Gather():
    """Scrape several public search engines for pages mentioning a domain.

    Python 3 variant: raw response bodies accumulate in self.results,
    which a Parser then mines for host names and e-mail addresses.
    """

    def __init__(self, domain, display=None):
        self.domain = domain
        self.display = display
        self.results = ""
        # Fixed desktop user agent — presumably to look like a regular
        # browser to the search engines; confirm before changing.
        self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
        self.p = ProgressBar(display=self.display)
        # Runs all searches now so self.results is populated for the Parser.
        self.gather()
        self.parser = Parser(self.results, self.domain)

    def hosts(self):
        # Host names extracted from the collected search results.
        return self.parser.hosts()

    def emails(self):
        # E-mail addresses extracted from the collected search results.
        return self.parser.emails()

    @staticmethod
    def get_sources():
        return "Currently searching [google, bing, ask, dogpile, yandex, baidu, yahoo, duckduckgo]"

    def search(self, url, offset=1, maxoffset=0, title=""):
        """Page through *url*, replacing [[OFFSET]] with increasing offsets.

        Returns the concatenated response bodies; stops early (returning
        whatever was collected) on the first URL error.
        """
        current_offset = 0
        data = ""
        self.p.reset(title=title)
        while current_offset <= maxoffset:
            self.p.rotate()
            # Inject the current page offset into the URL template.
            temp_url = re.sub(r'\[\[OFFSET\]\]', str(current_offset), url)
            try:
                headers = { 'User-Agent' : self.user_agent }
                req = urllib.request.Request(str(temp_url), None, headers)
                # NOTE(review): read() returns bytes, so str() yields the
                # "b'...'" repr — the downstream Parser presumably copes
                # with that; verify before changing.
                data += str(urllib.request.urlopen(req).read())
            except urllib.error.URLError as e:
                self.display.error("Could not access [%s]" % (title))
                return data
            except Exception as e:
                print(e)
            current_offset += offset
        self.p.done()
        return data
    
    def gather(self, maxoffset=500):
        """Query every supported engine for '@<domain>' mentions."""
        self.results += self.search(title="Google",     url="http://www.google.com/search?num=100&start=[[OFFSET]]&hl=en&meta=&q=%40\"" + self.domain + "\"", offset=100, maxoffset=maxoffset)
        self.results += self.search(title="Bing",       url="http://www.bing.com/search?q=%40" + self.domain + "&count=50&first=[[OFFSET]]", offset=50, maxoffset=maxoffset)
        self.results += self.search(title="Ask",        url="http://www.ask.com/web?q=%40" + self.domain + "&pu=100&page=[[OFFSET]]", offset=100, maxoffset=maxoffset)
        self.results += self.search(title="Dogpile",    url="http://www.dogpile.com/search/web?qsi=[[OFFSET]]&q=\"%40" + self.domain + "\"", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="Yandex",     url="http://www.yandex.com/search?text=%40" + self.domain + "&numdoc=50&lr=[[OFFSET]]", offset=50, maxoffset=maxoffset)
        self.results += self.search(title="Baidu",      url="http://www.baidu.com/s?wd=%40" + self.domain + "&pn=[[OFFSET]]", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="Yahoo",      url="https://search.yahoo.com/search?p=\"%40" + self.domain + "\"&b=[[OFFSET]]&pz=10", offset=10, maxoffset=maxoffset/10)
        self.results += self.search(title="DuckDuckGo", url="https://duckduckgo.com/lite?q=\"%40" + self.domain + "\"" )
コード例 #15
0
def main():
    """Entry point: look up routes between the given stops and print XML.

    Returns 1 on invalid input or when no route is found.
    """
    start, dest = process_args()
    parser = Parser(start, dest)

    try:
        routes = parser.get_routes()
    except (InvalidInput, NotFound) as e:
        # Both error cases are reported the same way.
        print(e)
        return 1

    ConstructXML().print_xml(routes)
コード例 #16
0
ファイル: main.py プロジェクト: philipmoniaga/hackathon2
def main():
    """Compute the invitees from FILE_NAME and print the formatted result."""
    service = InviteService(
        distance_estimator=DistanceStrategy(GLOBAL_CIRCLE_DISTANCE))
    customers = Parser.parsing(
        file_path=os.path.abspath(FILE_NAME), decoder=CustomerDecoder)
    FormatterOutput.output(service.calculate(customers))
コード例 #17
0
    def render(self, tutorial):
        """ Render a single HTML document for *tutorial*.

            Args:
                tutorial (document.Tutorial): tutorial object
            Returns:
                (str): Html document, or None for non-Tutorial input
        """
        if not isinstance(tutorial, Tutorial):
            return

        rendered = Renderer.render(document=tutorial)
        filtered = Parser.filter(rendered)
        return Parser.resolve_path(filtered, config.HOST)
コード例 #18
0
    def execute(cls, entrypoint, dest, ext=config.DOCEXTS[0], debug=False):
        """ Factory Method: run the parse -> extract -> render -> write pipeline.

            Args:
                entrypoint (str): url of the tutorial entry point
                dest (str): destination directory for the generated document
                ext (str): output document extension
                debug (bool): kept for interface compatibility
            Returns:
                (int): 0 on success, 1 on a handled error
        """

        err = 0
        try:
            ttp = PyTTP()
            print(f'- Parsing the entry point: {entrypoint}')
            tutorial = ttp.parse(entrypoint)

            print(f'- Extracting content from host for {tutorial}')
            urls = Parser.extract_href(tutorial.table_contents)
            ttp.extract(tutorial, urls[:2])

            print(f'- Rendering html')
            html = ttp.render(tutorial)

            print(f'- Writting ({ext}) document on disk')
            ttp.write(filename=tutorial.name, data=html, dest=dest, ext=ext)
        except HostNameError as e:
            err = 1
            print('error:', e)
        except EntryPointError as e:
            err = 1
            print(f'error:{entrypoint} is not a valid entry point')
        except (NotADirectoryError, FileTypeError) as e:
            err = 1
            print('error:', e)
        # BUG FIX: 'return err' previously sat in a finally block, which
        # silently swallowed any in-flight exception not handled above
        # (including KeyboardInterrupt/SystemExit).
        return err
コード例 #19
0
	def start(self):
		"""Bring the crawler up: load seeds, start the worker threads and
		notify MySQL; logs and re-raises any startup failure."""
		try:
			self.wait_for_start()

			self._istart = True

			"""load seed """
			self.load_seeds()	# load the initial seed URLs (from a Google search)


			"""show welcome info"""
			self.show_welcome()
			self._status._sys_start	= time()

			"""start threads"""
			# Downloader/Parser pools plus the checker and status threads.
			self._downloader = Downloader( self._config._down_num, self._status)
			self._downloader.start()
			self._parser     = Parser(self._config._parser_num, self._status )
			self._parser.start()
			self._downloader_pool_checker.start()
			self._parse_pool_checker.start()
			self._status_update.start()


			"""notify mysql, i am started"""
			self.sqlex.write_if_start()

		except (Exception) as e:
			Log().debug("start failed")
			raise(e)
			return False	# NOTE(review): unreachable — the raise above always fires
コード例 #20
0
ファイル: monito.py プロジェクト: smaass/monito
 def eval(self, code_string):
     """Parse and interpret *code_string*.

     On any failure, return a 'ExceptionName: message' string instead of
     propagating the exception.
     """
     try:
         return self.interpret(Parser.parse(code_string))
     except Exception as error:
         return '{0}: {1}'.format(error.__class__.__name__, error.args[0])
コード例 #21
0
ファイル: gather.py プロジェクト: BahtiyarB/SPF
 def __init__(self, domain, display=None):
     """Collect search-engine results for *domain* and build a Parser.

     Note: the constructor itself triggers the (slow) web searches via
     self.gather() before wrapping the raw results in a Parser.
     """
     self.domain = domain
     self.display = display
     self.results = ""
     # Fixed desktop user agent — presumably to look like a regular
     # browser to the search engines; confirm before changing.
     self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
     self.p = ProgressBar(display=self.display)
     # Populates self.results, which the Parser below consumes.
     self.gather()
     self.parser = Parser(self.results, self.domain)
コード例 #22
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_extractHref_anchorTagHrefAttrMissing_returnEmptyList(self):
     """Anchors without href attributes yield no extracted links."""
     html = '''<html>
                 <a>link1</a>
                 <a>link2</a>
             </html>'''
     links = Parser.extract_href(html)
     print(links)
     self.assertEqual(links, [])
コード例 #23
0
ファイル: controller.py プロジェクト: litchi125/tools
    def prepare(self):
        """ preparation/initialization of opts and env: parsing & checks """

        # declare nullscan options
        self.opt = Option(sys.argv)

        # check argc and argv (usage)
        self.check.check_argc(len(sys.argv))
        self.check.check_argv(sys.argv)

        # check for missing libraries / deps / python modules
        self.check.check_deps(self.file.read_file(PYDEPS))

        # parse cmdline and config options, update final options dictionary
        try:
            self.parser = Parser(self.opt.opts)
            self.parser.parse_cmdline()
            self.parser.parse_config()
            self.opt.opts = self.parser.opts
        except Exception:
            # BUG FIX: this was a bare 'except:', which also swallowed
            # SystemExit/KeyboardInterrupt and made ctrl-c unusable here.
            self.log('usage', _type='err', end='\n')

        # update final options dictionary
        self.opt.update_opts()

        # further checks for usage, options, env, etc.
        self.check.check_opts(self.opt.opts)

        # collect all py-files and grep the tools out of the py-files
        # (flattened in a single comprehension instead of append + flatten)
        py_files = self.misc.find_py_files(MOD_PATH)
        tools = [tool for py in py_files for tool in self.misc.grep_tools(py)]

        # create the locks for each tool except for excluded ones
        with ThreadPoolExecutor(50) as exe:
            for tool in tools:
                if tool not in self.opt.opts['tools']['ex_tools']:
                    exe.submit(self.file.create_lock, tool)

        # copy debug flag to target_opts (for nullscan tools)
        self.opt.opts['targets_opts']['debug'] = self.opt.opts['debug']

        return
コード例 #24
0
    def extract(self, source):
        """Extract a table from *source*.

        Returns a csv.DictReader over the table data (or None when the
        table is unsupported or not in ASCII interchange format) together
        with the parsed PDS labels. (Docstring fixed: it previously
        described image extraction, copy-pasted from ImageExtractor.)
        """
        p = Parser()
        f = open_pds(source)
        pdsdatadir, pdsfile = os.path.split(source)
        if self.log: self.log.debug("Parsing '%s'" % (source))
        self.labels = p.parse(f)
        if self.log: self.log.debug("Found %d labels" % (len(self.labels)))
        # BUG FIX: tbl was only assigned inside the ASCII branch, so a
        # supported table in any other interchange format crashed with an
        # unbound-local error at the return statement below.
        tbl = None
        if self._check_table_is_supported():
            if self.log:
                self.log.debug("Table in '%s' is supported" % (source))
            # Currently unused, but kept since the helper reads the labels.
            dim = self._get_table_dimensions()

            # Get the location of the table (strip label quoting).
            location = self._get_table_location().strip().replace("\"", "")

            # Get the structure of the table from the pointer
            struct_fname = self._get_table_structure().strip().replace(
                "\"", "")
            structurefile = getPdsFileName(struct_fname, pdsdatadir)

            # Parse the structure file to recover the column names.
            sp = ColumnParser()
            s = open_pds(structurefile)
            slabels = sp.parse(s)
            # BUG FIX: the structure file handle was leaked.
            s.close()
            columns = [l['COLUMN']['NAME'].strip().replace("\"", "")
                       for l in slabels]
            if self.log: self.log.debug("Found %d columns" % (len(columns)))
            if self.labels['TABLE']['INTERCHANGE_FORMAT'] == 'ASCII':
                locationfile = getPdsFileName(location, pdsdatadir)
                # The DictReader reads lazily, so its file handle is
                # intentionally left open for the caller.
                tbl = csv.DictReader(open(locationfile),
                                     fieldnames=columns,
                                     delimiter=' ')

        else:
            if self.log:
                self.log.error("Table is not supported '%s'" % (source))
        f.close()

        return tbl, self.labels
コード例 #25
0
ファイル: tableextractor.py プロジェクト: afrigeri/PyPDS
    def extract(self, source):
        """Extract a table from *source*.

        Returns a csv.DictReader over the table data (or None when the
        table is unsupported or not in ASCII interchange format) together
        with the parsed PDS labels. (Docstring fixed: it previously
        described image extraction, copy-pasted from ImageExtractor.)
        """
        p = Parser()
        f = open_pds(source)
        pdsdatadir, pdsfile = os.path.split(source)
        if self.log:
            self.log.debug("Parsing '%s'" % (source))
        self.labels = p.parse(f)
        if self.log:
            self.log.debug("Found %d labels" % (len(self.labels)))
        # BUG FIX: tbl was only assigned inside the ASCII branch, so a
        # supported table in any other interchange format crashed with an
        # unbound-local error at the return statement below.
        tbl = None
        if self._check_table_is_supported():
            if self.log:
                self.log.debug("Table in '%s' is supported" % (source))
            # Currently unused, but kept since the helper reads the labels.
            dim = self._get_table_dimensions()

            # Get the location of the table (strip label quoting).
            location = self._get_table_location().strip().replace('"', "")

            # Get the structure of the table from the pointer
            struct_fname = self._get_table_structure().strip().replace('"', "")
            structurefile = getPdsFileName(struct_fname, pdsdatadir)

            # Parse the structure file to recover the column names.
            sp = ColumnParser()
            s = open_pds(structurefile)
            slabels = sp.parse(s)
            # BUG FIX: the structure file handle was leaked.
            s.close()
            columns = [l["COLUMN"]["NAME"].strip().replace('"', "") for l in slabels]
            if self.log:
                self.log.debug("Found %d columns" % (len(columns)))
            if self.labels["TABLE"]["INTERCHANGE_FORMAT"] == "ASCII":
                locationfile = getPdsFileName(location, pdsdatadir)
                # The DictReader reads lazily, so its file handle is
                # intentionally left open for the caller.
                tbl = csv.DictReader(open(locationfile), fieldnames=columns, delimiter=" ")

        else:
            if self.log:
                self.log.error("Table is not supported '%s'" % (source))
        f.close()

        return tbl, self.labels
コード例 #26
0
ファイル: test_evaluation.py プロジェクト: smaass/monito
    def test_environment(self):
        """Bindings passed to new_environment shadow and extend the runtime env."""
        runtime = Monito()
        hola_value = Monito.run('(max (list 1 3 2))')
        # Deliberately rebinds '+' to multiplication to prove shadowing works.
        times_primitive = Primitive(
            '+',
            lambda x, y: x * y,
            Parser.parse_type(Parser.string_to_sexpr('Num Num -> Num')),
            runtime.environment
        )
        new_env = runtime.environment.new_environment({
            'x': 4,
            'hola': hola_value,
            '+': times_primitive,
        })

        for code, expected in (
            ('(- 3 x)', -1),
            ('(- 10 hola)', 7),
            ('(+ 2 3)', 6),
        ):
            self.assertEqual(Monito.run(code, new_env), expected)
コード例 #27
0
ファイル: main.py プロジェクト: Alireza-/data-code-kata
    def run(self):
        """
        Entry point for the program.

        Generates a fixed width file from the spec, then converts it to CSV.

        :return: none
        """
        file_cfg = self.config['FILE']

        self.logger.info("Fixed width file generator is starting ...")
        Generator().generate_fixed_width_file(
            file_cfg['SPEC_FILE'],
            int(file_cfg['NO_OF_RECORDS']),
            file_cfg['FIXED_WIDTH_FILE'])

        self.logger.info("Fixed width file parser is starting ...")
        Parser().convert_fixed_width_to_csv(
            file_cfg['SPEC_FILE'],
            file_cfg['FIXED_WIDTH_FILE'],
            file_cfg['CSV_FILE'],
            file_cfg['DELIMITER'])
コード例 #28
0
    def parse(self, entrypoint):
        """ Parse the entry point

            Args:
                entrypoint (str): url of any readable tutorial from HOST
            Returns:
                (Tutorial): tutorial built from the meta and TOC sections
            Raises:
                HostNameError: when entrypoint is not a valid host name
        """
        if not is_valid_hostname(entrypoint):
            raise HostNameError(f'{entrypoint} is not a valid host name')

        def _resolved(section):
            # Parse one section and rewrite its relative paths to HOST.
            return Parser.resolve_path(
                Parser.parse(url=entrypoint, section=section), config.HOST)

        meta = _resolved(Section.META)
        table_contents = _resolved(Section.TABLE_CONTENTS)
        name = self.__parse_tutorial_name(entrypoint)

        return Tutorial(name, meta, table_contents)
コード例 #29
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_parse_type(self):
        """Atomic and function type expressions map to the right type objects."""
        cases = [
            ("Num", NumType()),
            ("Str", StringType()),
            ("Bool", BoolType()),
            ("Void", UnitType()),
            ("Dyn", DynamicType()),
            (["Num", "->", "Num"], FunType([NumType()], NumType())),
            (["->", "Void"], FunType([], UnitType())),
            ([["Str", "->", "Str"], "->", "Num"],
             FunType([FunType([StringType()], StringType())], NumType())),
        ]
        for type_expr, expected in cases:
            self.assertEqual(Parser.parse_type(type_expr), expected)
コード例 #30
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_separate_sexpr_strings(self):
        """Top-level s-expressions are split out of a multi-form code string."""
        code = """
            (define x 3)
            (define f (x) (+ x 4))
            (f x)
        """
        sexpr_strings = Parser.separate_sexpr_strings(code)

        expected = ["(define x 3)", "(define f (x) (+ x 4))", "(f x)"]
        self.assertEqual(len(sexpr_strings), 3)
        for actual, wanted in zip(sexpr_strings, expected):
            self.assertEqual(actual, wanted)
コード例 #31
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_string_to_sexpr(self):
        """Literals, nesting and alternative bracket styles all parse."""
        self.assertEqual(Parser.string_to_sexpr("true"), True)

        self.assertEqual(
            Parser.string_to_sexpr("(and true false)"),
            ["and", True, False])

        self.assertEqual(
            Parser.string_to_sexpr("(+ (- 3 2) (sum 1 2 3 4))"),
            ["+", ["-", 3, 2], ["sum", 1, 2, 3, 4]])

        self.assertEqual(
            Parser.string_to_sexpr('(f (g (h 2 3 4) "hola") i)'),
            ["f", ["g", ["h", 2, 3, 4], '"hola"'], "i"])

        # Curly and square brackets behave like parentheses.
        nested = Parser.string_to_sexpr(
            """
            {local
                [ (a 3) (b 2) ]
                (f a b)
            }
        """
        )
        self.assertEqual(nested, ["local", [["a", 3], ["b", 2]], ["f", "a", "b"]])
コード例 #32
0
ファイル: dataCollector.py プロジェクト: orf53975/SPF-1
 def run(self):
     """Run the configured collector tool and parse its results.

     Returns None on success, or an error string when the configured
     path is missing or does not point to a file.
     """
     # verify that self.config["XXXXXXXXXX_path"] exists
     if not (self.path):
         return "ERROR: " + self.name + "_path is not configured"
     if not (os.path.isfile(self.path)):
         return "ERROR: " + self.name + "_path does not point to a valid file"
     # Start process
     process = self.run_command()
     self.results = self.load_results()
     self.parser = Parser(self.results, self.domain)
     self.cleanup()
     return None
コード例 #33
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_parse_arg(self):
        """Single argument forms parse into (identifier, type) pairs."""
        # Whitespace around the colon must not matter.
        num_arg1 = Parser.string_to_sexpr("[x : Num]")
        num_arg2 = Parser.string_to_sexpr("[x: Num]")
        self.assertEqual(num_arg1, num_arg2)

        parsed = Parser.parse_arg(num_arg1)
        self.assertEqual(parsed.identifier, "x")
        self.assertEqual(parsed.type, NumType())

        cases = [
            ("[s: Str]", "s", StringType()),
            ("[x: Bool]", "x", BoolType()),
            ("[l: (List Str)]", "l", ListType(StringType())),
            ("d", "d", DynamicType()),
            ("[f: (Str -> Num)]", "f", FunType([StringType()], NumType())),
            ("[g: (Num Str -> Bool)]", "g",
             FunType([NumType(), StringType()], BoolType())),
            ("[h: ((Num -> Num) -> (Str -> Num))]", "h",
             FunType([FunType([NumType()], NumType())],
                     FunType([StringType()], NumType()))),
        ]
        for source, identifier, arg_type in cases:
            arg = self.to_arg(source)
            self.assertEqual(arg.identifier, identifier)
            self.assertEqual(arg.type, arg_type)
コード例 #34
0
ファイル: gather.py プロジェクト: BahtiyarB/SPF
class Gather():
    """Scrape several public search engines for data about *domain*.

    NOTE(review): Python 2 code (``urllib2``, ``except Exception, e``) --
    porting to Python 3 would touch every network call; left as-is here.
    """
    def __init__(self, domain, display=None):
        self.domain = domain
        self.display = display
        self.results = ""  # raw HTML accumulated from every search() call
        # Spoof a common browser UA so the engines serve normal result pages.
        self.user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
        self.p = ProgressBar(display=self.display)
        # gather() is presumably provided by a subclass/mixin -- TODO confirm.
        self.gather()
        self.parser = Parser(self.results, self.domain)

    def hosts(self):
        """Return hostnames extracted from the gathered results."""
        return self.parser.hosts()

    def emails(self):
        """Return email addresses extracted from the gathered results."""
        return self.parser.emails()

    @staticmethod
    def get_sources():
        """Describe which search engines this gatherer queries."""
        return "Currently searching [google, bing, ask, dogpile, yandex, baidu, yahoo, duckduckgo]"

    def search(self, url, offset=1, maxoffset=0, title=""):
        """Fetch *url* repeatedly, substituting ``[[OFFSET]]`` with 0..maxoffset.

        Returns the concatenated response bodies. Individual fetch errors
        are printed and skipped so one bad page does not abort the search.
        """
        current_offset = 0
        data = ""
        self.p.reset(title=title)
        while current_offset <= maxoffset:
            self.p.rotate()
            # Inject the current result-page offset into the URL template.
            temp_url = re.sub(r'\[\[OFFSET\]\]', str(current_offset), url)
            try:
                headers = { 'User-Agent' : self.user_agent }
                req = urllib2.Request(temp_url, None, headers)
                data += urllib2.urlopen(req).read()
            except Exception, e:
                print e
            current_offset += offset
        self.p.done()
        return data
コード例 #35
0
    def extract(self, tutorial, urls=None, trace=True):
        """ Extracting content section from each given url

            Args:
                tutorial (document.Tutorial): tutorial object collecting contents
                urls (list): urls to parse (defaults to no urls)
                trace (boolean): print the current url that is being parsed
        """
        # Fix: the original used a mutable default argument (urls=[]), which
        # is shared across calls; a None sentinel is the safe equivalent.
        if urls is None:
            urls = []

        # Silently ignore anything that is not a Tutorial.
        if not isinstance(tutorial, Tutorial):
            return

        for url in urls:
            if trace: print(f'\t. {url}....................')
            content = Parser.parse(url=url, section=Section.CONTENT)
            tutorial.contents.append(content)
コード例 #36
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_parse_function_with_types(self):
        """A typed two-argument function definition parses into an App body."""

        source = "(fun ([x: Num] [y: Num]) (+ x y))"
        parsed = Parser.parse(source)
        arguments = parsed.args

        self.assertEqual(len(arguments), 2)
        self.assertTrue(isinstance(arguments[0], Argument))

        # Both parameters are Num-typed; names come back in order.
        for argument, name in zip(arguments, ("x", "y")):
            self.assertEqual(argument.type, NumType())
            self.assertEqual(argument.identifier, name)

        self.assertTrue(isinstance(parsed.body, App))
コード例 #37
0
ファイル: engine.py プロジェクト: derrick0714/course_code
	def __init__( self):
		"""Initialise the crawler engine: config, worker pools, checker
		threads, URL-filtering strategies, the download folder and the
		MySQL manager. Nothing starts running until start() is called."""
		self._istart		= False
		self._status		= Status()

		"""--- load config file----"""
		self._config 		= Configuration();
	
		"""--- core object ----"""
		self._downloader	= None
		# Parser pool is created eagerly here, sized from the config.
		self._parser		= Parser( self._config._down_num, self._status )

		"""--- memory models --- """
		self._download_pool	= SafeQueue() #Store the html objects to be downloaded by the downloader
		self._parse_pool	= SafeQueue() #Store the html objects to be parsed by the parser
		
		"""--- checker threads --- """
		"""The target is the function passed in to 
		run in the thread. Those two threads keep checking 
		and assigning jobs to the two thread pools"""
		self._downloader_pool_checker = Thread( target=self.download_pool_checker)
		self._parse_pool_checker = Thread( target=self.parse_pool_checker)
		
		"""---  threads --- """
		self._status_update = Thread( target=self.status_update) #every second, this thread post runtime info to remote mysql

		""" ---strategies--- """
		# Each handler filters or rewrites candidate URLs before download.
		self._earlyvisithandler	=	EarlyVisitHandler()
		self._robothandler  	=	RobotHandler()
		self._cgihandler		=	CGIHandler()
		self._nestlevelhandler 	=	NestLevelHandler()
		self._schemehandler    	=	SchemeHandler()
		self._filetypehandler	=	FileTypeHandler()
		self._bookmarkhandler	=	BookMarkHandler()
		self._omitindex			=	OmitIndex()
		self._urlextender		=	URLExtender()			
	
		""" ---init the path for saving data, if the folder don't exist, create it ---"""
		# Per-run folder: <down_path>/YYYY-MM-DD/HH-MM-SS/
		self._path			= self._config._down_path+"/"+ strftime('%Y-%m-%d', localtime())+"/"+ strftime('%H-%M-%S', localtime())+"/"
		if not os.path.exists(self._path):
			os.makedirs(self._path)

		self._config._down_path = self._path
		
		self._keywords_links= []

		""" ---Mysql Manager--- """
		self.sqlex      = DatabseManager(self._config)
コード例 #38
0
class TestParser(unittest.TestCase):
    """End-to-end OCR account tests: parse digits, validate, check the
    rendered account line (including ILL/ERR annotations)."""

    parser = Parser()
    validator = Validator()

    def _assert_account(self, raw_input, expected_output):
        """Parse *raw_input*, validate it, and compare the account line.

        Extracted helper: the original four tests repeated this
        parse/validate/assert sequence verbatim.
        """
        account_number = self.parser.parse_an_account_number(raw_input)
        account = self.validator.validate_account(account_number)
        self.assertEqual(account.account_data, expected_output)

    def test_read_invalid_account_number_ill(self):
        # One unreadable digit -> '?' placeholder plus ILL marker.
        self._assert_account(data.INPUT123_INVALID, "12345678? ILL")

    def test_read_invalid_digits_ill_all(self):
        # Every digit unreadable.
        self._assert_account(data.INPUT_WITH_ILLS_ALL, "????????? ILL")

    def test_read_checksumm_valid_one(self):
        self._assert_account(data.INPUT_VALID_CHECKSUM1, "000000051")

    def test_read_checksumm_valid_two(self):
        self._assert_account(data.INPUT_VALID_CHECKSUM2, "345882865")

    def test_read_checksumm_invalid(self):
        # Readable digits but failing checksum -> ERR marker.
        self._assert_account(data.INPUT_INVALID_CHECKSUM, "664371495 ERR")
コード例 #39
0
    def start(self):
        """Parse CLI options, validate them, then run the chosen installer."""

        # Banner, argument parsing and sanity checks.
        Help.banner()
        self.opts = vars(Parser.parseArgs())
        checker = Check(self.opts)
        checker.checkArgc()
        checker.checkArgs()
        checker.checkInstallType()

        # Dispatch to the text-mode or curses installer.
        if self.opts['type'] == 'text':
            installer = TextInstaller(self.opts['verbose'])
        else:
            installer = CursesInstaller(self.opts['verbose'])
        installer.run()

        return
コード例 #40
0
    def start(self):
        """ do first needed things """

        # init, usage, checks, etc.
        Help.banner()
        # Parse command-line args into a plain dict of options.
        self.opts = vars(Parser.parseArgs())
        c = Check(self.opts)
        c.checkArgc()
        c.checkArgs()
        c.checkInstallType()

        # run installer here
        # Dispatch on the requested installer type ('text' vs curses UI).
        if self.opts['type'] == 'text':
            t = TextInstaller(self.opts['verbose'])
            t.run()
        else:
            c = CursesInstaller(self.opts['verbose'])
            c.run()

        return
コード例 #41
0
ファイル: monito.py プロジェクト: smaass/monito
    def repl(cls):
        """Interactive read-eval-print loop.

        Input lines are accumulated until their parentheses balance, so
        multi-line expressions can be typed naturally.
        """
        print('Welcome to the Monito REPL\n')
        runtime = Monito()
        pending_lines = 0
        buffered = ''

        while runtime.active:

            prompt = '>> ' if pending_lines == 0 else '\t'

            buffered += cls.input(prompt)
            balanced, fail_index = Parser.balanced_parens(buffered)
            # Unbalanced only at the very end means the expression is merely
            # incomplete: keep the buffer and read another line.
            if not balanced and fail_index == len(buffered):
                pending_lines += 1
                continue

            value = runtime.eval(buffered)
            if value is not None:
                print(value)
            pending_lines = 0
            buffered = ''
コード例 #42
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_extractHref_anchorTag_returnList(self):
     """extract_href on HTML containing anchors yields a non-None result."""
     markup = '<html><a href="/link1"></a><a href="/link 2"></a></html>'
     hrefs = Parser.extract_href(markup)
     print(hrefs)
     self.assertIsNotNone(hrefs)
コード例 #43
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_extractHref_anchorTagMissing_returnEmptyList(self):
     """extract_href returns an empty list when the HTML has no anchors."""
     html = '<html></html>'
     # Fix: the original passed a hard-coded '' and left `html` unused,
     # so the anchor-free markup named in the test was never exercised.
     res = Parser.extract_href(html)
     print(res)
     self.assertEqual(res, [])
コード例 #44
0
 def start():
     """Boot sequence: configuration first, then the parser."""
     # Order matters: Parser presumably reads values Config sets up -- TODO confirm.
     Config.start()
     Parser.start()
コード例 #45
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_ast_generation(self):
        """Literals parse into the expected AST node classes."""

        for source, node_class in (("false", Boolean),
                                   ("2", Number),
                                   ("0.2", Number)):
            self.assertTrue(isinstance(Parser.parse(source), node_class))
コード例 #46
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_parseTableContents_validEntryPoint_returnStr(self):
     """Parsing the table-of-contents section of the entry point yields a str."""
     parsed = Parser.parse(url=config.ENTRYPOINT,
                           section=Section.TABLE_CONTENTS)
     print('')
     self.assertIsInstance(parsed, str)
コード例 #47
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_resolvePath_emptyArg_returnOrigin(self):
     """resolve_path with empty input returns the input unchanged."""
     markup = ''
     resolved = Parser.resolve_path(markup, '')
     print(resolved)
     self.assertEqual(resolved, markup)
コード例 #48
0
class Engine(object):
	"""Crawler engine: seeds from Google search results, fans work out to
	downloader/parser thread pools, filters URLs through a chain of
	handler strategies, and reports status to MySQL once per second.

	NOTE(review): Python 2 code (print statements); left as-is.
	"""
	def __init__( self):
		"""Set up config, pools, checker threads, strategies, the per-run
		download folder and the MySQL manager. start() begins crawling."""
		self._istart		= False
		self._status		= Status()

		"""--- load config file----"""
		self._config 		= Configuration();
	
		"""--- core object ----"""
		# Both are created lazily in start(), sized from the config.
		self._downloader	= None
		self._parser		= None

		"""--- memory models --- """
		self._download_pool	= SafeQueue() #Store the html objects to be downloaded by the downloader
		self._parse_pool	= SafeQueue() #Store the html objects to be parsed by the parser
		
		"""--- checker threads --- """
		"""The target is the function passed in to 
		run in the thread. Those two threads keep checking 
		and assigning jobs to the two thread pools"""
		self._downloader_pool_checker = Thread( target=self.download_pool_checker)
		self._parse_pool_checker = Thread( target=self.parse_pool_checker)
		
		"""---  threads --- """
		self._status_update = Thread( target=self.status_update) #every second, this thread post runtime info to remote mysql

		""" ---strategies--- """
		# Each handler filters or rewrites candidate URLs before download.
		self._earlyvisithandler	=	EarlyVisitHandler()
		self._robothandler  	=	RobotHandler()
		self._cgihandler		=	CGIHandler()
		self._nestlevelhandler 	=	NestLevelHandler()
		self._schemehandler    	=	SchemeHandler()
		self._filetypehandler	=	FileTypeHandler()
		self._bookmarkhandler	=	BookMarkHandler()
		self._omitindex			=	OmitIndex()
		self._urlextender		=	URLExtender()			
	
		""" ---init the path for saving data, if the folder don't exist, create it ---"""
		# Per-run folder: <down_path>/YYYY-MM-DD/HH-MM-SS/
		self._path			= self._config._down_path+"/"+ strftime('%Y-%m-%d', localtime())+"/"+ strftime('%H-%M-%S', localtime())+"/"
		if not os.path.exists(self._path):
			os.makedirs(self._path)

		self._config._down_path = self._path
		
		self._keywords_links= []

		""" ---Mysql Manager--- """
		self.sqlex      = DatabseManager(self._config)

		#self.f= open("data.txt", 'w')

	def load_seeds(self):
		"""Seed the download pool from a Google search on the configured
		keywords, dropping URLs rejected by any filter strategy."""
		#load seed info from config file	
		#print "load_seeds 1"
		#load seed from 
		contacter = SearchGoogle(self._config._keywords, self._config._result_num)
		self._keywords_links = contacter.getURLs()
		#append seeds, which from google search result, into download pool
		#print "load_seeds 2"
		#self._keywords_links.insert(0, "https://twitter.com/")
		#self._keywords_links.insert(0, "https://twitter.com/signup?context=login")
		
		i = 0
		for url in self._keywords_links:
			if i < self._config._result_num:
				#print "@@{0}".format(url)
				html_task = Html(url)

				#print "@@1"
				# Filter chain: scheme, bookmark (#fragment), CGI, nesting
				# depth, file type. Each rejection bumps a status counter.
				if(self._schemehandler.SchemeChecker(html_task)==False):
					#print("Ingore the wrong scheme, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					#print "@@2"
					self._status._scheme+=1
					continue
				if(self._bookmarkhandler.BookMarkChecker(html_task)==True):
					#print("Ingore bookmark link, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					#print "@@3"
					self._status._bookmark+=1
					continue
				if(self._cgihandler.FindCGI(html_task)==True):
					#print("Ingore the link contain cgi, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					#print "@@4"
					self._status._cgi+=1
					continue
				if(self._nestlevelhandler.checknestlevel(html_task,self._config._parser_nlv)==True):
					self._status._nestlv +=1
					#print "@@5"
					#print("Ingore the link nested too much, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					continue
				if(self._filetypehandler.FileTypeChecker(html_task)==False):
					#print "@@6"
					self._status._file_type +=1
					continue
				#print "@@7"
				'''
				if(self._earlyvisithandler.check_visited(html_task) == True):
					self._status._early_visit +=1
					#print("Ingore the link visited before, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					continue
				'''
				self._omitindex.Omit(html_task)
				"""
				print "@@8"
				if(self._robothandler.is_allowed(html_task) == False):
					print "@@9"
					self._status._robot +=1
					#print("Blocked by the Robot.txt, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
					continue
				print "@@10"
				"""
				# Record as visited (keyed by URL md5) and queue for download.
				self._earlyvisithandler.add_entry(html_task._md5, html_task)
				self._download_pool.append(html_task)
				'''If use the following two line of code, then the program won't run, which means checking for revisit works'''
				'''however, the dic should be safe with a lock'''
				#self._visited_dic[html_task._md5] = html_task._url 
				#print(len(self._visited_dic))
				#print "@@11"
			else:

				break
			i+=1
		#print "load_seeds 3"
	def show_welcome(self):
		"""Print the run configuration and the seed URLs to the console."""
		print("download folder:"+self._path)
		print "key words:"+self._config._keywords
		print "donload thread num: {0}".format(self._config._down_num)
		print "parse thread num: {0}".format(self._config._parser_num)
		print "Load " +str(self._config._result_num)+" results from google search:"
		
		i = 0
		for url in self._keywords_links:
			if i < self._config._result_num:
				print ("[{0}]".format(i)+url)
			i+=1
		print "\n------------------------------------------------------------------------\n"

		#raw_input("press any key to start crawling, press second key to stop")
	
	def wait_for_start(self):
		"""Block (polling MySQL once per second) until the remote web UI
		flags that crawling should begin."""
		print "ready for start....."
		print "go to http://dengxu.me/crawling/ to input some key words & see the result "

		while( self.sqlex.read_if_start(self._config)!= True):
			sleep(1)
		print "\n------------------------------------------------------------------------\n"
		print "starting crawling engine...."


	def start(self):
		"""Wait for the go signal, load seeds, then start all worker and
		checker threads. Returns False is unreachable after the re-raise."""
		try:
			self.wait_for_start()

			self._istart = True
			
			"""load seed """
			self.load_seeds()	#load seeds from google search 

			
			"""show welcome info"""
			self.show_welcome()
			self._status._sys_start	= time()

			"""start threads"""
			self._downloader = Downloader( self._config._down_num, self._status)
			self._downloader.start()
			self._parser     = Parser(self._config._parser_num, self._status )
			self._parser.start()
			self._downloader_pool_checker.start()
			self._parse_pool_checker.start()
			self._status_update.start()


			"""notify mysql, i am started"""
			self.sqlex.write_if_start()
			
		except (Exception) as e:
			Log().debug("start failed")
			raise(e)
			return False

		
		
	def stop(self):
		"""Stop crawling: clear both pools, stop the worker pools, and join
		the checker/status threads (their loops exit once _istart is False)."""
		self._istart = False
		""""clear download and parse popl"""
		self._download_pool.clear()
		self._parse_pool.clear()

		"""stop downloader and parser threads"""
		self._downloader.stop()
		self._parser.stop()
		""""Those two checker threads will end when the thread who calls them ends"""
		self._downloader_pool_checker.join()
		self._parse_pool_checker.join()
		self._status_update.join()
		print ("Engine is stopping")

	def pause(self):
		"""Not implemented."""
		pass

	def finish_download(self, html_task):
		"""Downloader callback: hand the fetched page over to the parse pool."""
			
		
		
		
		sentence = "Downloaded:[No.{0}] time:{1:0.1f} page:depth_parent {2}_{3} http-code: {4} data-size: {5}byes url: {6}"\
			.format(self._status._download_times,time()-self._status._sys_start,html_task._depth,\
		html_task._parent,html_task._return_code, html_task._data_size, html_task._url )

		#if self._status._download_times <= 500 :
		#	self.f.write(sentence+"\n")
			


		"""caculate the path for saving files"""
		full_path = self._path+"[No.{0}]_".format(self._status._download_times)+".html"

		"""save html data to files"""
		#f= open(full_path, 'w')
		#f.write(html_task._data)
		#f.close()


		"""After downloading, pass the data(still using the html objects) to the parse pool"""
		self._parse_pool.append(html_task)




	def finish_parse(self, html_task):
		"""Parser callback: requeue the task for download unless it was
		already visited or is disallowed by robots.txt."""
		'''
		print("parsed:[No.{0}] time:{1:0.1f} page:depth_parent {2}_{3} http-status: {4} data-size: {5}byes url:{6}"\
			.format(self._status._download_times,time()-self._status._sys_start,html_task._depth,\
		html_task._parent,html_task._return_code, html_task._data_size, html_task._url))
		'''
		"""After parsing, pass the urls to be downloaded to the download pool"""
		if(self._earlyvisithandler.check_visited(html_task) == True):
			#print("Ingore the link visited before, this link is within page {0} , so don't put it in queue".format(html_task._parent), html_task._url)
			self._status._early_visit +=1
			return
		if(self._robothandler.is_allowed(html_task) == False):
			#print("Blocked by the Robot.txt, this link is within page {0} , so don't download".format(html_task._parent), html_task._url)
			self._status._robot +=1
			return
		
		self._earlyvisithandler.add_entry(html_task._md5, html_task)
		self._download_pool.append(html_task)
		




	def download_pool_checker(self):
		"""Checker-thread loop: feed tasks from the download pool to the
		downloader, sleeping briefly when the pool is empty."""
		while (self._istart == True):
			new_download_task = self._download_pool.pop_left()
			"""If there is no task remain in the download pool, put the thread into sleep"""
			"""else pop the new task, and download it"""
			"""for the engine to get the result to put into the parse pool, we need to pass the function finish_download down as a callback"""
			
			if (new_download_task == None):
				#print("No task remaining in download_pool")
				sleep(0.1)
			else:
				self._downloader.queue_download_task(new_download_task , self.finish_download)


	def parse_pool_checker(self):
		"""Checker-thread loop: feed tasks from the parse pool to the
		parser, sleeping briefly when the pool is empty."""
		while (self._istart == True):
			new_parse_task = self._parse_pool.pop_left()
			if (new_parse_task == None):
				#print("sleeping")
				sleep(0.1)				
			else:

				self._parser.queue_parse_task(new_parse_task, self.finish_parse)





	#~~~see result at http://dengxu.me/crawling/
	def status_update(self):
		"""Status-thread loop: once per second, print a summary line and
		push counters plus recent downloads to MySQL."""
		while (self._istart == True):

			self._status._download_queue = self._downloader.len()
			self._status._parse_queue = self._parser.len()
			
			
			sentence = "[time: {0:0.1f}],queue:{8}, down: {1}, total: {2:0.1f}MB | queue:{9}, parsed: {3},scheme:{10}, cig: {4}, bookmark: {11} type {12} visited: {5}, robot: {6},nestlv: {7} | error: 404: {13} , timeout: {14}"\
			.format( time()-self._status._sys_start,\
		 	self._status._download_times, float(self._status._download_size)/1024/1024, self._status._parse_times\
		 	,self._status._cgi, self._status._early_visit, self._status._robot, self._status._nestlv\
		 	,self._downloader.len(), self._parser.len(),self._status._scheme_type, self._status._bookmark, self._status._file_type\
		 	,self._status._404,self._status._socket_timeout)
			
			print sentence

			#if( self._status._download_times > 500):
			#	self.f.write( sentence+"\n")
			

			"""update status tp mysql"""
			self.sqlex.write_status(self._status)
			
			"""update recent download url"""
			self.sqlex.write_recent_download(self._status)
			
			sleep(1)
コード例 #49
0
ファイル: parser_test.py プロジェクト: fkrishna/PyTTP
 def test_resolvePath_emptyAttr_returnOrigin(self):
     """resolve_path leaves markup with an empty href attribute untouched."""
     markup = '<a href=""></a>'
     resolved = Parser.resolve_path(markup, config.HOST)
     print(resolved)
     self.assertEqual(resolved, markup)
コード例 #50
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def test_balanced_parens(self):
        """balanced_parens classifies bracket strings correctly."""

        balanced_sources = ("()", "(a)", "[][]", "(a [b] (c {d}))",
                            "(ab [c e (e) {a}] [d])")
        unbalanced_sources = ("(", ")", "(a))", "([][]}", "{[[]}", ")ab(")

        for source in balanced_sources:
            self.assertTrue(Parser.balanced_parens(source)[0])
        for source in unbalanced_sources:
            self.assertFalse(Parser.balanced_parens(source)[0])
コード例 #51
0
# Author: Abubakar Nur Khalil
# License: MIT
# Purpose: Appropriate parsed output from Parser

from utils.tokens import TokenType

from core.scanner import Scanner
from core.parser import Parser

from tools.custom_syntax import Scanner as _Virgil
from tools.custom_syntax import Parser  as _Dante

# Remember we always need to generate the KSL first and pass it over
# KSL[0] is consumed by the Scanner, KSL[1] by the Parser (see below).
KSL = _Dante(_Virgil('').scan()).parse()

# Small demo program used as scanner/parser input.
source = """
var name = "ank";
77.67 * (8 // 2);
"""

print('Source code:')
print(source)

# Tokenise the demo program with the scanner half of the KSL.
tks = Scanner(source, KSL[0]).scan()

# Build a parser over the token stream with the parser half of the KSL.
pr = Parser(tks, KSL[1])

print("\nFirst Token is variable (VAR):", pr.check(TokenType.VAR)) # True
コード例 #52
0
ファイル: test_parser.py プロジェクト: smaass/monito
    def to_arg(self, arg_string):
        """Test helper: parse *arg_string* into an argument node."""
        return Parser.parse_arg(Parser.string_to_sexpr(arg_string))