Ejemplo n.º 1
0
    def test_sortHostnames(self):
        """Check the ordering returned by ``URLParser.sortHostnames``.

        The expected result is a sequence of ``(hostname, count)`` pairs with
        the highest count first; the two hostnames that share a count of 2 are
        expected in alphabetical order.
        """
        parser = URLParser()
        hostnames = {"ccc.com": 2, "bbb.com": 3, "aaa.com": 2}

        sortedNames = parser.sortHostnames(hostnames)
        # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
        self.assertEqual(sortedNames[0], ('bbb.com', 3))
        self.assertEqual(sortedNames[1], ('aaa.com', 2))
        self.assertEqual(sortedNames[2], ('ccc.com', 2))
Ejemplo n.º 2
0
    def test_parseURLs(self):
        """Check the per-hostname counts ``parseURLs`` returns for urls.txt."""
        counts = URLParser().parseURLs("urls.txt")

        # (hostname, expected count), checked in this order.
        expected = (
            ("twitter.com", 3),
            ("abcnews.go.com", 2),
            ("google.co.uk", 1),
            ("newsfeed.time.com", 1),
            ("world.time.com", 1),
        )

        self.assertEqual(len(counts), 5)
        for host, occurrences in expected:
            self.assertEqual(counts[host], occurrences)
Ejemplo n.º 3
0
 def setUp(self, logFile='log/URLParser.log'):
     """Create the test logger and the URLParser fixtures parser1..parser6.

     logFile is the path handed to Logger; one URLParser is built for each
     URL listed below and stored as ``self.parser<N>`` in listing order.
     """
     logSource = Logger(logFile, 'DEBUG', 'Sub')
     self.logger = logSource.getLogger()

     fixtureUrls = (
         'am://test.cmc.ec.gc.ca:24901',
         'wmo://192.108.62.1:24901',
         'file://localhost//apps/px/operator',
         'amqp://*****:*****@test.cmc.ec.gc.ca//data',
         'amis://test.cmc.ec.gc.ca:24901',
         'ftp://*****:*****@192.108.62.1//data',
     )
     # Numbering starts at 1 so the attributes are parser1 .. parser6.
     for number, url in enumerate(fixtureUrls, 1):
         setattr(self, 'parser%d' % number, URLParser(url))
Ejemplo n.º 4
0
    def readConfigFile(self, filePath, currentDir, currentFileOption):
        """Read a client configuration file and apply its options to self.

        Every line holding at least two whitespace-separated words, and not
        starting (after optional spaces/tabs) with ``#``, is interpreted as
        ``<option> <value> [extra ...]``. Unknown options are ignored. A line
        whose handling raises is reported through ``self.logger.error`` and
        reading continues with the next line.

        Args:
            filePath: path of the configuration file to read.
            currentDir: directory attached to masks until a ``directory`` or
                ``destination`` line replaces it.
            currentFileOption: file option attached to masks until a
                ``filename`` or ``destination`` line replaces it.

        Returns:
            None. If the file cannot be opened, the exception type and value
            are printed and nothing else is done.
        """

        def isTrue(s):
            # Only these exact spellings count as true.
            return s in ('True', 'true', 'yes', 'on', 'Yes', 'YES', 'TRUE',
                         'ON', '1', 'On')

        def asIs(s):
            # Converter for options stored as the raw string.
            return s

        # Options that only store a converted value on self:
        # option name -> (attribute name, converter).
        simpleOptions = {
            'validation': ('validation', isTrue),
            'noduplicates': ('nodups', isTrue),
            'patternMatching': ('patternMatching', isTrue),
            'keepAlive': ('keepAlive', isTrue),
            'dir_pattern': ('dir_pattern', isTrue),
            'binary': ('binary', isTrue),
            'dir_mkdir': ('dir_mkdir', isTrue),
            'mtime': ('mtime', int),
            'maxLength': ('maxLength', int),
            'port': ('port', int),
            'batch': ('batch', int),
            'cache_size': ('cache_size', int),
            'timeout': ('timeout', int),
            'chmod': ('chmod', int),
            'timeout_send': ('timeout_send', int),
            'sorter': ('sorter', asIs),
            'type': ('type', asIs),
            'protocol': ('protocol', asIs),
            'host': ('host', asIs),
            'user': ('user', asIs),
            'password': ('passwd', asIs),
            'ssh_keyfile': ('ssh_keyfile', asIs),
            'lock': ('lock', asIs),
            'ftp_mode': ('ftp_mode', asIs),
            'destfn_script': ('destfn_execfile', asIs),
            'dx_script': ('dx_execfile', asIs),
            'fx_script': ('fx_execfile', asIs),
            'send_script': ('send_execfile', asIs),
            # AMQP
            'exchange_content': ('exchange_content', asIs),
            'exchange_key': ('exchange_key', asIs),
            'exchange_name': ('exchange_name', asIs),
            'exchange_realm': ('exchange_realm', asIs),
        }

        # Compiled once instead of once per line.
        commentLine = re.compile('^[ \t]*#')

        try:
            config = open(filePath, 'r')
        except Exception as err:
            print("Type: %s, Value: %s" % (type(err), err))
            return

        # The with-block closes the file even if the loop is interrupted.
        with config:
            for line in config:
                words = line.split()
                if len(words) < 2 or commentLine.search(line):
                    continue

                key, value = words[0], words[1]
                try:
                    if key in simpleOptions:
                        attribute, convert = simpleOptions[key]
                        setattr(self, attribute, convert(value))

                    elif key == 'accept':
                        cmask = re.compile(value)
                        cFileOption = currentFileOption
                        # An optional third word overrides the file option
                        # for this mask only.
                        if len(words) > 2:
                            cFileOption = words[2]
                        self.masks.append(
                            (value, currentDir, cFileOption, cmask, True))
                    elif key == 'reject':
                        cmask = re.compile(value)
                        self.masks.append((value, currentDir,
                                           currentFileOption, cmask, False))

                    elif key == 'imask':
                        self.masks_deprecated.append(
                            (value, currentDir, currentFileOption))
                    elif key == 'emask':
                        self.masks_deprecated.append((value, ))

                    elif key == 'directory':
                        currentDir = value
                    elif key == 'filename':
                        currentFileOption = value
                    elif key == 'include':
                        # Included files inherit the current directory and
                        # file option.
                        fileName = PXPaths.TX_CONF + value
                        self.readConfigFile(fileName, currentDir,
                                            currentFileOption)
                    elif key == 'destination':
                        self.url = value
                        (self.protocol, currentDir, self.user, self.passwd,
                         self.host, self.port) = URLParser(value).parse()
                        if len(words) > 2:
                            currentFileOption = words[2]

                    elif key == 'purgeAlias':
                        self.purgeAliases[value] = words[2]
                    elif key == 'purge':
                        try:
                            self.purge = self.purgeAliases[value]
                            self.parsePurgeInstructions(
                                self.purgeAliases[value])
                        except Exception:
                            # Not a usable alias: treat the word itself as
                            # the purge instructions.
                            self.purge = value
                            self.parsePurgeInstructions(value)

                    elif key == 'kbytes_ps':
                        # Stored as a float.
                        self.kbytes_ps = int(value) + 0.0
                    elif key == 'debug' and isTrue(value):
                        # Only ever switched on, never reset to False here.
                        self.debug = True

                    elif key == 'am_dest_thread':
                        if self.am_dest_thread is None:
                            self.am_dest_thread = {}
                        self.am_dest_thread[value] = int(words[2])

                    elif key == 'logrotate':
                        if value.isdigit():
                            self.logger.setBackupCount(int(value))

                    elif key == 'exchange_type':
                        if value in ['fanout', 'direct', 'topic', 'headers']:
                            self.exchange_type = value
                        else:
                            self.logger.error("Problem with exchange_type %s" %
                                              value)

                except Exception:
                    self.logger.error(
                        "Problem with this line (%s) in configuration file of client %s"
                        % (words, self.name))
Ejemplo n.º 5
0
    def readConfig(self, filePath):
        """Read a source configuration file and apply its options to self.

        Every line holding at least two whitespace-separated words, and not
        starting (after optional spaces/tabs) with ``#``, is interpreted as
        ``<option> <value> [extra ...]``. Unknown options are ignored. A line
        whose handling raises is reported through ``self.logger.error`` and
        reading continues with the next line. Once the file has been read,
        ``self.patternMatching`` is forced to True if any mask was collected.

        Args:
            filePath: path of the configuration file to read.

        Returns:
            None. If the file cannot be opened, the exception type and value
            are printed and nothing else is done.
        """

        def isTrue(s):
            # Only these exact spellings count as true.
            return s in ('True', 'true', 'yes', 'on', 'Yes', 'YES', 'TRUE',
                         'ON', '1', 'On')

        def asIs(s):
            # Converter for options stored as the raw string.
            return s

        # Options that only store a converted value on self:
        # option name -> (attribute name, converter).
        simpleOptions = {
            'routemask': ('routemask', isTrue),
            'routing_version': ('routing_version', int),
            'noduplicates': ('nodups', isTrue),
            'batch': ('batch', int),
            'cache_size': ('cache_size', int),
            'bulletin_type': ('bulletin_type', asIs),
            'type': ('type', asIs),
            'port': ('port', int),
            'addStationInFilename': ('addStationInFilename', isTrue),
            'patternMatching': ('patternMatching', isTrue),
            'clientsPatternMatching': ('clientsPatternMatching', isTrue),
            'keepAlive': ('keepAlive', isTrue),
            'mtime': ('mtime', int),
            'sorter': ('sorter', asIs),
            'routingTable': ('routingTable', asIs),
            'fx_script': ('fx_execfile', asIs),
            'lx_script': ('lx_execfile', asIs),
            'pull_script': ('pull_execfile', asIs),
            'vip': ('vip', asIs),
            # options for pull
            'protocol': ('protocol', asIs),
            'host': ('host', asIs),
            'user': ('user', asIs),
            'password': ('passwd', asIs),
            'ssh_keyfile': ('ssh_keyfile', asIs),
            'timeout_get': ('timeout_get', int),
            'ftp_mode': ('ftp_mode', asIs),
            'pull_sleep': ('pull_sleep', int),
            'pull_wait': ('pull_wait', int),
            'delete': ('delete', isTrue),
            'pull_prefix': ('pull_prefix', asIs),
            # AMQP
            'exchange_key': ('exchange_key', asIs),
            'exchange_name': ('exchange_name', asIs),
            'exchange_realm': ('exchange_realm', asIs),
        }

        # Options whose raw value is appended to a list attribute on self:
        # option name -> list attribute name.
        appendOptions = {
            'header': 'headers',
            'hours': 'issue_hours',
            'primary': 'issue_primary',
            'cycle': 'issue_cycle',
            'feed': 'feeds',
        }

        # Compiled once instead of once per line.
        commentLine = re.compile('^[ \t]*#')

        try:
            config = open(filePath, 'r')
        except Exception as err:
            print("Type: %s, Value: %s" % (type(err), err))
            return

        # current dir and filename could eventually be used
        # for file renaming and perhaps file move (like a special receiver/dispatcher)

        currentDir = '.'  # just to preserve consistency with client : unused in source for now
        currentFileOption = 'WHATFN'  # just to preserve consistency with client : unused in source for now
        currentTransformation = 'GIFFY'  # Default transformation for tmasks
        currentLST = None  # a list consisting of one directory followed by one or more file patterns

        # The with-block closes the file even if the loop is interrupted.
        with config:
            for line in config:
                words = line.split()
                if len(words) < 2 or commentLine.search(line):
                    continue

                key, value = words[0], words[1]
                try:
                    if key in simpleOptions:
                        attribute, convert = simpleOptions[key]
                        setattr(self, attribute, convert(value))
                    elif key in appendOptions:
                        getattr(self, appendOptions[key]).append(value)

                    elif key == 'extension':
                        if len(value.split(':')) != 5:
                            self.logger.error(
                                "Extension (%s) for source %s has wrong number of fields"
                                % (value, self.name))
                        else:
                            self.extension = (':' + value).replace(
                                '-NAME', self.name)
                    elif key == 'arrival_extension':
                        if len(value.split(':')) != 5:
                            self.logger.error(
                                "arrival_extension (%s) for source %s has wrong number of fields"
                                % (value, self.name))
                        else:
                            self.arrival_extension = (':' + value).replace(
                                '-NAME', self.name)

                    elif key == 'accept':
                        cmask = re.compile(value)
                        self.masks.append((value, currentDir,
                                           currentFileOption, cmask, True))
                    elif key == 'reject':
                        cmask = re.compile(value)
                        self.masks.append((value, currentDir,
                                           currentFileOption, cmask, False))
                    elif key == 'imask':
                        self.masks_deprecated.append(
                            (value, currentDir, currentFileOption))
                    elif key == 'emask':
                        self.masks_deprecated.append((value, ))
                    elif key == 'timask':
                        self.tmasks.append((value, currentTransformation))
                    elif key == 'temask':
                        self.tmasks.append((value, ))
                    elif key == 'transformation':
                        currentTransformation = value

                    # These three are only ever switched on, never reset to
                    # False here.
                    elif key == 'AddSMHeader' and isTrue(value):
                        self.addSMHeader = True
                    elif key == 'validation' and isTrue(value):
                        self.validation = True
                    elif key == 'debug' and isTrue(value):
                        self.debug = True

                    elif key == 'arrival':
                        if self.mapEnteteDelai is None:
                            self.mapEnteteDelai = {}
                        self.mapEnteteDelai[value] = (int(words[2]),
                                                      int(words[3]))

                    elif key == 'logrotate':
                        if value.isdigit():
                            self.logger.setBackupCount(int(value))

                    # options for pull
                    elif key == 'directory':
                        currentDir = value
                        currentLST = []
                        # permit directory duplications but warn
                        for lst in self.pulls:
                            if lst[0] == currentDir:
                                currentLST = lst
                                break
                        if len(currentLST) != 0:
                            self.logger.warning(
                                "This directory appears twice %s" % currentDir)
                            self.logger.warning("Please correct your config")
                            # Later 'get' lines extend the existing entry;
                            # the collector options below are skipped.
                            continue
                        # normal directory addition
                        currentLST.append(currentDir)
                        self.pulls.append(currentLST)
                    elif key == 'get':
                        # currentLST is None until a 'directory' or
                        # 'destination' line is seen; the resulting error is
                        # reported by the handler below.
                        currentLST.append(value)
                    elif key == 'destination':
                        self.url = value
                        (self.protocol, currentDir, self.user, self.passwd,
                         self.host, self.port) = URLParser(value).parse()
                        if len(words) > 2:
                            currentFileOption = words[2]
                        currentLST = [currentDir]
                        self.pulls.append(currentLST)

                    elif key == 'exchange_type':
                        if value in ['fanout', 'direct', 'topic', 'headers']:
                            self.exchange_type = value
                        else:
                            self.logger.error("Problem with exchange_type %s" %
                                              value)

                    # options for collector (checked on every line, after the
                    # generic options above)
                    if self.type == 'collector':
                        if key == 'aaxx':
                            self.aaxx = value.split(',')
                        elif key == 'metar':
                            self.metar = value.split(',')
                        elif key == 'taf':
                            self.taf = value.split(',')
                        elif key == 'history':
                            self.history = int(value)
                        elif key == 'future':
                            self.future = int(value)
                        elif key == 'issue':
                            # 'all' has no comma, so it splits to ['all'].
                            self.issue_hours.append(value.split(","))
                            self.issue_primary.append(int(words[2]))
                            self.issue_cycle.append(int(words[3]))

                except Exception:
                    self.logger.error(
                        "Problem with this line (%s) in configuration file of source %s"
                        % (words, self.name))

        # Any mask, current or deprecated, implies pattern matching.
        if len(self.masks) > 0:
            self.patternMatching = True
        if len(self.masks_deprecated) > 0:
            self.patternMatching = True

        self.logger.debug("Configuration file of source  %s has been read" %
                          (self.name))
Ejemplo n.º 6
0
import argparse
from URLParser import URLParser

# Command-line handling: a single positional argument naming the input file.
cli = argparse.ArgumentParser(
    description="Parse hostnames from a text file.")
cli.add_argument('file', help="A file containing hostnames")
args = cli.parse_args()

# Count the hostnames found in the file, then print them in sorted order.
parser = URLParser()
hostnames = parser.parseURLs(args.file)
for entry in parser.sortHostnames(hostnames):
    hostname, count = entry
    print(count, hostname)