Example 1
def process_chunks(config, ipResolver, geoservice: GeoService,
                   datareader: DataImport, datawriter: DataExport) -> bool:
    """Triggers parallel processes.

    Resolves IP in pandas dataframes chunks and stores ip resolved data in S3.
    """
    # Initialize logging
    logger = utility.getlogger('ip_resolution', 'ip_resolution')
    seconds = time.time()
    try:
        query_panoply = config["panoplydatabase"]["readQuery"]
        for dataframe_ip_address in datareader.getbatch_pandas_dataframe(
                query_panoply):
            dataframes = utility.split_dataframe(dataframe_ip_address)
            processList = []
            for processNo, frame in enumerate(dataframes, start=1):
                process_ipresolve = processes.Process(
                    target=ipResolver.resolve_ipaddress,
                    args=(frame, geoservice, datawriter, processNo))
                processList.append(process_ipresolve)
                process_ipresolve.start()
                logger.info('Started process %s (pid %s)', processNo,
                            process_ipresolve.pid)
            for p in processList:
                p.join()
                # print(str(p.exitcode))
    except Exception as ex:
        logger.error('Issue in fetching data from Panoply: ' + str(ex))
        logger.error(utility.print_exception())
        return False
    logger.info("Finished the batch job in %s seconds" % str(
        (time.time() - seconds) // 1))
    return True
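
`utility.split_dataframe` and the `processes` alias are project helpers that do not appear in these excerpts; `processes` presumably wraps the standard `multiprocessing` module. A minimal sketch of what such a splitter could look like, assuming it simply divides each DataFrame into one roughly equal chunk per CPU core (function name, signature and strategy are all assumptions):

# Hypothetical helper, not the project's actual utility.split_dataframe.
import multiprocessing

import numpy as np
import pandas as pd


def split_dataframe(frame: pd.DataFrame, parts=None):
    """Split a DataFrame into roughly equal chunks, one per worker by default."""
    parts = parts or multiprocessing.cpu_count()
    # np.array_split tolerates a chunk count that does not divide the rows evenly.
    return [chunk for chunk in np.array_split(frame, parts) if not chunk.empty]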
Example 2
 def __init__(self):
     """Initialize panoply connection."""
     self.config = JobConfig().getconfig()
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
     self.filePath = self.config['storageDetails']['filePath']
     self.fileName = self.config['storageDetails']['fileName']
     self.fileExtension = self.config['storageDetails']['fileExtension']
     self.directoryName = date.today().strftime("%m/%d/%y").replace("/", "_")
     self.awsKey = self.config['storageDetails']['awsKey']
     self.secretKey = self.config['storageDetails']['secretKey']
     self.s3file_url = ('s3://' + str(self.filePath) + '/'
                        + str(self.directoryName) + '/' + str(self.fileName))
     self.tableName = self.config['storageDetails']['tableName']
     self.region = self.config['storageDetails']['region']
     username = self.config['panoplydatabase']['user']
     password = self.config['panoplydatabase']['password']
     db = self.config['panoplydatabase']['database']
     host = self.config['panoplydatabase']['host']
     port = self.config['panoplydatabase']['port']
     self.connection = psycopg2.connect(user=username, password=password,
                                        host=host, port=port, database=db)
     self.write_command = (
         "BEGIN; truncate " + self.tableName + "; "
         "copy " + self.tableName + " from '" + self.s3file_url + "' "
         "access_key_id '" + self.awsKey + "' "
         "secret_access_key '" + self.secretKey + "' "
         "region '" + self.region + "' "
         "ignoreheader 1 null as 'NA' removequotes delimiter ','; COMMIT;")
     # The trailing COPY options for append_command are assumed to mirror
     # write_command (without the truncate).
     self.append_command = (
         "BEGIN; copy " + self.tableName + " from '" + self.s3file_url + "' "
         "access_key_id '" + self.awsKey + "' "
         "secret_access_key '" + self.secretKey + "' "
         "region '" + self.region + "' "
         "ignoreheader 1 null as 'NA' removequotes delimiter ','; COMMIT;")
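
Example 8 below calls `panoplywriter.save_data()`, which is not included in these excerpts. A minimal sketch of how that method might run the prepared COPY command over the psycopg2 connection opened above; the method body is an assumption, only the attributes set in `__init__` come from the excerpt:

def save_data(self):
    """Hypothetical sketch: load the staged S3 file into the Panoply table."""
    try:
        with self.connection.cursor() as cursor:
            # write_command already wraps the truncate + copy in BEGIN/COMMIT.
            cursor.execute(self.write_command)
        self.logger.info('Copied ' + self.s3file_url + ' into ' + self.tableName)
    except Exception as ex:
        self.connection.rollback()
        self.logger.error('Issue loading data into Panoply: ' + str(ex))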
Example 3
 def __init__(self, processNo=0):
     """Initialize IP Data API object connection."""
     self.config = JobConfig().getconfig()
     self.processNo = processNo
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
     url = self.config['geoservice']['url']
     apikey = self.config['geoservice']['apikey']
     self.connection_url = url.replace("userkey", apikey)
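
The lookup call itself is not shown. A minimal sketch of how a single address might be resolved against the configured endpoint, assuming the service returns JSON and that the IP is appended to `connection_url` as a path segment (the method name and URL shape are assumptions):

import requests


def get_ipdata(self, ipaddress):
    """Hypothetical sketch: fetch geo data for one IP from the configured service."""
    try:
        # Assumes the API expects the IP appended to the configured URL.
        response = requests.get(self.connection_url + str(ipaddress), timeout=10)
        response.raise_for_status()
        return response.json()
    except Exception as ex:
        self.logger.error('Issue resolving ' + str(ipaddress) + ': ' + str(ex))
        return None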
Example 4
 def __init__(self):
     """Initialize connection to S3."""
     self.config = JobConfig().getconfig()
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
     self.filePath = self.config['storageDetails']['filePath']
     self.fileName = self.config['storageDetails']['fileName']
     self.fileExtension = self.config['storageDetails']['fileExtension']
     self.directoryName = date.today().strftime("%m/%d/%y").replace("/", "_")
     self.awsKey = self.config['storageDetails']['awsKey']
     self.secretKey = self.config['storageDetails']['secretKey']
     self.s3file_url = 's3://' + str(self.filePath) + '/' + str(self.directoryName) \
                       + '/' + str(self.fileName) + str(self.fileExtension)
     self.fileWriter = s3fs.S3FileSystem(key=self.awsKey, secret=self.secretKey)
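
The writer method is not part of this excerpt. A sketch of how a resolved chunk could be written as CSV through the `s3fs` handle; the method name, the per-process file naming and the CSV format are assumptions:

import pandas as pd


def write_chunk(self, frame: pd.DataFrame, processNo):
    """Hypothetical sketch: persist one resolved DataFrame chunk to S3 as CSV."""
    # One file per child process under the dated directory.
    path = (self.filePath + '/' + self.directoryName + '/'
            + self.fileName + str(processNo) + str(self.fileExtension))
    with self.fileWriter.open(path, 'wb') as handle:
        handle.write(frame.to_csv(index=False).encode('utf-8'))
    self.logger.info('Wrote chunk to s3://' + path)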
Example 5
 def __init__(self):
     """Initialize Panoply connection."""
     self.config = JobConfig().getconfig()
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
     username = self.config['panoplydatabase']['user']
     password = self.config['panoplydatabase']['password']
     db = self.config['panoplydatabase']['database']
     host = self.config['panoplydatabase']['host']
     port = self.config['panoplydatabase']['port']
     self.connection_url = ('postgresql://' + str(username) + ':'
                            + str(password) + '@' + str(host) + ':'
                            + str(port) + '/' + str(db))
     self.readQuery = self.config['panoplydatabase']['readQuery']
     self.chunksize = self.config['panoplydatabase']['chunksize']
     try:
         self.connection_panoply = create_engine(self.connection_url,
                                                 echo=False)
         self.logger.info('Initialized Panoply connection')
     except Exception as ex:
         self.logger.error('Issue with Panoply connection: ' + str(ex))
         self.logger.error(utility.print_exception())
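
Example 1 iterates over `datareader.getbatch_pandas_dataframe(query)`, which is not shown in these excerpts. A sketch of how that generator might be built on pandas' chunked SQL reader, using the engine and `chunksize` configured above (the method body is an assumption):

import pandas as pd


def getbatch_pandas_dataframe(self, query):
    """Hypothetical sketch: yield the query result in DataFrame chunks."""
    try:
        # read_sql_query returns an iterator of DataFrames when chunksize is set.
        for chunk in pd.read_sql_query(query, self.connection_panoply,
                                       chunksize=int(self.chunksize)):
            yield chunk
    except Exception as ex:
        self.logger.error('Issue reading batch from Panoply: ' + str(ex))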
Example 6
 def setlogger(self, processNo):
     """Get Logger for use in a python child process."""
     self.logger = utility.getlogger('s3writer', 's3writer', processNo)
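
Every excerpt obtains its logger through `utility.getlogger(...)`, a project helper that is not shown. A minimal sketch of what such a factory could look like on top of the standard `logging` module; the argument roles, file naming and format are assumptions:

import logging


def getlogger(log_file, logger_name, processNo=None):
    """Hypothetical sketch: return a configured, optionally per-process logger."""
    name = logger_name if processNo is None else '%s_%s' % (logger_name, processNo)
    logger = logging.getLogger(name)
    if not logger.handlers:  # avoid attaching duplicate handlers on repeated calls
        handler = logging.FileHandler(log_file + '.log')
        handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s %(message)s'))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
    return logger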
Example 7
 def setlogger(self, processNo):
     """Get Logger for use in a python child process."""
     self.logger = utility.getlogger('ipdataservice', 'ipdataservice',
                                     processNo)
Example 8
if __name__ == "__main__":
    logger = utility.getlogger('ip_resolution', 'ip_resolution')
    logger.info('Starting ip resolution job')
    config = JobConfig().getconfig()
    utility.check_s3path(config)
    ip_resolver = IPResolver()
    panoplyreader = PanoplyImport()
    datawriter = S3Writer()
    ipdata_geoservice = IPDataService()
    if process_chunks(config, ip_resolver, ipdata_geoservice, panoplyreader,
                      datawriter):
        panoplywriter = PanoplyWriter()
        panoplywriter.save_data()
Example 9
 def setlogger(self, processNo):
     """Get Logger for use in a python child process."""
     self.logger = utility.getlogger('panoply_connector',
                                     'panoplyconnector', processNo)
Example 10
 def setlogger(self, processNo):
     """Get Logger for use in a python child process."""
     self.logger = utility.getlogger('ip_resolution', 'ipresolver',
                                     processNo)
Example 11
 def __init__(self):
     """Initialize instance of IP Resolver."""
     self.config = JobConfig().getconfig()
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
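
The worker method `resolve_ipaddress`, used as the process target in Example 1, is not part of these excerpts. A sketch of how it might tie the pieces together in a child process: re-initialise the per-process loggers via `setlogger`, look up each address through the geo service, and hand the enriched chunk to the S3 writer. The column name and the collaborator methods `get_ipdata` and `write_chunk` are the hypothetical ones sketched above, not confirmed APIs:

import pandas as pd


def resolve_ipaddress(self, frame, geoservice, datawriter, processNo):
    """Hypothetical sketch: resolve one DataFrame chunk inside a child process."""
    self.setlogger(processNo)
    geoservice.setlogger(processNo)
    datawriter.setlogger(processNo)
    resolved = []
    for ipaddress in frame['ip_address']:  # column name is an assumption
        geodata = geoservice.get_ipdata(ipaddress)
        if geodata:
            geodata['ip_address'] = ipaddress
            resolved.append(geodata)
    if resolved:
        datawriter.write_chunk(pd.DataFrame(resolved), processNo)
    self.logger.info('Process %s resolved %s addresses', processNo, len(resolved))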