    def run(self):
        step = 1
        util = JobConfig(0, 0, None, None)
        best_util = JobConfig(0, 0, None, None)
        state = self.init_state()

        if self.check_constraint:
            while self.check_constraint(state):
                state = self.init_state()

        while step < self.max_step and util.get_util() < self.max_util:
            temperature = self.find_temperature(step)
            new_state = self.generate_neighbor(state)

            if self.check_constraint:
                while self.check_constraint(new_state):
                    new_state = self.generate_neighbor(new_state)
            new_util = self.compute_util(new_state)

            # Utilization = 0 -> very undesirable -> reset
            if new_util.get_util() == 0:
                state = self.init_state()
            elif self.transition(util, new_util, temperature) >= random():
                state = new_state
                util = new_util

            if new_util.compare(best_util):
                best_util = new_util

            step += 1

        return best_util
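The helpers that run() relies on (find_temperature and transition) are not part of these examples; a minimal sketch of what they might look like, assuming a simple reciprocal cooling schedule and the standard Metropolis acceptance rule for a maximization objective (initial_temperature is a hypothetical attribute, not taken from the source):

from math import exp

def find_temperature(self, step):
    # Hypothetical cooling schedule: temperature decays as the step count grows.
    return self.initial_temperature / step

def transition(self, util, new_util, temperature):
    # Metropolis rule for maximization: always accept an improvement, otherwise
    # accept with probability exp(delta / T); run() compares this against random().
    delta = new_util.get_util() - util.get_util()
    if delta >= 0:
        return 1.0
    return exp(delta / temperature)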
Example #3
    def routing_compute_util(self, state):
        self.add_previous_jobs()
        cloned_links = self.graph.copy_links()
        valid = True
        paths_used = []
        util = 0
        job_config = JobConfig()

        for p in state:
            bw, links = p[0], p[1]
            for l in range(len(links) - 1):
                link_id = Link.get_id(links[l], links[l + 1])
                link = cloned_links[link_id]
                link_bandwidth = link.get_bandwidth()

                if link_bandwidth < bw:
                    valid = False
                    break

                link.set_bandwidth(link_bandwidth - bw)

            if not valid:
                # logging.debug(str(state) + " cannot be built")
                break
            else:
                paths_used.append(p)

        if valid:
            self.graph.set_links(cloned_links)
            all_paths_used = deepcopy(paths_used)
            for job in self.jobs_config.values():
                all_paths_used.extend(job.get_used_paths())
            self.graph.set_flow(all_paths_used)

            util = 0
            total_util = 0
            for p in paths_used:
                flow = self.graph.get_flow(Flow.get_id(p[1][0], p[1][-1]))
                util += (flow.get_requested_bandwidth() + flow.get_effective_bandwidth())

            for p in all_paths_used:
                flow = self.graph.get_flow(Flow.get_id(p[1][0], p[1][-1]))
                total_util += (flow.get_requested_bandwidth() + flow.get_effective_bandwidth())

            job_config = JobConfig(util, total_util, copy_links(cloned_links), paths_used)
            self.reset()

        return job_config
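Example #3's routing_compute_util unpacks each entry of state as bw, links = p[0], p[1] and walks consecutive link endpoints with Link.get_id(links[l], links[l + 1]). A hypothetical state literal in that shape (node names are made up for illustration only):

# Each entry: (requested bandwidth, ordered list of nodes along the chosen path).
state = [
    (10, ["host1", "switch1", "switch2", "host2"]),
    (5,  ["host3", "switch1", "switch3", "host4"]),
]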
 def test_case_save_resolved_ipdata(self):
     """Test the process of ip resolution and file placement in S3."""
     config = JobConfig().getconfig()
     utility.check_s3path(config)
     ipResolver = IPResolver()
     panoplyreader = PanoplyImport()
     datawriter = S3Writer()
     ipdata_geoservice = IPDataService()
     query_panoply = config["panoplydatabase"]["readQuery"]
     for dataframe_ip_address in panoplyreader.getbatch_pandas_dataframe(
             query_panoply):
         dataframes = utility.split_dataframe(dataframe_ip_address)
         processNo = 0
         processList = []
         for frame in enumerate(dataframes):
             processNo = processNo + 1
             process_ipresolve = processes.Process(
                 target=ipResolver.resolve_ipaddress,
                 args=(frame[1], ipdata_geoservice, datawriter, processNo))
             processList.append(process_ipresolve)
             process_ipresolve.start()
             logger.info('processNo-' + str(process_ipresolve.pid))
         for p in processList:
             p.join()
     for i in range(1, config['processConfig']['noOfParallelProcess']):
         s3file_url = datawriter.getfileurl(i)
         self.assertTrue(datawriter.fileWriter.exists(s3file_url))
     print('Ran test_case_save_resolved_ipdata test case')
 def __init__(self):
     """Initialize panoply connection."""
     self.config = JobConfig().getconfig()
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
     self.filePath = self.config['storageDetails']['filePath']
     self.fileName = self.config['storageDetails']['fileName']
     self.fileExtension = self.config['storageDetails']['fileExtension']
     self.directoryName = date.today().strftime("%m/%d/%y").replace("/", "_")
     self.awsKey = self.config['storageDetails']['awsKey']
     self.secretKey = self.config['storageDetails']['secretKey']
     self.s3file_url = 's3://' + str(self.filePath) + '/' + str(self.directoryName) + '/' + str(self.fileName)
     self.tableName = self.config['storageDetails']['tableName']
     self.region = self.config['storageDetails']['region']
     username = self.config['panoplydatabase']['user']
     password = self.config['panoplydatabase']['password']
     db = self.config['panoplydatabase']['database']
     host = self.config['panoplydatabase']['host']
     port = self.config['panoplydatabase']['port']
     self.connection = psycopg2.connect(user=username, password=password, host=host, port=port, database=db)
     self.write_command = """BEGIN; truncate """ + self.tableName + """ ; copy """ + self.tableName + """ from '""" + self.s3file_url + """'
     access_key_id  '""" + self.awsKey + """'
     secret_access_key '""" + self.secretKey + """'
     region '""" + self.region + """'
     ignoreheader 1
     null as 'NA'
     removequotes
     delimiter ','; COMMIT;"""
     self.append_command = """BEGIN; copy """ + self.tableName + """ from '""" + self.s3file_url + """'
     access_key_id  '""" + self.awsKey + """'
     secret_access_key '""" + self.secretKey + """'
     region '""" + self.region + """'
 def __init__(self, processNo=0):
     """Initialize IP Data API object connection."""
     self.config = JobConfig().getconfig()
     self.processNo = processNo
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
     url = self.config['geoservice']['url']
     apikey = self.config['geoservice']['apikey']
     self.connection_url = url.replace("userkey", apikey)
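None of these snippets show how connection_url is ultimately called; a purely hypothetical sketch, assuming the geoservice accepts the IP address appended as a path segment and returns JSON (the project's real resolve_ipaddress may differ):

import requests  # assumed to be available; not imported in the snippets above

def lookup_ip(connection_url, ipaddress):
    # Hypothetical request shape for a single IP lookup.
    response = requests.get(connection_url + '/' + str(ipaddress), timeout=10)
    response.raise_for_status()
    return response.json()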
Example #7
    def compute_route(self):
        util = JobConfig()
        chosen_paths = []

        if self.build_paths():
            for path in self.valid_paths.values():
                chosen_paths.append(choice(path))
            util = self.routing_compute_util(chosen_paths)

        return util
 def test_case_panoplyImport_dataframes(self):
     """Test the import of dataframe from Panoply."""
     connector = PanoplyImport()
     config = JobConfig().getconfig()
     query_panoply = 'select distinct ipaddress from activity_us (nolock) where ipaddress is not null limit 40'
     for panda_df in connector.getbatch_pandas_dataframe(query_panoply):
         self.assertTrue(
             panda_df.shape[0] <= config['panoplydatabase']['chunksize']
             and (panda_df['ipaddress'].iloc[0] is not None))
     print('Ran test_case_panoplyImport_dataframes test case')
def split_dataframe(dataframe):
    """Split a Pandas dataframe in sets."""
    size = dataframe.shape[0]
    config = JobConfig().getconfig()
    max_rows = size // config['processConfig']['noOfParallelProcess']
    list_df = [
        dataframe[i:i + max_rows]
        for i in range(0, dataframe.shape[0], max_rows)
    ]
    return list_df
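split_dataframe slices positionally in steps of max_rows, so a frame yields roughly noOfParallelProcess chunks plus a short tail. A standalone sketch of the same idea, assuming an explicit process count in place of JobConfig and guarding the small-frame case where the integer division would otherwise produce a step of zero:

import pandas as pd

def split_frame(df: pd.DataFrame, n_processes: int):
    # Positional slicing in steps of max_rows; max(1, ...) avoids a zero step.
    max_rows = max(1, df.shape[0] // n_processes)
    return [df.iloc[i:i + max_rows] for i in range(0, df.shape[0], max_rows)]

chunks = split_frame(pd.DataFrame({'ipaddress': ['1.1.1.1', '8.8.8.8', '9.9.9.9']}), 2)
# -> three chunks of one row each (3 // 2 = 1), mirroring split_dataframe's slicing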
Example #10
 def __init__(self):
     """Initialize connection to S3."""
     self.config = JobConfig().getconfig()
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
     self.filePath = self.config['storageDetails']['filePath']
     self.fileName = self.config['storageDetails']['fileName']
     self.fileExtension = self.config['storageDetails']['fileExtension']
     self.directoryName = date.today().strftime("%m/%d/%y").replace("/", "_")
     self.awsKey = self.config['storageDetails']['awsKey']
     self.secretKey = self.config['storageDetails']['secretKey']
     self.s3file_url = 's3://' + str(self.filePath) + '/' + str(self.directoryName) \
                       + '/' + str(self.fileName) + str(self.fileExtension)
     self.fileWriter = s3fs.S3FileSystem(self.awsKey, self.secretKey)
def splitdataframe1(dataframe):
    """Alternative approach, but not correct: .loc slices by index label, and the
    labels are not reset after each cut, so with a default integer index the later
    .loc[0:max_rows - 1] slices come back empty."""
    size = dataframe.shape[0]
    config = JobConfig().getconfig()
    max_rows = size // config['processConfig']['noOfParallelProcess']
    print(max_rows)
    dataframes = []
    while size > max_rows:
        top = dataframe.loc[0:max_rows - 1]
        dataframes.append(top)
        dataframe = dataframe.loc[max_rows:size - 1]
        size = dataframe.shape[0]
    else:
        dataframes.append(dataframe)
    return dataframes
Example #12
    def compute_route(self):
        util = JobConfig()
        max_step = self.max_step
        max_util = self.num_mappers * self.num_reducers * self.bandwidth

        if self.build_paths():
            # Executing simulated annealing for map-reduce routing
            simulated_annealing = SimulatedAnnealing(max_util, \
                                                     max_step, \
                                                     self.routing_init_state, \
                                                     self.routing_generate_neighbor, \
                                                     self.routing_compute_util)

            util = simulated_annealing.run()

        # print "util: ", util.get_util()
        return util
Example #13
    def _execute_job(self):
        available_hosts = [h for h in self.graph.get_hosts() if h.is_free()]
        hosts = []
        util = JobConfig()

        # There are enough nodes to run the job
        if len(available_hosts) > (self.num_mappers + self.num_reducers):
            for i in range(self.num_mappers + self.num_reducers):
                host_to_add = choice(available_hosts)

                while host_to_add in hosts:
                    host_to_add = choice(available_hosts)
                hosts.append(host_to_add)

            util = self.placement_compute_util(hosts)

        return util
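Example #13 picks hosts one at a time and re-draws on collisions; random.sample gives the same end result (distinct hosts) in a single call. A sketch of that alternative, not what the source does:

from random import sample

def pick_hosts(available_hosts, count):
    # Returns 'count' distinct hosts, equivalent in effect to the re-draw loop above.
    return sample(available_hosts, count)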
Example #14
    def compute_route(self):
        util = JobConfig()

        if self.build_paths():
            max_step = self.max_step
            max_util = self.cur_demand.get_net()

            # Executing simulated annealing for map-reduce routing
            simulated_annealing = SimulatedAnnealing(max_util, \
                                                     max_step, \
                                                     self.routing_init_state, \
                                                     self.routing_generate_neighbor, \
                                                     self.routing_compute_util, \
                                                     self.check_constraint)

            util = simulated_annealing.run()

        # print "util: ", util.get_util()
        return util
 def test_case_ipresolution_pipeline(self):
     """Test the complete flow of ip resolution and result load in panoply."""
     config = JobConfig().getconfig()
     utility.check_s3path(config)
     ip_resolver = IPResolver()
     panoplyreader = PanoplyImport()
     datawriter = S3Writer()
     ipdata_geoservice = IPDataService()
     if ResolveIp.process_chunks(config, ip_resolver, ipdata_geoservice,
                                 panoplyreader, datawriter):
         panoplywriter = PanoplyWriter()
         panoplywriter.save_data()
     query_panoply = 'select max(createdAt) as created from test_sp_ipaddress_parsed_us (nolock) limit 1'
     df = panoplyreader.get_pandas_dataframe(query_panoply)
     now = datetime.now()
     dt_string = now.strftime("%d/%m/%Y")
     df_string = df['created'].iloc[0]
     self.assertTrue(df_string[0:10] == dt_string)
     print('Ran test_case_ipresolution_pipeline test case')
Example #16
 def __init__(self):
     """Initialize Panoply connection."""
     self.config = JobConfig().getconfig()
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
     username = self.config['panoplydatabase']['user']
     password = self.config['panoplydatabase']['password']
     db = self.config['panoplydatabase']['database']
     host = self.config['panoplydatabase']['host']
     port = self.config['panoplydatabase']['port']
     self.connection_url = 'postgresql://' + str(username) + ':' + str(
         password) + '@' + str(host) + ':' + str(port) + '/' + str(db)
     self.readQuery = self.config['panoplydatabase']['readQuery']
     self.chunksize = self.config['panoplydatabase']['chunksize']
     try:
         self.connection_panoply = create_engine(self.connection_url,
                                                 echo=False)
         self.logger.info('Initialized Panoply connection')
     except Exception as ex:
         self.logger.info('Issue with panoply connection:' + str(ex))
         self.logger.error(utility.print_exception())
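The getbatch_pandas_dataframe generator used throughout these tests is not among the snippets; a minimal sketch of how it could be built on the engine and chunksize initialized above, assuming pandas.read_sql (not the project's actual method body):

import pandas as pd

def getbatch_pandas_dataframe(self, query):
    # Yields DataFrames of at most self.chunksize rows from the Panoply engine.
    for chunk in pd.read_sql(query, self.connection_panoply, chunksize=self.chunksize):
        yield chunk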
 def test_case_s3_writer(self):
     """Test file write to S3 bucket."""
     config = JobConfig().getconfig()
     utility.check_s3path(config)
     panoplyreader = PanoplyImport()
     datawriter = S3Writer()
     # print(dataframe_results)
     query_panoply = 'select distinct ipaddress from activity_us (nolock) where ipaddress is not null limit 10'
     dataframe_results = panoplyreader.get_pandas_dataframe(query_panoply)
     datawriter.append_data(dataframe_results, 0)
     datawriter = S3Writer()
     s3file_url = datawriter.getfileurl(0)
     with datawriter.fileWriter.open(s3file_url, mode='rb') as pointer:
         file_bytes = pointer.read()
     data_bytes = dataframe_results[['ipaddress'
                                     ]].to_csv(None,
                                               header=False,
                                               index=False).encode()
     self.assertEqual(file_bytes[11:], data_bytes)
     print('Ran test_case_s3_writer test case')
Example #18
    def execute_job(self, job):
        available_hosts = [h for h in self.graph.get_hosts() if h.is_free()]

        util = JobConfig()

        # There are enough nodes to run the job
        if len(available_hosts) > (self.num_mappers + self.num_reducers):
            max_util = self.num_mappers * self.num_reducers * self.bandwidth
            max_step = self.max_step

            # Executing simulated annealing for map-reduce placement
            simulated_annealing = SimulatedAnnealing(max_util, \
                                                     max_step, \
                                                     self.placement_init_state, \
                                                     self.placement_generate_neighbor, \
                                                     self.placement_compute_util)

            util = simulated_annealing.run()

        return util
 def __init__(self):
     """Initialize instance of IP Resolver."""
     self.config = JobConfig().getconfig()
     self.logger = utility.getlogger('ip_resolution', 'ip_resolution')
Example #20
def process_chunks(config, ipResolver, geoservice, panoplyreader, datawriter):
    """Resolve IP address batches from Panoply in parallel worker processes."""
    # Opening reconstructed from the equivalent loop in test_case_save_resolved_ipdata
    # and the __main__ block below (the listing cuts this example off mid-function),
    # so treat these lines as an assumption.
    seconds = time.time()
    try:
        for dataframe_ip_address in panoplyreader.getbatch_pandas_dataframe(
                config["panoplydatabase"]["readQuery"]):
            dataframes = utility.split_dataframe(dataframe_ip_address)
            processNo = 0
            processList = []
            for frame in enumerate(dataframes):
                processNo = processNo + 1
                process_ipresolve = processes.Process(
                    target=ipResolver.resolve_ipaddress,
                    args=(frame[1], geoservice, datawriter, processNo))
                processList.append(process_ipresolve)
                process_ipresolve.start()
                logger.info('processNo-' + str(process_ipresolve.pid))
            for p in processList:
                p.join()
                # print(str(p.exitcode))
    except Exception as ex:
        logger.info('Issue in fetching data from Panoply:' + str(ex))
        logger.error(utility.print_exception())
        return False
    logger.info("Finished the batch job in %s seconds" % str(
        (time.time() - seconds) // 1))
    return True


if __name__ == "__main__":
    logger = utility.getlogger('ip_resolution', 'ip_resolution')
    logger.info('Starting ip resolution job')
    config = JobConfig().getconfig()
    utility.check_s3path(config)
    ip_resolver = IPResolver()
    panoplyreader = PanoplyImport()
    datawriter = S3Writer()
    ipdata_geoservice = IPDataService()
    if process_chunks(config, ip_resolver, ipdata_geoservice, panoplyreader,
                      datawriter):
        panoplywriter = PanoplyWriter()
        panoplywriter.save_data()
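All of these snippets read their settings through JobConfig().getconfig(); a sketch of the dictionary shape they expect, listing only keys that actually appear above (every value is a placeholder, not taken from the source):

config = {
    "panoplydatabase": {
        "user": "...", "password": "...", "database": "...", "host": "...",
        "port": 5439, "readQuery": "select ...", "chunksize": 10000,
    },
    "storageDetails": {
        "filePath": "...", "fileName": "...", "fileExtension": ".csv",
        "awsKey": "...", "secretKey": "...", "tableName": "...", "region": "...",
    },
    "processConfig": {"noOfParallelProcess": 4},
    "geoservice": {"url": "...", "apikey": "..."},
}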