コード例 #1
0
ファイル: app.py プロジェクト: jkachika/columbus-worker
 def run(self):
     """
     Runs the worker: registers with the supervisor and then serves clients.

     Binds a listening socket to an OS-assigned port, passes the bound
     address to the assistant, obtains the container size and the global
     settings from the supervisor, starts the assistant and the scheduler,
     reports this worker's configuration to the supervisor and finally
     hands every accepted connection to the assistant until the worker is
     terminated. On shutdown the scheduler is woken up and terminated, and
     both the scheduler and the assistant are joined if still alive.

     Both sockets opened here are closed even when a step fails.
     """
     server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     try:
         # Port 0 lets the operating system choose a free port.
         server.bind(('', 0))
         host, server_port = server.getsockname()[:2]
         logger.info("Worker started on port %d" % server_port)
         logger.info("informing assistant on localhost(%s)" % host)
         self.__assistant.set_worker_address((str(host), server_port))

         supervisor = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         try:
             logger.info("contacting supervisor on " +
                         str(socket.getfqdn(self.supervisor[0])))
             supervisor.connect(self.supervisor)
             messaging.send(
                 supervisor,
                 messaging.Request(messaging.RequestType.CONFIG))
             response = messaging.recv(supervisor)
             # 1 GB default container size if none was returned,
             # mentioned in MB = 1024
             self.container_size = response.get(
                 messaging.SupervisorConfigResponse.CONTAINER_SIZE, 1024)
             self.global_settings = response.get(
                 messaging.SupervisorConfigResponse.GLOBAL_SETTINGS, {})
             logger.info('starting the assistant')
             self.__assistant.start()
             logger.debug('supervisor config response - %s' % response)
             self.scheduler = TargetScheduler(
                 container_size=self.container_size, worker=self)
             memory = psutil.virtual_memory
             worker_config = messaging.Request(
                 messaging.RequestType.CONFIG,
                 messaging.WorkerConfigResponse(
                     port_num=server_port,
                     num_cores=psutil.cpu_count(),
                     process_id=os.getpid(),
                     num_slots=self.scheduler.max_vacancy,
                     available_mem=memory().available,
                     max_memory=memory().total))
             logger.info("sending worker configuration to supervisor")
             messaging.send(supervisor, worker_config)
         finally:
             # Previously leaked whenever the handshake raised.
             supervisor.close()

         logger.info('starting the scheduler')
         self.scheduler.start()
         server.listen(5)
         while not self.is_terminated():
             connection, address = server.accept()
             # The worker may have been terminated while accept() blocked;
             # such a connection is not handed to the assistant.
             if not self.is_terminated():
                 self.__assistant.add_client(connection, address)

         logger.info("Worker is shutting down")
         self.scheduler.awake()
         self.scheduler.terminate()
         if self.scheduler.isAlive():
             logger.info("Awaiting scheduler termination")
             self.scheduler.join()
             logger.info("Scheduler terminated")
         if self.__assistant.isAlive():
             logger.info("Awaiting assistant termination")
             self.__assistant.join()
             logger.info("Assistant terminated")
         logger.info("Shutdown complete")
     finally:
         # The listening socket was never released before.
         server.close()
コード例 #2
0
def send_mail(receivers, subject, message, html=None):
    """
    Sends an email to the recipients. Must be called from an EngineThread,
    because the sender and the master address are read from the current
    thread (``username`` and ``master`` attributes).

    Apart from the recipient type check, this method does not raise: any
    failure while building or pushing the request (including a missing
    subject or message) is logged and swallowed.

    :param list(str) receivers: list of recipient email addresses
    :param str subject: subject of the email
    :param str message: plain text message
    :param str html: HTML message; when omitted it is derived from
        ``message`` by turning line breaks into ``<br/>``
    :raises ValueError: if ``receivers`` is not a list
    """
    if not isinstance(receivers, list):
        raise ValueError(
            'Invalid recipients. Must be a list of email addresses.')
    try:
        if not subject or not message:
            raise ValueError(
                'subject and message body are required to send the email')
        current = threading.current_thread()
        sender = current.username
        master = current.master
        if html is None:
            html = re.sub("\r?\n", "<br/>", message)
        request = messaging.Request(
            messaging.RequestType.EMAIL,
            messaging.WorkerEmailRequest(sender=sender,
                                         receivers=receivers,
                                         subject=subject,
                                         plain=message,
                                         html=html))
        messaging.push(master, request)
    except Exception as e:
        # ``e.message`` is not available on every exception (it is gone in
        # Python 3), so reading it here could raise from inside the handler
        # and break the no-raise guarantee. Let logging format the exception.
        logger.error("%s", e)
        logger.error(traceback.format_exc())
コード例 #3
0
ファイル: app.py プロジェクト: jkachika/columbus-worker
 def push_snapshot(self, acquire=True):
     """
     Pushes the current snapshot to the supervisor on a best-effort basis.

     Any failure is logged together with its traceback and is not
     propagated to the caller.

     :param bool acquire: forwarded unchanged to ``get_snapshot``
     """
     try:
         snapshot = self.get_snapshot(acquire)
         request = messaging.Request(messaging.RequestType.SNAPSHOT,
                                     snapshot)
         messaging.push(self.worker.supervisor, request)
     except BaseException as e:
         # NOTE(review): BaseException also swallows KeyboardInterrupt and
         # SystemExit; kept as in the original - confirm this is intended.
         # ``e.message`` is not available on every exception (removed in
         # Python 3); let logging format the exception instead so the
         # handler itself cannot fail.
         logger.error("Failed to send snapshot to supervisor: %s", e)
         logger.error(traceback.format_exc())
コード例 #4
0
ファイル: core.py プロジェクト: jkachika/columbus-worker
 def save(self, master):
     """
     Sends this object to the master as a model update.

     The request names the model after this object's class and carries the
     object itself as the update payload.

     :param master: destination passed to ``messaging.push``
     :raises Exception: an exception of the same type as the underlying
         failure, with a message explaining that the update could not be
         communicated; the original traceback is logged first
     """
     try:
         update = messaging.Request(
             messaging.RequestType.UPDATE,
             messaging.ModelUpdateRequest(model=self.__class__.__name__,
                                          update=self))
         messaging.push(master, update)
     except Exception as e:
         logger.error(traceback.format_exc())
         # Format the exception itself rather than ``e.message``: that
         # attribute does not exist on every exception (removed in
         # Python 3), which replaced the real error with an AttributeError.
         raise type(e)("failed to communicate the update to master - %s" %
                       (e, ))
コード例 #5
0
ファイル: core.py プロジェクト: jkachika/columbus-worker
 def run(self):
     """
     Executes the target in an EngineThread and records the outcome.

     When ``resource_loader`` is available, the address space of this
     process is first capped at ``num_containers * container_size`` MB.
     The engine thread is then polled once per second until it stops
     running. ``self.status`` receives the pid plus either the JSON-encoded
     output or the error and its trace. Whatever happens, a FINISHED request
     carrying the pid is pushed to the worker at the end.
     """
     pid = os.getpid()
     try:
         if resource_loader is not None:
             import resource
             megabytes = self.num_containers * self.container_size
             cap = megabytes * 1024 * 1024
             resource.setrlimit(resource.RLIMIT_AS, (cap, cap))

         target = self.target
         engine = EngineThread(
             settings=self.settings,
             master=self.master,
             history=target.history,
             element=target.element,
             query=target.query,
             input=target.input,
         )
         engine.start()
         while engine.is_running():
             time.sleep(1)

         if not engine.is_successful():
             self.status["pid"] = pid
             error, trace = engine.get_error_trace()
             self.status["error"] = error
             self.status["details"] = trace
         else:
             self.status["pid"] = pid
             encoded = jsonpickle.encode(engine.output, unpicklable=False)
             self.status["output"] = encoded
     except BaseException as e:
         # Anything that escapes the engine handling above is reported
         # through the status as well.
         self.status["pid"] = pid
         self.status["error"] = e
         self.status["details"] = traceback.format_exc()
     finally:
         finished = messaging.Request(
             messaging.RequestType.FINISHED,
             messaging.TargetFinishedRequest(pid=pid))
         messaging.push(self.worker, finished)
コード例 #6
0
ファイル: core.py プロジェクト: jkachika/columbus-worker
 def fetch_raw_data(self):
     """
     Fetches the raw data described by ``self.query`` from its source.

     Supported values of ``self.query.source``:

     * ``'bigquery'`` - selects all columns of the table named by the query
       identifier, filtered by the feature/op/value criterion and the
       optional extra constraint; returns a list of dicts keyed by field
       name.
     * ``'drive'`` - returns whatever ``drive.get_file_contents`` yields for
       the query identifier.
     * ``'galileo'`` - reads the result files listed in
       ``self.query.results``: from the local disk when the result's host
       name equals this host, otherwise by asking the worker on that host
       for the file contents. Each comma-separated line becomes a dict
       keyed by the header's column names, with values cast to the header's
       declared type (falling back to ``str`` when the cast fails).

     A status entry is saved to the master before the bigquery and drive
     fetches.

     :return: the fetched data; an empty list for any other source
     :raises ValueError: if a galileo result lives on a remote host whose
         address is not listed in the response's workers
     """
     source = self.query.source
     identifier = self.query.identifier
     if source == 'bigquery':
         feature = self.query.feature
         op, value = self.query.op, self.query.value
         constraint = self.query.constraint
         if int(self.query.primitive) == 9:  # 9 indicates a String
             value = "'" + str(value) + "'"
         # NOTE(review): the clause is built by plain concatenation without
         # any escaping - confirm that feature/op/value/constraint can never
         # carry untrusted text.
         where = "(" + str(feature) + str(op) + str(value) + ")"
         if constraint is not None:
             where = where + " AND " + constraint
         FlowStatus(
             history=self.history.id,
             title='Fetching Data',
             description=
             ('Requesting bigquery to obtain the data based on the mentioned criteria - '
              + where),
             result="Pending",
             target_id=self.element.id,
             element=self.element.name).save(self.master)
         bq_output = bigquery.select_star(qualified_table_name=identifier,
                                          where=where,
                                          sync=True)
         fields = [field["name"] for field in bq_output['fields']]
         return [{fields[index]: cell["v"]
                  for index, cell in enumerate(row)}
                 for row in bq_output['rows']]
     elif source == 'drive':
         metadata = drive.get_metadata(identifier)
         FlowStatus(history=self.history.id,
                    title='Fetching Data',
                    description='Reading the file - ' +
                    str(metadata['name']) + ' having identifier ' +
                    str(metadata['id']),
                    result="Pending",
                    target_id=self.element.id,
                    element=self.element.name).save(self.master)
         return drive.get_file_contents(identifier)
     elif source == 'galileo':
         response = self.query.results
         filesystem = response['filesystem']
         header = response['header']
         # The header is parsed lazily and exactly once. Re-parsing it for
         # every result (as before) corrupted it from the second result on.
         columns = None
         column_types = None
         csv = []
         for result in response['result']:
             if result['hostName'] == hostname:
                 # Files are on this host: read them directly. A fresh list
                 # per result keeps results independent of one another.
                 lines = []
                 for file_path in result['filePath']:
                     with open(file_path, 'r') as result_file:
                         lines.extend(result_file.read().splitlines())
             else:
                 address = None
                 for worker in response['workers']:
                     if worker[0] == result['hostName']:
                         address = worker
                         break
                 if address is None:
                     raise ValueError(
                         "Failed to find the address of the host %s to fetch the result files %s"
                         % (result['hostName'], result['filePath']))
                 sock = None
                 try:
                     sock = socket.socket(socket.AF_INET,
                                          socket.SOCK_STREAM)
                     logger.info(
                         "Fetching data from remote host with address %s " %
                         str(address))
                     sock.connect((address[0], address[1]))
                     request = messaging.Request(
                         messaging.RequestType.FILE,
                         messaging.WorkerFileRequest(
                             filesystem, result['filePath']))
                     lines = messaging.sendrecv(sock, request)
                 except Exception as e:
                     err_msg = "Failed to obtain the contents of the files %s from %s" % (
                         result['filePath'], result['hostName'])
                     logger.error(err_msg)
                     logger.error(traceback.format_exc())
                     raise type(e)(err_msg)
                 finally:
                     # The socket used to be leaked; closing an already
                     # closed socket is harmless.
                     if sock is not None:
                         sock.close()
             if not lines:
                 continue
             if columns is None:
                 parsed = [str(column).split(":") for column in header]
                 columns = [str(fields[0]) for fields in parsed]
                 column_types = [
                     int if str(fields[1]) in ['INT', 'INTEGER'] else
                     float if str(fields[1]) in ['FLOAT', 'DOUBLE'] else
                     long if str(fields[1]) == 'LONG' else str
                     for fields in parsed
                 ]
             # Plain iteration replaces the quadratic ``pop(0)`` drain.
             for line in lines:
                 row_dict = {}
                 for index, column in enumerate(str(line).split(",")):
                     cast = column_types[index]
                     # Keep the raw text when the declared type does not fit.
                     row_dict[columns[index]] = cast(
                         column) if not caught(cast, column) else str(column)
                 csv.append(row_dict)
         return csv
     return []
コード例 #7
0
ファイル: app.py プロジェクト: jkachika/columbus-worker
 def run(self):
     """
     Serves queued client connections until a TERMINATION request arrives.

     Each connection taken from ``self.clients`` carries one request, which
     is dispatched on its type:

     * PIPELINE - merges the pipeline settings over a copy of the worker's
       global settings and hands the pipeline to the scheduler.
     * WORKLOAD / SNAPSHOT - replies with the scheduler's workload or
       snapshot.
     * FILE - replies with the lines of the requested files; files that
       cannot be read are logged and skipped.
     * TERMINATION - terminates the worker, pushes a TERMINATION request to
       the worker's own address and leaves the loop.
     * FINISHED - wakes the scheduler up.

     A failure while handling one request is logged and does not stop the
     loop. The client connection is always closed afterwards.
     """
     while True:
         client, address = self.clients.get()
         try:
             msg = messaging.recv(client)
             request_type = msg[messaging.Request.TYPE]
             if request_type == messaging.RequestType.PIPELINE:
                 pipeline = msg[messaging.Request.BODY]
                 logger.debug("%s" % pipeline)
                 # Work on a copy so the worker-wide settings stay untouched.
                 settings = copy.deepcopy(self.worker.global_settings)
                 settings = deep_update(
                     settings,
                     pipeline.get(messaging.PipelineRequest.SETTINGS, {}))
                 pipeline[messaging.PipelineRequest.SETTINGS] = settings
                 self.worker.scheduler.add_targets(pipeline,
                                                   self.__worker_address)
             elif request_type == messaging.RequestType.WORKLOAD:
                 messaging.send(client,
                                self.worker.scheduler.get_workload())
             elif request_type == messaging.RequestType.SNAPSHOT:
                 messaging.send(
                     client,
                     self.worker.scheduler.get_snapshot(acquire=True))
             elif request_type == messaging.RequestType.FILE:
                 file_request = msg[messaging.Request.BODY]
                 file_paths = file_request[
                     messaging.WorkerFileRequest.FILE_PATHS]
                 # NOTE(review): the paths come straight from the request
                 # and are opened as-is - confirm that only trusted peers
                 # can reach this port.
                 contents = []
                 for file_path in file_paths:
                     try:
                         with open(file_path, 'r') as result_file:
                             contents.extend(
                                 result_file.read().splitlines())
                     except Exception:
                         # Best effort: an unreadable file is skipped.
                         logger.error(traceback.format_exc())
                 messaging.send(client, contents)
             elif request_type == messaging.RequestType.TERMINATION:
                 self.worker.terminate()
                 messaging.push(
                     self.__worker_address,
                     messaging.Request(messaging.RequestType.TERMINATION))
                 break
             elif request_type == messaging.RequestType.FINISHED:
                 self.worker.scheduler.awake()
         except Exception:
             # Narrowed from a bare ``except`` so that interpreter-exit
             # signals are no longer swallowed by the request loop.
             logger.error(
                 "Something went wrong while processing the request from the client"
             )
             logger.error(traceback.format_exc())
         finally:
             try:
                 client.close()
             except Exception:
                 # Closing is best effort, but no longer completely silent.
                 logger.debug(traceback.format_exc())
     logger.info("Assistant shutdown complete")
     return