def install(alsi=None):
    """Register this host as a core node in the datastore, if not already done.

    The node record is keyed by the MAC address that belongs to the host IP.
    When a record already exists for that MAC nothing is written.

    Args:
        alsi: Optional installer object; only its ``info(msg)`` method is
            used, for progress messages. When ``None`` the messages are
            dropped instead of raising ``AttributeError``.
    """
    from assemblyline.al.common import forge

    def info(msg):
        # The parameter defaults to None, so logging has to be optional.
        if alsi is not None:
            alsi.info(msg)

    ds = forge.get_datastore()
    ip = net.get_hostip()
    mac = net.get_mac_for_ip(ip)

    if ds.get_node(mac):
        info("Registration already exist. Skipping...")
        return

    reg = DEFAULT_CORE_REGISTRATION.copy()
    reg['hostname'] = net.get_hostname()
    reg['ip'] = ip
    reg['mac_address'] = mac
    reg['machine_info'] = sysinfo.get_machine_info()
    reg['platform'] = sysinfo.get_platform()

    # dict.copy() is shallow: appending to reg['roles'] directly would also
    # grow the list held by DEFAULT_CORE_REGISTRATION. Work on a fresh list.
    roles = list(reg.get('roles') or [])
    for role in ("dispatcher", "middleman"):
        if role not in roles:
            roles.append(role)
    reg['roles'] = roles

    ds.save_node(mac, reg)
    info("Core server registered!")
def start():
    """Check drive health through smartctl and publish any failures.

    smartctl is probed first and, if it cannot be executed, installed with
    apt-get (the process exits when that installation fails). The first
    token of ``smartctl --scan`` is taken as the device, and megaraid disk
    ids 0..15 behind it are queried until one answers "INQUIRY failed".
    Disks rejected by ``is_drive_ok`` are logged and reported in a single
    message on the 'status' comms queue.
    """
    pipes = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE}
    failed = []

    # Test if smartmontools is installed
    try:
        subprocess.call(['smartctl'], **pipes)
    except OSError:
        # Install smartmontools
        install_rc = subprocess.call(
            ["sudo", "apt-get", "-y", "install", "smartmontools"], **pipes)
        if install_rc != 0:
            exit("Can't install smartmontools, stopping...")

    # Find drives
    scan_out, _ = subprocess.Popen(['smartctl', '--scan'], **pipes).communicate()
    if not scan_out:
        return

    device = scan_out.split(" ")[0]
    for disk_id in xrange(16):
        health_cmd = ['smartctl', '-H', '-d', 'megaraid,%s' % disk_id, device]
        health_out, _ = subprocess.Popen(health_cmd, **pipes).communicate()
        if "INQUIRY failed" in health_out:
            break

        # Report status
        if is_drive_ok(health_out):
            log.info("All is good with drive: %s [disk: %s]" % (device, disk_id))
            continue
        failed.append((device, disk_id))
        log.error(
            "Device %s [disk: %s] has a failure state. Report to your administrator."
            % (device, disk_id))

    if not failed:
        return

    report = {
        'hostname': get_hostname(),
        'ip': get_hostip(),
        'bad_disks': [disk_id for _, disk_id in failed],
        'device': device
    }
    msg = message.Message(to="*",
                          sender='harddrive_monitor',
                          mtype=message.MT_HARDDRIVE_FAILURES,
                          body=report).as_dict()
    queue.CommsQueue('status').publish(msg)
def get_mac_for_serviceinstance(servicename, seperator=':'):
    """Derive a six-byte MAC-style identifier for a service instance.

    servicename is of form Name.Instance

    The bytes are, in order: 0x52, 0x54, the last octet of the host IP, the
    first two bytes of md5(Name), and the integer Instance. The joined
    string (separator included) is upper-cased before it is returned.
    """
    last_octet = int(net.get_hostip().split('.')[-1])
    service, instance_no = servicename.split('.')
    digest = hashlib.md5(service).hexdigest()

    octets = [0x52, 0x54, last_octet]
    octets.extend(int(digest[i:i + 2], 16) for i in (0, 2))
    octets.append(int(instance_no))

    return seperator.join(["%02x" % octet for octet in octets]).upper()
def get_machine_info(is_agent=False):
    """Collect basic facts about the local machine.

    Args:
        is_agent: When true the uid is prefixed with "Agent-"; otherwise it
            is prefixed with "Core-" and the hostname is added under 'name'.

    Returns:
        dict with 'cores' (number of per-CPU readings psutil reports),
        'memory' (total memory divided by 1024**3, formatted with one
        decimal, as a string), 'os', 'ip' and 'uid', plus 'name' for
        non-agents.
    """
    import psutil

    # phymem_usage() is deprecated in favour of virtual_memory() and is
    # absent from current psutil releases; fall back to it only on installs
    # that are too old to offer virtual_memory().
    read_memory = getattr(psutil, 'virtual_memory', None) or psutil.phymem_usage

    out = {
        'cores': len(psutil.cpu_percent(interval=0.01, percpu=True)),
        'memory': "%.1f" % (float(read_memory().total) / 1024 / 1024 / 1024),
        'os': get_osinfo(),
        'ip': net.get_hostip(),
    }

    mac = net.get_mac_address()
    if is_agent:
        out['uid'] = "Agent-" + mac
    else:
        out['uid'] = "Core-" + mac
        out['name'] = net.get_hostname()
    return out
def main():
    """Schedule the periodic metric collectors for this host.

    Exits with status 1 when the seed has no log server node or elastic
    port. Otherwise system metrics are gathered every 60 seconds, and solr
    and riak metrics as well when ``is_riak`` says so for this host.
    """
    global previous_disk_io, previous_net_io

    logserver_cfg = config.get('logging', {}).get('logserver', {})
    elastic_ip = logserver_cfg.get('node', None)
    elastic_port = logserver_cfg.get('elastic', {}).get('port', 9200)

    if not (elastic_ip and elastic_port):
        log.error(
            "Elasticsearch cluster not configured in the seed. There is no need to gather stats on this box."
        )
        sys.exit(1)

    scheduler = Scheduler()
    cur_host = get_hostname()
    cur_ip = get_hostip()
    es = elasticsearch.Elasticsearch([{'host': elastic_ip, 'port': elastic_port}])

    def every_minute(collector):
        # Each job receives its own kwargs dict.
        scheduler.add_interval_job(
            collector,
            seconds=60,
            kwargs={"es": es, "cur_ip": cur_ip, "cur_host": cur_host})

    every_minute(calculate_system_metrics)
    if is_riak(cur_ip, cur_host):
        every_minute(calculate_solr_metrics)
        every_minute(calculate_riak_metrics)

    scheduler.daemonic = False
    scheduler.start()
def __init__(
        self,
        service_manager,  #pylint: disable=R0913
        control_queue=None,
        debug=False,
        high=config.core.dispatcher.max.inflight /
        config.core.dispatcher.shards,
        pop=forge.get_dispatch_queue().pop,
        shard='0'):
    """Initialise the per-shard dispatcher state.

    Args:
        service_manager: Object exposing ``services``; stored and listed in
            the start-up log line.
        control_queue: Queue to use; when falsy, the shard's
            'control-queue-<shard>' is fetched from forge.
        debug: When true ``self.debug`` logs through ``log.info``;
            otherwise it is a no-op.
        high: Stored as ``self.high``.
        pop: Stored as ``self.pop``.
        shard: Shard identifier (string) used to build the queue names.
    """
    self.debug = log.info if debug else (lambda *msg: None)

    # NOTE(review): the first key really is 'ip:' (with a colon); kept
    # as-is because readers of this dict are not visible from here.
    self.hostinfo = {
        'ip:': get_hostip(),
        'mac_address': get_mac_address(),
        'host': get_hostname(),
    }

    self.ack_timeout = {}
    self.child_timeout = {}
    self.completed = {}

    if not control_queue:
        control_queue = forge.get_control_queue('control-queue-' + shard)
    self.control_queue = control_queue

    self.drain = False
    self.entries = {}
    self.errors = {}
    self.high = high
    self.ingest_queue = 'ingest-queue-' + shard
    self.last_check = 0
    self.lock = threading.Lock()
    self.pop = pop
    self.queue_size = {}

    # Reponse queues are named: <hostname>-<pid>-<seconds>-<shard>.
    name_parts = [
        socket.gethostname(),
        str(os.getpid()),
        str(int(time.time())),
        shard,
    ]
    self.response_queue = '-'.join(name_parts)

    self.results = {}
    self.running = False
    self.score = {}
    self.service_manager = service_manager
    self.service_timeout = {}
    self.shard = shard
    self.storage_queue = LocalQueue()
    self.watchers = {}

    service_names = list(service_manager.services)
    log.info('Dispatcher started. Dispatching to services:{0}'.format(
        service_names))
def __init__(self, vmcfg=None): self.disk_root = config.workers.virtualmachines.disk_root if not os.path.exists(self.disk_root): os.makedirs(self.disk_root) self.vmm = None self.cfg = vmcfg if vmcfg is None: self.cfg = get_vmcfg_for_localhost() self.vmrevert_scheduler = None self.host_ip = net.get_hostip() self.host_mac = net.get_mac_for_ip(self.host_ip) self.log = logging.getLogger('assemblyline.vmm') self.vm_profiles = {} self.vmm_lock = threading.Lock() self._state = State.INIT self._hostagent_client = ServiceAgentClient(async=True) self.store = forge.get_datastore() self.vm_configs = {}
def start(self):
    """Run the dispatcher's main loop until stopped or drained.

    Starts the metrics counters, the service manager, one heartbeat thread
    and eight writer threads, installs the SIGINT handler, waits two update
    intervals and then polls. The loop ends when ``self.running`` is
    cleared, or when draining and no entries remain; the counters are
    stopped afterwards.
    """
    global counts  # pylint: disable=W0603

    # Publish counters to the metrics sink.
    counts = counter.AutoExportingCounters(
        name='dispatcher-%s' % self.shard,
        host=net.get_hostip(),
        auto_flush=True,
        auto_log=False,
        export_interval_secs=config.system.update_interval,
        channel=forge.get_metrics_sink(),
        counter_type='dispatcher')
    counts.start()

    self.service_manager.start()

    # Idle polls sleep 2**exponent seconds; the exponent climbs from
    # lowest_exp to highest_exp (i.e. up to one second) and drops back to
    # lowest_exp as soon as a poll finds work.
    lowest_exp, highest_exp = -6, 0

    self.running = True

    threading.Thread(target=self.heartbeat).start()
    writers = [threading.Thread(target=self.writer) for _ in range(8)]
    for writer_thread in writers:
        writer_thread.start()

    signal.signal(signal.SIGINT, self.interrupt)

    time.sleep(2 * int(config.system.update_interval))

    exponent = lowest_exp
    while self.running:
        if self.poll(len(self.entries)):
            exponent = lowest_exp
            continue
        if self.drain and not self.entries:
            break
        time.sleep(2**exponent)
        if exponent < highest_exp:
            exponent += 1
        self.check_timeouts()

    counts.stop()
def __init__(self):
    """Initialise the host agent's identity, state and RPC dispatch table."""
    host_ip = net.get_hostip()
    self.ip = host_ip
    self.mac = net.get_mac_for_ip(host_ip)
    self.store = forge.get_datastore()
    self.log = logging.getLogger('assemblyline.agent')
    self.log.info('Starting HostAgent: MAC[%s] STORE[%s]' % (self.mac, self.store))

    # This hosts registration from riak (Hosts tab in UI).
    self.registration = None

    # Managers and threads all start out unset in this constructor.
    self.service_manager = self.vm_manager = self.flex_manager = None
    self.lock = self.consumer_thread = None
    self._should_run = False
    self.host_profile = {}
    self.executor_thread = None

    # Chores are actions that we run periodically and which we coallesce
    # when the same chore is requested multiple times in the same tick.
    # Jobs are executed as they are received.
    self.jobs = LocalQueue()
    self.last_heartbeat = 0

    # Maps each agent request type to the bound method that serves it.
    handler_table = (
        (AgentRequest.PING, self.ping),
        (AgentRequest.DRAIN, self.drain),
        (AgentRequest.UNDRAIN, self.undrain),
        (AgentRequest.SHUTDOWN, self.shutdown),
        (AgentRequest.VM_LIST, self.list_vms),
        (AgentRequest.VM_START, self.start_vm),
        (AgentRequest.VM_STOP, self.stop_vm),
        (AgentRequest.VM_STOP_ALL, self.stop_all_vms),
        (AgentRequest.VM_RESTART, self.restart_vm),
        (AgentRequest.VM_REFRESH_ALL, self.refresh_vm_all),
        (AgentRequest.VM_REFRESH_FLEET, self.refresh_vm_fleet),
        (AgentRequest.VM_GET_REVERT_TIMES, self.vm_get_revert_times),
        (AgentRequest.START_SERVICES, self.start_services),
        (AgentRequest.STOP_SERVICES, self.stop_services),
    )
    self.rpc_handlers = dict(handler_table)

    self._should_run = True
def __init__(self):
    """Initialise flex manager state; nothing is scheduled here."""
    # Delay these imports so most nodes don't import them.
    global Scheduler
    from apscheduler.scheduler import Scheduler

    self.bottleneck_queue_sizes = {}
    self.cores = None
    self.datastore = forge.get_datastore()
    self.flex_profile = self.flex_scheduler = None
    self.log = logging.getLogger('assemblyline.flex')

    host_ip = net.get_hostip()
    self.mac = net.get_mac_for_ip(host_ip)

    self.main_bottleneck = ''
    self.needs_cleanup = True
    self.previous_queue_sizes = {}
    self.safe_start_dict = {}

    # The safe-start queue name is derived from this host's MAC.
    self.safeq = NamedQueue('safe-start-%s' % self.mac)

    self.service_manager = None
    self.ram_mb = None
    self.tick_count = 0
    self.vm_manager = None
def get_vmcfg_for_localhost():
    """Return the 'virtual_machines' section of this host's profile.

    The host is looked up in the datastore by the MAC address belonging to
    its IP; its registration names a profile, and that profile carries the
    virtual machine configuration.

    Returns:
        The value stored under 'virtual_machines' in the host profile.

    Raises:
        ConfigException: if the registration, its profile name, the profile
            itself or the profile's 'virtual_machines' section is missing
            or empty.
    """
    ip = net.get_hostip()
    mac = net.get_mac_for_ip(ip)
    store = forge.get_datastore()
    try:
        host_registration = store.get_node(mac)
        if not host_registration:
            raise ConfigException('Could not find host registration for %s' % mac)

        profile_name = host_registration.get('profile', None)
        if not profile_name:
            raise ConfigException('Could not find profile for host: %s' % mac)

        host_profile = store.get_profile(profile_name)
        if not host_profile:
            raise ConfigException('Could not fetch host profile %s' % profile_name)

        vm_config = host_profile.get('virtual_machines', None)
        if not vm_config:
            raise ConfigException('Could not find virtual machine section in %s' % profile_name)

        return vm_config
    finally:
        # Release the datastore client on the error paths too, not only
        # when the lookup succeeds.
        store.client.close()
def _transport_extras(query, str_keys=(), bool_keys=()):
    """Keep only the whitelisted extra parameters of a parsed query string.

    ``query`` maps each key to a list of values (as ``parse_qs`` returns);
    only the first value is considered. Boolean keys accept 'true'/'false'
    in any letter case; any other value leaves the key unset.
    """
    extras = {}
    for key, values in query.items():
        if key in bool_keys:
            flag = values[0].lower()
            if flag == 'true':
                extras[key] = True
            elif flag == 'false':
                extras[key] = False
        if key in str_keys:
            extras[key] = values[0]
    return extras


def create_transport(url):
    """
    Transport are being initiated using an URL. They follow the normal url format:
    ftp://user:pass@host.com/path/to/file

    In this example, it will extract the following parameters:
    scheme: ftp
    host: host.com
    user: user
    password: pass

    Certain transports can have extra parameters, those parameters need to be specified in the query part of the url.
    e.g.: sftp://host.com/path/to/file?private_key=/etc/ssl/pkey&private_key_pass=pass&validate_host=true
    scheme: sftp
    host: host.com
    user:
    password:
    private_key: /etc/ssl/pkey
    private_key_pass: pass
    validate_host: True

    NOTE: For transport with extra parameters, only specific extra parameters are allow. This is the list of extra
    parameter allowed:
    ftp: None
    http: pki (string)
    sftp: private_key (string), private_key_pass (string), validate_host (bool)
    file: None

    An ftp, sftp or http URL whose host is this machine's hostname or IP is
    served by the local file transport instead.

    Raises FileStoreException when the scheme is not one of the above.
    """
    parsed = urlparse(url)

    base = parsed.path or '/'
    host = parsed.hostname
    password = unquote(parsed.password) if parsed.password else ''
    user = parsed.username or ''
    scheme = parsed.scheme.lower()

    # Remote schemes that point back at this host are handled locally.
    if scheme in ('ftp', 'sftp', 'http') and (host == get_hostname() or host == get_hostip()):
        scheme = 'file'

    if scheme == 'ftp':
        from assemblyline.al.common.transport.ftp import TransportFTP
        base = base.replace(config.filestore.ftp_root, "")
        t = TransportFTP(base=base, host=host, password=password, user=user)

    elif scheme == "sftp":
        from assemblyline.al.common.transport.sftp import TransportSFTP
        extras = _transport_extras(parse_qs(parsed.query),
                                   str_keys=('private_key', 'private_key_pass'),
                                   bool_keys=('validate_host',))
        t = TransportSFTP(base=base, host=host, password=password, user=user, **extras)

    elif scheme == 'http':
        from assemblyline.al.common.transport.http import TransportHTTP
        extras = _transport_extras(parse_qs(parsed.query), str_keys=('pki',))
        t = TransportHTTP(base=base, host=host, password=password, user=user, **extras)

    elif scheme == 'file':
        t = TransportLocal(base=base)

    else:
        raise FileStoreException("Unknown transport: %s" % scheme)

    return t
shard = arg # Globals alertq = queue.NamedQueue('m-alert', **persistent) # df line queue cache = {} cache_lock = RLock() chunk_size = 1000 completeq_name = 'm-complete-' + shard date_fmt = '%Y-%m-%dT%H:%M:%SZ' default_prefix = config.core.middleman.default_prefix dup_prefix = 'w-' + shard + '-' dupq = queue.MultiQueue(**persistent) # df line queue expire_after_seconds = config.core.middleman.expire_after get_whitelist_verdict = forge.get_get_whitelist_verdict() hostinfo = { 'ip:': get_hostip(), 'mac_address': get_mac_address(), 'host': get_hostname(), } ingestq_name = 'm-ingest-' + shard is_low_priority = forge.get_is_low_priority() max_priority = config.submissions.max.priority max_retries = 10 max_time = 2 * 24 * 60 * 60 # Wait 2 days for responses. max_waiting = int(config.core.dispatcher.max.inflight) / (2 * shards) min_priority = 1 priority_value = constants.PRIORITIES retry_delay = 180 retryq = queue.NamedQueue('m-retry-' + shard, **persistent) # df line queue running = True sampling = False
hostname = 'unknownhost' # noinspection PyBroadException try: from assemblyline.common.net import get_hostname hostname = get_hostname() except Exception: # pylint:disable=W0702 pass ip = 'x.x.x.x' # noinspection PyBroadException try: from assemblyline.common.net import get_hostip ip = get_hostip() except Exception: # pylint:disable=W0702 pass AL_SYSLOG_FORMAT = f'{ip} AL %(levelname)8s %(process)5d %(name)20s | %(message)s' AL_LOG_FORMAT = f'%(asctime)-16s %(levelname)8s {hostname} %(process)d %(name)30s | %(message)s' AL_JSON_FORMAT = f'{{' \ f'"@timestamp": "%(asctime)s", ' \ f'"event": {{ "module": "assemblyline", "dataset": "%(name)s" }}, ' \ f'"host": {{ "ip": "{ip}", "hostname": "{hostname}" }}, ' \ f'"log": {{ "level": "%(levelname)s", "logger": "%(name)s" }}, ' \ f'"process": {{ "pid": "%(process)d" }}, ' \ f'"message": %(message)s}}'
self.stop() except RemoteShutdownInterrupt as ri: msg = 'Shutting down due to remote command: %s' % ri self.log.info(msg) self.stop() except Exception as ex: msg = 'Shutting down due to unhandled exception: %s' % get_stacktrace_info(ex) self.log.error(msg) self.stop() class AgentClient(object): def __init__(self, async=False, sender=None): """ If sender is not specified the local MAC is used """ self.sender = sender or net.get_mac_for_ip(net.get_hostip()) self.async = async def _send_agent_rpc(self, mac, command, args=None): result = send_rpc_comms_queue(ControllerRequest( to=mac, mtype=command, body=args, sender=self.sender), async=self.async) if not self.async: if result: return result.body return 'timeout' else: return result
# Module-level settings and handles for the alerter. `alertq_name` and
# `persistent_settings` are defined above the visible chunk.
commandq_name = 'a-command'
create_alert = forge.get_create_alert()
datastore = forge.get_datastore()
exit_msgs = ['server closed the connection unexpectedly']
interval = 3 * 60 * 60
logger = logging.getLogger('assemblyline.alerter')
max_consecutive_errors = 100
max_retries = 10
running = True

alertq = queue.NamedQueue(alertq_name, **persistent_settings)
commandq = queue.NamedQueue(commandq_name, **persistent_settings)

# Publish counters to the metrics sink.
counts = counter.AutoExportingCounters(name='alerter',
                                       host=net.get_hostip(),
                                       export_interval_secs=5,
                                       channel=forge.get_metrics_sink(),
                                       auto_log=True,
                                       auto_flush=True)
# The counters are started here, at module level.
counts.start()


# noinspection PyUnusedLocal
def interrupt(unused1, unused2):  # pylint:disable=W0613
    """SIGINT handler: log the signal and clear the module's `running` flag.

    Both positional arguments are ignored.
    """
    global running  # pylint:disable=W0603
    logger.info("Caught signal. Coming down...")
    running = False


# Install the handler for SIGINT.
signal.signal(signal.SIGINT, interrupt)