Example 1
import hashlib

from elasticsearch.helpers import bulk


def process_hosts(q, es):
    """
    :param q: The Queue object that host lines are pulled from
    :param es: An Elasticsearch connection. Each worker gets its own connection, so nothing has to be shared
               across multiple workers/processes
    :return: True once the "DONE" sentinel is received and the final batch is flushed
    """
    bulk_hosts = []

    while True:
        line = q.get()
        if line == "DONE":
            bulk(es, bulk_hosts)
            return True
        host = proccess_host(line)  # helper assumed to be defined elsewhere (spelling as in the original)
        # Deterministic document id; hashlib needs bytes, hence the encode.
        cert_hash = hashlib.sha1(
            (host['host'] + host['hash'] + host['source']).encode('utf-8')
        ).hexdigest()
        action = {
            "_op_type": "update",
            "_index": 'passive-ssl-hosts-sonar',
            "_type": "host",
            "_id": cert_hash,
            "doc": host,
            "doc_as_upsert": "true"
        }
        bulk_hosts.append(action)
        if len(bulk_hosts) == 500:
            bulk(es, bulk_hosts)
            bulk_hosts = []
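
For context, here is a minimal driver sketch showing how a worker like this might be wired up: a multiprocessing Queue fed with input lines, one Elasticsearch connection per worker, and one "DONE" sentinel per worker to shut them down. The function name run_import and the input-file format are assumptions, not part of the original source.

from multiprocessing import Process, Queue

from elasticsearch import Elasticsearch


def run_import(path, num_workers=4):
    # Bounded queue so the reader cannot outrun the workers indefinitely.
    q = Queue(maxsize=10000)
    workers = []
    for _ in range(num_workers):
        # One connection per worker, as the docstring suggests. On spawn-based
        # platforms, build the connection inside the worker instead.
        es = Elasticsearch()
        p = Process(target=process_hosts, args=(q, es))
        p.start()
        workers.append(p)
    with open(path) as f:
        for line in f:
            q.put(line.rstrip('\n'))
    for _ in workers:
        q.put("DONE")  # one sentinel per worker
    for p in workers:
        p.join()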
Example 2
import hashlib

from elasticsearch.helpers import bulk


def process_hosts(q, es, initial):
    """
    :param q: The Queue object that host lines are pulled from
    :param es: An Elasticsearch connection. Each worker gets its own connection, so nothing has to be shared
               across multiple workers/processes
    :param initial: If this is the initial upload, first_seen is set equal to last_seen. Otherwise first_seen is
           left blank and cleaned up later
    :return: True once the "DONE" sentinel is received and the final batch is flushed
    """
    bulk_hosts = []

    while True:
        line = q.get()
        if line == "DONE":
            bulk(es, bulk_hosts)
            return True
        host = proccess_host(line)
        # Same deterministic id scheme as Example 1; encode for hashlib.
        cert_hash = hashlib.sha1(
            (host['host'] + host['hash'] + host['source']).encode('utf-8')
        ).hexdigest()
        if initial:
            host['first_seen'] = host['last_seen']
        action = {
            "_op_type": "update",
            "_index": 'passive-ssl-hosts-umich',
            "_type": "host",
            "_id": cert_hash,
            "doc": host,  # the processed host dict, not the raw input line
            "doc_as_upsert": True
        }
        bulk_hosts.append(action)
        if len(bulk_hosts) == 500:
            bulk(es, bulk_hosts)
            bulk_hosts = []
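
Both variants derive the Elasticsearch _id by hashing the (host, hash, source) triple, which, combined with doc_as_upsert, makes imports idempotent: re-running the same input updates existing documents instead of creating duplicates. A small illustration of that property (the sample values are made up):

import hashlib


def es_id(host_doc):
    # Same triple in -> same _id out, on every run.
    key = host_doc['host'] + host_doc['hash'] + host_doc['source']
    return hashlib.sha1(key.encode('utf-8')).hexdigest()


doc = {'host': '192.0.2.1', 'hash': 'abc123', 'source': 'umich'}
assert es_id(doc) == es_id(dict(doc))  # stable across calls and runs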
Example 3

import gzip
import hashlib
import re
import time
from datetime import datetime

# http_event_collector is a Splunk HEC helper class; the module path below is an
# assumption (e.g. georgestarcher/Splunk-Class-httpevent).
from splunk_http_event_collector import http_event_collector


def process_hosts_file(gzfilename, key, logger, host='localhost', batchsize=16384,
                       index='hosts', sourcetype='sonar-host', useesid=False):
    logger.warning("Loading file {f} at {d}".format(f=gzfilename, d=datetime.now()))
    hec = http_event_collector(key, host)
    with gzip.open(gzfilename, 'rt') as resultsfile:  # text mode so lines are str, not bytes
        # The file path is expected to contain an 8-digit date (YYYYMMDD).
        m = re.search(r'.*/(\d{8})', gzfilename)
        filedate = m.group(1)
        filedate_struct = time.strptime(filedate, "%Y%m%d")
        filedate_epoch = time.mktime(filedate_struct)
        batchcount = 0
        for line in resultsfile:
            cleanline = line.strip('\n')
            (host, certhash) = cleanline.split(',', 1)
            newhost = {}
            newhost['host'] = host
            newhost['hash'] = certhash
            newhost['seen'] = filedate
            newhost['seen_epoch'] = filedate_epoch
            if useesid:
                cert_hash = hashlib.sha1(
                    (newhost['host'] + newhost['hash'] + 'sonar').encode('utf-8'))
                newhost['id'] = cert_hash.hexdigest()
            newhost = proccess_host(newhost, logger)
            payload = {
                "index": index,
                "host": host,
                "sourcetype": sourcetype,
                "source": gzfilename,
                "event": newhost,
            }
            hec.batchEvent(payload)
            batchcount += 1
            if batchcount == batchsize:
                hec.flushBatch()
                batchcount = 0
        if batchcount > 0:
            hec.flushBatch()
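
The file date is recovered from the path rather than from the file contents: the regex pulls an 8-digit YYYYMMDD component out of the directory name, and strptime/mktime turn it into an epoch timestamp. A worked example with a hypothetical path:

import re
import time

gzfilename = '/data/sonar/20151013/hosts.gz'  # hypothetical path
m = re.search(r'.*/(\d{8})', gzfilename)
filedate = m.group(1)                          # '20151013'
filedate_epoch = time.mktime(time.strptime(filedate, "%Y%m%d"))
print(filedate, int(filedate_epoch))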
Example 4
import gzip
import hashlib
import logging
import random
import re
import time
from datetime import datetime

# Splunk HEC helper class; the module path is an assumption.
from splunk_http_event_collector import http_event_collector


def process_hosts_file(file_queue,
                       key,
                       hostlist=['localhost'],
                       index='sonarsslhost',
                       sourcetype='sonarsslhost',
                       batchsize=16384,
                       useesid=False):
    logger = logging.getLogger('SSLImporter')
    while True:
        host = random.choice(hostlist)  # spread files across the available HEC endpoints
        print(host)
        hec = http_event_collector(key, host)
        gzfilename = file_queue.get()
        if gzfilename == "DONE":
            return True
        logger.warning("Loading file {f} at {d}".format(f=gzfilename,
                                                        d=datetime.now()))
        with gzip.open(gzfilename, 'rt') as resultsfile:  # text mode so lines are str
            # Accept either YYYYMMDD or YYYY-MM-DD in the file path.
            m = re.search(r'.*/(\d{8})', gzfilename)
            if m:
                filedate = m.group(1)
            else:
                m = re.search(r'.*/(\d{4}-\d{2}-\d{2})', gzfilename)
                filedate = m.group(1).replace('-', '')
            filedate_struct = time.strptime(filedate, "%Y%m%d")
            filedate_epoch = time.mktime(filedate_struct)
            batchcount = 0
            for line in resultsfile:
                cleanline = line.strip('\n')
                (host, certhash) = cleanline.split(',', 1)
                newhost = {}
                newhost['host'] = host
                newhost['hash'] = certhash
                newhost['seen'] = filedate
                newhost['seen_epoch'] = filedate_epoch
                if useesid:
                    cert_hash = hashlib.sha1(
                        (newhost['host'] + newhost['hash'] + 'sonar').encode('utf-8'))
                    newhost['id'] = cert_hash.hexdigest()
                newhost = proccess_host(newhost, logger)
                payload = {
                    "index": index,
                    "host": host,
                    "sourcetype": sourcetype,
                    "source": gzfilename,
                    "event": newhost,
                }
                hec.batchEvent(payload)
                batchcount += 1
                if batchcount == batchsize:
                    hec.flushBatch()
                    batchcount = 0
            if batchcount > 0:
                hec.flushBatch()
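
As with the line-based workers above, this variant expects a driver that fills file_queue with .gz paths and then sends one "DONE" sentinel per worker. A minimal sketch; the glob pattern and the name run_file_import are assumptions:

import glob
from multiprocessing import Process, Queue


def run_file_import(pattern, key, num_workers=2):
    file_queue = Queue()
    workers = [Process(target=process_hosts_file, args=(file_queue, key))
               for _ in range(num_workers)]
    for p in workers:
        p.start()
    for gzfilename in sorted(glob.glob(pattern)):  # e.g. '/data/sonar/*/hosts.gz'
        file_queue.put(gzfilename)
    for _ in workers:
        file_queue.put("DONE")  # one sentinel per worker
    for p in workers:
        p.join()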