Exemple #1
0
    def run(self):
        """Scan every file path received on the task queue until told to stop.

        Calls ``config.init`` and ``init_logging`` once, then loops over
        ``self.task_queue``.  Each task is a file path: the file is read,
        handed to ``Dispatch``, and the JSON produced by ``getJSON`` is
        zlib-compressed and put on ``self.result_queue``.  A ``None`` task is
        the poison pill that ends the loop.  Every file task produces exactly
        one ``task_done()`` call and one queued result, including files that
        could not be opened (an empty compressed result is queued for those).

        Depending on the module-level settings the result is also saved under
        ``SAVE_PATH``, passed to ``log_result`` and appended to ``LOG_JSON``.

        Returns:
            int: 0 after a poison-pill shutdown, 1 if a scan raised and the
            worker stopped early.
        """
        global CONFIG_PATH
        config.init(path=CONFIG_PATH)
        init_logging()
        ret_value = 0

        # Loop and accept messages from both channels, acting accordingly
        while True:
            next_task = self.task_queue.get()
            if next_task is None:
                # Poison pill means shutdown
                self.task_queue.task_done()
                logging.debug("%s Got poison pill" % (os.getpid()))
                break
            try:
                with open(next_task) as nextfile:
                    file_buffer = nextfile.read()
            except IOError:
                logging.debug("Error opening: %s" % (next_task))
                self.task_queue.task_done()
                # Report an empty compressed result, matching what the scan
                # path below emits when no JSON was produced.
                self.result_queue.put(zlib.compress(b""))
                continue

            resultJSON = ""
            try:
                # perform the work
                result = ScanResult()
                result.source = SOURCE
                result.startTime = time.time()
                result.level = level_metadata
                myexternalVars = ExternalVars(filename=next_task,
                                              source=SOURCE,
                                              ephID=EPHID,
                                              extMetaData=EXT_METADATA)

                Dispatch(file_buffer, result, 0, externalVars=myexternalVars,
                         extScanModules=SCAN_MODULES)

                resultJSON = getJSON(result)
                if SAVE_PATH:
                    rootObject = getRootObject(result)
                    UID_SAVE_PATH = "%s/%s" % (SAVE_PATH, get_scanObjectUID(rootObject))
                    if not os.path.exists(UID_SAVE_PATH):
                        try:
                            os.makedirs(UID_SAVE_PATH)
                        except (OSError, IOError):
                            error("\nERROR: unable to write to %s...\n" % (UID_SAVE_PATH))
                            raise
                    for uid, scanObject in result.files.iteritems():
                        # Each object's buffer is stored under its UID ...
                        with open("%s/%s" % (UID_SAVE_PATH, uid), "wb") as f:
                            f.write(scanObject.buffer)
                        # ... and a symlink named after the object points at it.
                        if scanObject.filename and scanObject.depth != 0:
                            linkPath = "%s/%s" % (UID_SAVE_PATH, scanObject.filename.replace("/", "_"))
                        elif scanObject.filename:
                            # Depth-0 object: link under the base name only.
                            filenameParts = scanObject.filename.split("/")
                            linkPath = "%s/%s" % (UID_SAVE_PATH, filenameParts[-1])
                        else:
                            linkPath = None
                        # Skip names that already exist so a repeated name
                        # does not raise and abort the whole worker.
                        if linkPath and not os.path.lexists(linkPath):
                            os.symlink("%s" % (uid), linkPath)
                    with open("%s/%s" % (UID_SAVE_PATH, "result.json"), "wb") as f:
                        f.write(resultJSON)

                if LOG_RESULT:
                    log_result(result)

                if LOG_JSON:
                    LOCAL_PATH = LOG_JSON
                    with open(LOCAL_PATH, "ab") as f:
                        f.write(resultJSON + "\n")
            except:
                # Deliberate catch-all: any failure ends this worker, but the
                # ``finally`` clause still accounts for the current task.
                logging.exception("Scan worker died, shutting down")
                ret_value = 1
                break
            finally:
                self.task_queue.task_done()
                self.result_queue.put(zlib.compress(resultJSON))

        close_modules()
        return ret_value
Exemple #2
0
def _build_option_parser():
    """Create the OptionParser describing every command-line flag of main()."""
    parser = OptionParser(
        usage="usage: %prog [options] (/path/to/file | stdin)")
    parser.add_option("-d",
                      "--debug",
                      action="store_true",
                      dest="debug",
                      help="enable debug messages to the console.")
    parser.add_option("-r",
                      "--remove-limit",
                      action="store_true",
                      dest="nolimit",
                      help="disable 20mb size limit (be careful!)")
    parser.add_option("-t",
                      "--timeout",
                      action="store",
                      type="int",
                      dest="timeout",
                      help="adjust request timeout period (in seconds)")
    parser.add_option("-c",
                      "--config-path",
                      action="store",
                      type="string",
                      dest="config_path",
                      help="specify a path to si-cloudscan.conf.")
    parser.add_option("-a",
                      "--address",
                      action="store",
                      type="string",
                      dest="broker_host",
                      help="specify an IP and port to connect to the broker")
    parser.add_option("-f",
                      "--file-list",
                      action="store",
                      type="string",
                      dest="file_list",
                      help="Specify a list of files to scan")
    parser.add_option("-s",
                      "--ssh-host",
                      action="store",
                      type="string",
                      dest="ssh_host",
                      help="specify a host for the SSH tunnel")
    parser.add_option(
        "-p",
        "--num-procs",
        action="store",
        type="int",
        default=6,
        dest="num_procs",
        help="Specify the number of processors to use for recursion")
    parser.add_option("-u",
                      "--source",
                      action="store",
                      type="string",
                      dest="source",
                      help="specify a custom source")
    parser.add_option("--ssh",
                      action="store_true",
                      default=False,
                      dest="use_ssh",
                      help="Use SSH tunneling")
    parser.add_option(
        "-l",
        "--level",
        action="store",
        type="string",
        dest="return_level",
        help="Return Level: minimal, metadata, full [default: metadata]")
    parser.add_option(
        "-o",
        "--out-path",
        action="store",
        type="string",
        dest="save_path",
        help="If Return Level Full has been specified, provide a path to "
        "save the results to [default: current directory]")
    parser.add_option(
        "-b",
        "--buffer",
        action="store_true",
        dest="stdin_buffer",
        help="Specify to allow a buffer to be collected by stdin.")
    parser.add_option("-e",
                      "--ephID",
                      action="store",
                      type="string",
                      dest="ephID",
                      default="",
                      help="Specify an ephID to send to Laika.")
    parser.add_option(
        "-m",
        "--ext-metadata",
        action="store",
        dest="ext_metadata",
        help="Specify external metadata to be passed into the scanner.")
    parser.add_option("-z",
                      "--log",
                      action="store_true",
                      dest="log_db",
                      help="Specify to turn on logging results.")
    parser.add_option(
        "-R",
        "--recursive",
        action="store_true",
        default=False,
        dest="recursive",
        help="Enable recursive directory scanning. If enabled, all files "
        "in the specified directory will be scanned. Results will "
        "be output to si-cloudscan.log in the current directory.")
    return parser


def main():
    """Command-line entry point: scan a single file or a set of files.

    Settings come from the command line first and fall back to the
    configuration file (read into the module-level ``configs`` dictionary).

    * Without ``--recursive`` the input is the file named by the first
      positional argument, or whatever is available on stdin.  It is sent to
      the broker through ``Client``, the JSON result is printed and, when the
      return level is ``level_full``, the returned objects are written below
      the save path.
    * With ``--recursive`` the file names come from ``--file-list``, from
      walking the directory given as first positional argument, or from
      stdin (one path per line).  They are put on ``job_queue``,
      ``num_procs`` ``worker`` processes are started, and every entry
      received on ``result_queue`` is appended to ``si-cloudscan.log``.

    The function terminates the process through ``sys.exit`` on usage and
    I/O errors.
    """
    parser = _build_option_parser()
    (options, args) = parser.parse_args()

    # Define default configuration location
    CONFIG_PATH = "/etc/si-cloudscan/si-cloudscan.conf"

    if options.config_path:
        CONFIG_PATH = options.config_path

    Config = ConfigParser.ConfigParser()
    Config.read(CONFIG_PATH)

    # Parse through the config file and append each section to a single dictionary
    global configs
    for section in Config.sections():
        configs.update(dict(Config.items(section)))

    # Set the working path, this will be used for file output if another
    # path is not specified
    WORKING_PATH = os.getcwd()

    # Command-line values win; the config file is only consulted when the
    # corresponding option was not given.
    USE_SSH = bool(options.use_ssh or strtobool(getConfig('use_ssh')))
    SSH_HOST = options.ssh_host or getConfig('ssh_host')
    BROKER_HOST = options.broker_host or getConfig('broker_host')

    if options.debug:
        logging.basicConfig(level=logging.DEBUG)

    logging.debug("Host: %s" % BROKER_HOST)

    RETURN_LEVEL = options.return_level or getConfig('return_level')

    SOURCE = options.source or "si-cloudscan"
    if not options.log_db:
        SOURCE += "-nolog"

    SAVE_PATH = options.save_path or WORKING_PATH

    if options.num_procs:
        num_procs = int(options.num_procs)
    else:
        num_procs = int(getConfig('num_procs'))

    if options.timeout:
        logging.debug("default timeout changed to %i" % options.timeout)
        # The option is given in seconds; the stored value is 1000x that.
        REQUEST_TIMEOUT = options.timeout * 1000
    else:
        REQUEST_TIMEOUT = int(getConfig('request_timeout'))

    if options.ext_metadata:
        # Validate explicitly instead of with ``assert`` so the check still
        # runs when Python is started with -O.
        try:
            ext_metadata = json.loads(options.ext_metadata)
        except ValueError:
            ext_metadata = None
        if not isinstance(ext_metadata, dict):
            print("External Metadata must be a dictionary!")
            # NOTE(review): exit status 0 on invalid input looks unintended;
            # kept as-is so existing callers see the same status.
            sys.exit(0)
    else:
        ext_metadata = dict()

    REQUEST_RETRIES = int(getConfig('request_retries'))

    # Attempt to get the hostname
    try:
        hostname = gethostname().split('.')[0]
    except Exception:
        hostname = "none"

    # Attempt to set the return level, throw an error if it doesn't exist.
    try:
        return_level = globals()["level_%s" % RETURN_LEVEL]
    except KeyError:
        print("Please specify a valid return level: minimal, metadata or full")
        sys.exit(1)

    if not options.recursive:
        try:
            file_buffer = ''
            # Try to read the file

            if args:
                with open(args[0], 'rb') as infile:
                    file_buffer = infile.read()
                file_len = len(file_buffer)
                logging.debug("opened file %s with len %i" %
                              (args[0], file_len))
            else:
                # Collect whatever is immediately readable on stdin.
                while sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                    line = sys.stdin.readline()
                    if not line:
                        break
                    file_buffer += line

                if not file_buffer:
                    parser.print_usage()
                    sys.exit(1)

                file_len = len(file_buffer)

            if file_len > 20971520 and not options.nolimit:
                print("You're trying to scan a file larger than 20mb.. Are you sure?")
                print("Use the --remove-limit flag if you really want to do this.")
                sys.exit(1)
        except IOError:
            # ``args`` is empty when the input came from stdin.
            print("\nERROR: The file does not exist: %s\n" %
                  (args[0] if args else "stdin", ))
            sys.exit(1)
    else:
        try:
            fileList = []
            if options.file_list:
                with open(options.file_list) as listing:
                    fileList = listing.read().splitlines()
            else:
                if args:
                    rootdir = args[0]
                    for root, subFolders, files in os.walk(rootdir):
                        for fname in files:
                            fileList.append(os.path.join(root, fname))
                else:
                    while sys.stdin in select.select([sys.stdin], [], [],
                                                     0)[0]:
                        line = sys.stdin.readline()
                        if not line:
                            break
                        # Drop the line terminator; otherwise it becomes part
                        # of the file name and the file can never be opened.
                        fileList.append(line.rstrip("\r\n"))
                    if not fileList:
                        parser.print_usage()
                        sys.exit(1)

            if len(fileList) > 1000 and not options.nolimit:
                print("You're trying to scan over 1000 files... Are you sure?")
                print("Use the --remove-limit flag if you really want to do this.")
                sys.exit(1)

        except IOError:
            # ``args`` may be empty here (file list or stdin input), so name
            # the path that was actually being read.
            if options.file_list:
                print("\nERROR: Unable to read file list: %s\n" %
                      (options.file_list, ))
            else:
                print("\nERROR: Directory does not exist: %s\n" %
                      (args[0] if args else "stdin", ))
            sys.exit(1)

    if not options.recursive:
        # Construct the object to be sent for scanning
        if args:
            filename = args[0]
        else:
            filename = "stdin"

        ext_metadata['server'] = hostname
        ext_metadata['user'] = getpass.getuser()
        externalObject = ExternalObject(
            buffer=file_buffer,
            externalVars=ExternalVars(filename=filename,
                                      ephID=options.ephID,
                                      extMetaData=ext_metadata,
                                      source="%s-%s-%s" %
                                      (SOURCE, hostname, getpass.getuser())),
            level=return_level)
    try:
        if not options.recursive:
            # Set up ZMQ context
            if USE_SSH:
                try:
                    logging.debug(
                        "attempting to connect to broker at %s and SSH host %s"
                        % (BROKER_HOST, SSH_HOST))
                    client = Client(BROKER_HOST,
                                    useSSH=True,
                                    sshHost=SSH_HOST,
                                    useGevent=True)
                except RuntimeError:
                    logging.exception("could not set up SSH tunnel to %s" %
                                      SSH_HOST)
                    sys.exit(1)
            else:
                logging.debug("SSH has been disabled.")
                client = Client(BROKER_HOST, useGevent=True)

            starttime = time.time()
            result = client.send(externalObject,
                                 retry=REQUEST_RETRIES,
                                 timeout=REQUEST_TIMEOUT)
            if not result:
                # worker() treats a falsy reply as a scanner timeout; report
                # it the same way instead of failing on the missing result.
                print("\nERROR: File timed out in the scanner: %s\n" % filename)
                sys.exit(1)
            logging.debug("got reply in %s seconds" %
                          str(time.time() - starttime))
            rootObject = getRootObject(result)
            try:
                jsonResult = getJSON(result)
                print(jsonResult)
            except Exception:
                logging.exception("error occured collecting results")
                return
            if return_level == level_full:
                SAVE_PATH = "%s/%s" % (SAVE_PATH,
                                       get_scanObjectUID(rootObject))
                if not os.path.exists(SAVE_PATH):
                    try:
                        os.makedirs(SAVE_PATH)
                        print("\nWriting results to %s...\n" % SAVE_PATH)
                    except (OSError, IOError):
                        print("\nERROR: unable to write to %s...\n" % SAVE_PATH)
                        return
                else:
                    print("\nOutput folder already exists! Skipping results output...\n")
                    return
                for uid, scanObject in result.files.iteritems():
                    with open("%s/%s" % (SAVE_PATH, uid), "wb") as f:
                        f.write(scanObject.buffer)
                    # Symlinks are best effort: a failure is reported but
                    # does not stop the remaining objects from being saved.
                    try:
                        if scanObject.filename and scanObject.parent:
                            linkPath = "%s/%s" % (SAVE_PATH,
                                                  scanObject.filename.replace(
                                                      "/", "_"))
                            if not os.path.lexists(linkPath):
                                os.symlink("%s" % (uid), linkPath)
                        elif scanObject.filename:
                            filenameParts = scanObject.filename.split("/")
                            os.symlink(
                                "%s" % (uid),
                                "%s/%s" % (SAVE_PATH, filenameParts[-1]))
                    except Exception:
                        print("Unable to create symlink for %s" % (uid))

                with open("%s/%s" % (SAVE_PATH, "results.log"), "wb") as f:
                    f.write(jsonResult)
                # NOTE(review): status 1 is returned although the results were
                # written; kept unchanged so existing callers see the same
                # status. Confirm whether 0 was intended.
                sys.exit(1)
        else:
            # Start from an empty log file.  Best effort: if this fails, the
            # append in the result loop below reports the problem.
            try:
                with open('si-cloudscan.log', 'w'):
                    pass
            except (IOError, OSError):
                pass

            for fname in fileList:
                job_queue.put(fname)

            # One 'STOP' sentinel per worker process.
            for i in range(num_procs):
                job_queue.put("STOP")

            print("File list length: %s" % len(fileList))

            for i in range(num_procs):
                Process(target=worker,
                        args=(
                            options.nolimit,
                            REQUEST_RETRIES,
                            REQUEST_TIMEOUT,
                            SAVE_PATH,
                            SOURCE,
                            return_level,
                            hostname,
                            USE_SSH,
                            BROKER_HOST,
                            SSH_HOST,
                            ext_metadata,
                            options.ephID,
                        )).start()

            # Expect exactly one queue entry per submitted file.
            results_processed = 0
            while results_processed < len(fileList):
                logging.debug("Files left: %s" %
                              ((len(fileList) - results_processed)))
                resultText = result_queue.get()
                # Process results
                with open('si-cloudscan.log', 'ab') as fh:
                    fh.write('%s\n' % resultText)
                results_processed += 1

            print('Wrote results to si-cloudscan.log')

    except KeyboardInterrupt:
        print("Interrupted by user, exiting...")
        sys.exit(1)
Exemple #3
0
def worker(nolimit, REQUEST_RETRIES, REQUEST_TIMEOUT, SAVE_PATH, SOURCE,
           return_level, hostname, USE_SSH, BROKER_HOST, SSH_HOST,
           ext_metadata, ephID):
    """Scan files taken from ``job_queue`` until the 'STOP' sentinel arrives.

    For every file name read from ``job_queue`` exactly one entry is put on
    ``result_queue``: the JSON result followed by a newline, or a
    ``~~~``-framed message explaining why the file was not scanned (too
    large, unreadable, scanner timeout, unexpected error).

    Args:
        nolimit: when true, the 20971520-byte size limit is not enforced.
        REQUEST_RETRIES: passed to ``client.send`` as ``retry``.
        REQUEST_TIMEOUT: passed to ``client.send`` as ``timeout``.
        SAVE_PATH: directory below which results are written when
            ``return_level`` is ``level_full``.
        SOURCE: prefix of the ``source`` string sent with each object.
        return_level: level passed to ``ExternalObject``.
        hostname: host name embedded in the ``source`` string.
        USE_SSH: connect to the broker through an SSH tunnel when true.
        BROKER_HOST: broker address handed to ``Client``.
        SSH_HOST: SSH host handed to ``Client`` when ``USE_SSH`` is set.
        ext_metadata: external metadata sent with each object.
        ephID: ephID sent with each object.
    """
    # Set up ZMQ context
    if USE_SSH:
        try:
            logging.debug(
                "attempting to connect to broker at %s and SSH host %s" %
                (BROKER_HOST, SSH_HOST))
            client = Client(BROKER_HOST, useSSH=True, sshHost=SSH_HOST)
        except RuntimeError:
            logging.exception("could not set up SSH tunnel to %s" % SSH_HOST)
            sys.exit(1)
    else:
        logging.debug("SSH has been disabled.")
        client = Client(BROKER_HOST)

    # Random tag used only to tell workers apart in the console output.
    randNum = randint(1, 10000)

    for fname in iter(job_queue.get, 'STOP'):
        print("Worker %s: Starting new request" % randNum)
        try:
            # Try to read the file
            with open(fname, 'rb') as infile:
                file_buffer = infile.read()
            file_len = len(file_buffer)
            logging.debug("opened file %s with len %i" % (fname, file_len))
            if file_len > 20971520 and not nolimit:
                print("You're trying to scan a file larger than 20mb.. Are you sure?")
                print("Use the --remove-limit flag if you really want to do this.")
                print("File has not been scanned: %s" % fname)
                result_queue.put(
                    "~~~~~~~~~~~~~~~~~~~~\nFile has not been scanned due to size: %s\n~~~~~~~~~~~~~~~~~~~~"
                    % fname)
                continue
        except IOError:
            print("\nERROR: The file does not exist: %s\n" % (fname, ))
            print("Moving to next file...")
            result_queue.put(
                "~~~~~~~~~~~~~~~~~~~~\nFile has not been scanned due to an IO Error: %s\n~~~~~~~~~~~~~~~~~~~~"
                % fname)
            continue

        try:
            # Construct the object to be sent for scanning
            externalObject = ExternalObject(
                buffer=file_buffer,
                externalVars=ExternalVars(
                    filename=fname,
                    ephID=ephID,
                    extMetaData=ext_metadata,
                    source="%s-%s-%s" % (SOURCE, hostname, getpass.getuser())),
                level=return_level)

            starttime = time.time()
            result = client.send(externalObject,
                                 retry=REQUEST_RETRIES,
                                 timeout=REQUEST_TIMEOUT)
            if not result:
                result_queue.put(
                    "~~~~~~~~~~~~~~~~~~~~\nFile timed out in the scanner: %s\n~~~~~~~~~~~~~~~~~~~~"
                    % fname)
                continue

            logging.debug("got reply in %s seconds" %
                          str(time.time() - starttime))
            rootObject = getRootObject(result)

            jsonResult = getJSON(result)
            resultText = '%s\n' % jsonResult

            if return_level == level_full:
                FILE_SAVE_PATH = "%s/%s" % (SAVE_PATH,
                                            get_scanObjectUID(rootObject))
                # A missing or unusable output folder only skips the on-disk
                # copy.  The result is still queued below and the worker moves
                # on to the next file, so the consumer of result_queue gets
                # one entry per file.
                save_results = False
                if os.path.exists(FILE_SAVE_PATH):
                    print("\nOutput folder already exists! Skipping results output...\n")
                else:
                    try:
                        os.makedirs(FILE_SAVE_PATH)
                        print("Writing results to %s..." % FILE_SAVE_PATH)
                        save_results = True
                    except (OSError, IOError):
                        print("\nERROR: unable to write to %s...\n" % FILE_SAVE_PATH)

                if save_results:
                    for uid, scanObject in result.files.iteritems():
                        with open("%s/%s" % (FILE_SAVE_PATH, uid), "wb") as f:
                            f.write(scanObject.buffer)
                        if scanObject.filename and scanObject.depth != 0:
                            linkPath = "%s/%s" % (FILE_SAVE_PATH,
                                                  scanObject.filename.replace(
                                                      "/", "_"))
                        elif scanObject.filename:
                            # Depth-0 object: link under the base name only.
                            filenameParts = scanObject.filename.split("/")
                            linkPath = "%s/%s" % (FILE_SAVE_PATH,
                                                  filenameParts[-1])
                        else:
                            linkPath = None
                        if linkPath and not os.path.lexists(linkPath):
                            os.symlink("%s" % (uid), linkPath)
                    with open("%s/%s" % (FILE_SAVE_PATH, "results.json"), "wb") as f:
                        f.write(jsonResult)

            result_queue.put(resultText)
        except Exception:
            # Keep the traceback available in debug mode; the queue entry
            # below still accounts for this file.
            logging.debug("error occured collecting results", exc_info=True)
            result_queue.put(
                "~~~~~~~~~~~~~~~~~~~~\nUNKNOWN ERROR OCCURRED: %s\n~~~~~~~~~~~~~~~~~~~~"
                % fname)
            continue
Exemple #4
0
    def run(self):
        """Scan every file path received on the task queue until told to stop.

        Calls ``config.init`` and ``init_logging`` once, then loops over
        ``self.task_queue``.  Each task is a file path: the file is read,
        handed to ``Dispatch``, and the JSON produced by ``getJSON`` is
        zlib-compressed and put on ``self.result_queue``.  A ``None`` task is
        the poison pill that ends the loop.  Every file task produces exactly
        one ``task_done()`` call and one queued result, including files that
        could not be opened (an empty compressed result is queued for those).

        Depending on the module-level settings the result is also saved under
        ``SAVE_PATH`` and passed to ``log_result``.

        Returns:
            int: 0 after a poison-pill shutdown, 1 if a scan raised and the
            worker stopped early.
        """
        global CONFIG_PATH
        config.init(path=CONFIG_PATH)
        init_logging()
        ret_value = 0

        # Loop and accept messages from both channels, acting accordingly
        while True:
            next_task = self.task_queue.get()
            if next_task is None:
                # Poison pill means shutdown
                self.task_queue.task_done()
                logging.debug("%s Got poison pill" % (os.getpid()))
                break
            try:
                with open(next_task) as nextfile:
                    file_buffer = nextfile.read()
            except IOError:
                logging.debug("Error opening: %s" % (next_task))
                self.task_queue.task_done()
                # Report an empty compressed result, matching what the scan
                # path below emits when no JSON was produced.
                self.result_queue.put(zlib.compress(b""))
                continue

            resultJSON = ""
            try:
                # perform the work
                result = ScanResult()
                result.source = SOURCE
                result.startTime = time.time()
                result.level = level_metadata
                myexternalVars = ExternalVars(filename=next_task,
                                              source=SOURCE,
                                              ephID=EPHID,
                                              extMetaData=EXT_METADATA)

                Dispatch(file_buffer, result, 0, externalVars=myexternalVars,
                         extScanModules=SCAN_MODULES)

                resultJSON = getJSON(result)
                if SAVE_PATH:
                    rootObject = getRootObject(result)
                    UID_SAVE_PATH = "%s/%s" % (SAVE_PATH, get_scanObjectUID(rootObject))
                    if not os.path.exists(UID_SAVE_PATH):
                        try:
                            os.makedirs(UID_SAVE_PATH)
                        except (OSError, IOError):
                            error("\nERROR: unable to write to %s...\n" % (UID_SAVE_PATH))
                            raise
                    for uid, scanObject in result.files.iteritems():
                        # Each object's buffer is stored under its UID ...
                        with open("%s/%s" % (UID_SAVE_PATH, uid), "wb") as f:
                            f.write(scanObject.buffer)
                        # ... and a symlink named after the object points at it.
                        if scanObject.filename and scanObject.depth != 0:
                            linkPath = "%s/%s" % (UID_SAVE_PATH, scanObject.filename.replace("/", "_"))
                        elif scanObject.filename:
                            # Depth-0 object: link under the base name only.
                            filenameParts = scanObject.filename.split("/")
                            linkPath = "%s/%s" % (UID_SAVE_PATH, filenameParts[-1])
                        else:
                            linkPath = None
                        # Skip names that already exist so a repeated name
                        # does not raise and abort the whole worker.
                        if linkPath and not os.path.lexists(linkPath):
                            os.symlink("%s" % (uid), linkPath)
                    with open("%s/%s" % (UID_SAVE_PATH, "result.json"), "wb") as f:
                        f.write(resultJSON)

                if LOG_RESULT:
                    log_result(result)
            except:
                # Deliberate catch-all: any failure ends this worker, but the
                # ``finally`` clause still accounts for the current task.
                logging.exception("Scan worker died, shutting down")
                ret_value = 1
                break
            finally:
                self.task_queue.task_done()
                self.result_queue.put(zlib.compress(resultJSON))

        close_modules()
        return ret_value
Exemple #5
0
def main():
    """Command-line entry point for the cloudscan client.

    Parses the command line, folds every section of the cloudscan config
    file into the module-level ``configs`` dictionary, and then runs in one
    of two modes:

    * Single-object mode (default): read one file (or stdin), send it to
      the broker through ``Client.send`` and print the JSON result. When the
      return level is ``level_full`` every returned object is also written
      below ``<save path>/<root object UID>``.
    * Recursive mode (``-R``): build a file list (from ``-f``, a directory
      argument, or stdin), queue it for ``num_procs`` ``worker`` processes
      and append each queued result to ``cloudscan.log``.

    Command-line options take precedence over the config file values.
    Usage, I/O and connection errors terminate via ``sys.exit(1)``.
    """

    parser = OptionParser(usage="usage: %prog [options] (/path/to/file | stdin)")
    parser.add_option("-d", "--debug",
                      action="store_true",
                      dest="debug",
                      help="enable debug messages to the console.")
    parser.add_option("-r", "--remove-limit",
                      action="store_true",
                      dest="nolimit",
                      help="disable 20mb size limit (be careful!)")
    parser.add_option("-t", "--timeout",
                      action="store", type="int",
                      dest="timeout",
                      help="adjust request timeout period (in seconds)")
    parser.add_option("-c", "--config-path",
                      action="store", type="string",
                      dest="config_path",
                      help="specify a path to cloudscan.conf.")
    parser.add_option("-a", "--address",
                      action="store", type="string",
                      dest="broker_host",
                      help="specify an IP and port to connect to the broker")
    parser.add_option("-f", "--file-list",
                      action="store", type="string",
                      dest="file_list",
                      help="Specify a list of files to scan")
    parser.add_option("-s", "--ssh-host",
                      action="store", type="string",
                      dest="ssh_host",
                      help="specify a host for the SSH tunnel")
    parser.add_option("-p", "--num-procs",
                      action="store", type="int", default=6,
                      dest="num_procs",
                      help="Specify the number of processors to use for recursion")
    parser.add_option("-u", "--source",
                      action="store", type="string",
                      dest="source",
                      help="specify a custom source")
    parser.add_option("--ssh",
                      action="store_true",
                      default=False,
                      dest="use_ssh",
                      help="Use SSH tunneling")
    parser.add_option("-l", "--level",
                      action="store", type="string",
                      dest="return_level",
                      help="Return Level: minimal, metadata, full [default: metadata]")
    parser.add_option("-o", "--out-path",
                      action="store", type="string",
                      dest="save_path",
                      help="If Return Level Full has been specified, provide a path to "
                            "save the results to [default: current directory]")
    parser.add_option("-b", "--buffer",
                      action="store_true",
                      dest="stdin_buffer",
                      help="Specify to allow a buffer to be collected by stdin.")
    parser.add_option("-e", "--ephID",
                      action="store", type="string",
                      dest="ephID", default="",
                      help="Specify an ephID to send to Laika.")
    parser.add_option("-m", "--ext-metadata",
                      action="store",
                      dest="ext_metadata",
                      help="Specify external metadata to be passed into the scanner.")
    parser.add_option("-z", "--log",
                      action="store_true",
                      dest="log_db",
                      help="Specify to turn on logging results.")
    parser.add_option("-R", "--recursive",
                      action="store_true",
                      default=False,
                      dest="recursive",
                      help="Enable recursive directory scanning. If enabled, all files "
                            "in the specified directory will be scanned. Results will "
                            "be output to cloudscan.log in the current directory.")
    (options, args) = parser.parse_args()

    # Define default configuration location
    CONFIG_PATH = "/etc/laikaboss/cloudscan.conf"

    if options.config_path:
        CONFIG_PATH = options.config_path

    Config = ConfigParser.ConfigParser()
    Config.read(CONFIG_PATH)

    # Parse through the config file and append each section to a single dictionary
    global configs
    for section in Config.sections():
        configs.update(dict(Config.items(section)))

    # Set the working path, this will be used for file output if another
    # path is not specified
    WORKING_PATH = os.getcwd()

    # The config file is only consulted when --ssh was not given.
    if options.use_ssh:
        USE_SSH = True
    else:
        USE_SSH = bool(strtobool(getConfig('use_ssh')))

    if options.ssh_host:
        SSH_HOST = options.ssh_host
    else:
        SSH_HOST = getConfig('ssh_host')

    if options.broker_host:
        BROKER_HOST = options.broker_host
    else:
        BROKER_HOST = getConfig('broker_host')

    if options.debug:
        logging.basicConfig(level=logging.DEBUG)

    logging.debug("Host: %s" % BROKER_HOST)

    if options.return_level:
        RETURN_LEVEL = options.return_level
    else:
        RETURN_LEVEL = getConfig('return_level')

    if options.source:
        SOURCE = options.source
    else:
        SOURCE = "cloudscan"

    # Without --log the source name is tagged with "-nolog".
    if not options.log_db:
        SOURCE += "-nolog"

    if options.save_path:
        SAVE_PATH = options.save_path
    else:
        SAVE_PATH = WORKING_PATH

    if options.num_procs:
        num_procs = int(options.num_procs)
    else:
        num_procs = int(getConfig('num_procs'))

    if options.timeout:
        logging.debug("default timeout changed to %i" % options.timeout)
        # The option is given in seconds and multiplied by 1000 here.
        REQUEST_TIMEOUT = options.timeout * 1000
    else:
        REQUEST_TIMEOUT = int(getConfig('request_timeout'))

    # External metadata is either a path to a JSON file or an inline JSON
    # string; in both cases it must decode to a dictionary.
    if options.ext_metadata:
        try:
            if os.path.exists(options.ext_metadata):
                with open(options.ext_metadata) as metafile:
                    ext_metadata = json.loads(metafile.read())
            else:
                ext_metadata = json.loads(options.ext_metadata)
            # Explicit check instead of assert, which is stripped under -O.
            if not isinstance(ext_metadata, dict):
                raise ValueError("external metadata is not a dictionary")
        except (IOError, OSError, ValueError, TypeError):
            print("External Metadata must be a dictionary!")
            # Invalid input is a failure, so report a non-zero status.
            sys.exit(1)
    else:
        ext_metadata = dict()

    REQUEST_RETRIES = int(getConfig('request_retries'))

    # Attempt to get the (short) hostname
    try:
        hostname = gethostname().split('.')[0]
    except Exception:
        hostname = "none"

    # Attempt to set the return level, throw an error if it doesn't exist.
    try:
        return_level = globals()["level_%s" % RETURN_LEVEL]
    except KeyError:
        print("Please specify a valid return level: minimal, metadata or full")
        sys.exit(1)

    if not options.recursive:
        try:
            file_buffer = ''

            if len(args) > 0:
                # Try to read the file named on the command line
                with open(args[0], 'rb') as infile:
                    file_buffer = infile.read()
                file_len = len(file_buffer)
                logging.debug("opened file %s with len %i" % (args[0], file_len))
            else:
                # No file argument: take whatever is immediately readable
                # on stdin (select with a zero timeout never blocks).
                chunks = []
                while sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                    line = sys.stdin.readline()
                    if not line:
                        break
                    chunks.append(line)
                file_buffer = ''.join(chunks)

                if not file_buffer:
                    parser.print_usage()
                    sys.exit(1)

                file_len = len(file_buffer)

            if file_len > 20971520 and not options.nolimit:
                print("You're trying to scan a file larger than 20mb.. Are you sure?")
                print("Use the --remove-limit flag if you really want to do this.")
                sys.exit(1)
        except IOError:
            # args may be empty when the input came from stdin; never index
            # it blindly or the real error is masked by an IndexError.
            bad_path = args[0] if args else "stdin"
            print("\nERROR: The file does not exist: %s\n" % (bad_path,))
            sys.exit(1)
    else:
        try:
            fileList = []
            if options.file_list:
                with open(options.file_list) as listfile:
                    fileList = listfile.read().splitlines()
            elif len(args) > 0:
                rootdir = args[0]
                for root, subFolders, files in os.walk(rootdir):
                    for fname in files:
                        fileList.append(os.path.join(root, fname))
            else:
                while sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                    line = sys.stdin.readline()
                    if not line:
                        break
                    # readline() keeps the line terminator; strip it so the
                    # entry is a usable path, and ignore blank lines.
                    entry = line.rstrip('\r\n')
                    if entry:
                        fileList.append(entry)
                if not fileList:
                    parser.print_usage()
                    sys.exit(1)

            if len(fileList) > 1000 and not options.nolimit:
                print("You're trying to scan over 1000 files... Are you sure?")
                print("Use the --remove-limit flag if you really want to do this.")
                sys.exit(1)

        except IOError:
            # Report the path that was actually being read; args may be
            # empty when -f or stdin supplied the list.
            bad_path = options.file_list or (args[0] if args else "stdin")
            print("\nERROR: Directory does not exist: %s\n" % (bad_path,))
            sys.exit(1)

    if not options.recursive:
        # Construct the object to be sent for scanning
        if args:
            filename = args[0]
        else:
            filename = "stdin"

        ext_metadata['server'] = hostname
        ext_metadata['user'] = getpass.getuser()
        externalObject = ExternalObject(buffer=file_buffer,
                                        externalVars=ExternalVars(filename=filename,
                                                                  ephID=options.ephID,
                                                                  extMetaData=ext_metadata,
                                                                  source="%s-%s-%s" % (SOURCE,
                                                                         hostname,
                                                                         getpass.getuser())),
                                        level=return_level)
    try:
        if not options.recursive:
            # Set up the broker client, optionally through an SSH tunnel
            if USE_SSH:
                try:
                    logging.debug("attempting to connect to broker at %s and SSH host %s" % (BROKER_HOST, SSH_HOST))
                    client = Client(BROKER_HOST, useSSH=True, sshHost=SSH_HOST, useGevent=True)
                except RuntimeError:
                    logging.exception("could not set up SSH tunnel to %s" % SSH_HOST)
                    sys.exit(1)
            else:
                logging.debug("SSH has been disabled.")
                client = Client(BROKER_HOST, useGevent=True)

            starttime = time.time()
            result = client.send(externalObject, retry=REQUEST_RETRIES, timeout=REQUEST_TIMEOUT)
            logging.debug("got reply in %s seconds" % str(time.time() - starttime))
            if result:
                rootObject = getRootObject(result)
                try:
                    jsonResult = getJSON(result)
                    print(jsonResult)
                except Exception:
                    logging.exception("error occured collecting results")
                    return
                if return_level == level_full:
                    # Each scan gets its own directory named after the UID
                    # of the root object.
                    SAVE_PATH = "%s/%s" % (SAVE_PATH, get_scanObjectUID(rootObject))
                    if not os.path.exists(SAVE_PATH):
                        try:
                            os.makedirs(SAVE_PATH)
                            print("\nWriting results to %s...\n" % SAVE_PATH)
                        except (OSError, IOError):
                            print("\nERROR: unable to write to %s...\n" % SAVE_PATH)
                            return
                    else:
                        print("\nOutput folder already exists! Skipping results output...\n")
                        return
                    for uid, scanObject in result.files.iteritems():
                        # Objects are stored under their UID ...
                        with open("%s/%s" % (SAVE_PATH, uid), "wb") as f:
                            f.write(scanObject.buffer)
                        # ... and, when a filename is known, symlinked by name.
                        try:
                            if scanObject.filename and scanObject.parent:
                                linkPath = "%s/%s" % (SAVE_PATH, scanObject.filename.replace("/","_"))
                                if not os.path.lexists(linkPath):
                                    os.symlink("%s" % (uid), linkPath)
                            elif scanObject.filename:
                                filenameParts = scanObject.filename.split("/")
                                os.symlink("%s" % (uid), "%s/%s" % (SAVE_PATH, filenameParts[-1]))
                        except Exception:
                            # Symlinks are a convenience; keep going without them.
                            print("Unable to create symlink for %s" % (uid))

                    with open("%s/%s" % (SAVE_PATH, "results.log"), "wb") as f:
                        f.write(jsonResult)
                    # Results were written successfully: fall through and
                    # finish normally instead of exiting with status 1.
            else:
                print("ERROR: No result received (scan timed out)")
                return
        else:
            # Best effort: start from an empty cloudscan.log.
            try:
                with open('cloudscan.log', 'w'):
                    pass
            except (IOError, OSError):
                pass

            for fname in fileList:
                job_queue.put(fname)

            # One "STOP" sentinel per worker process.
            for i in range(num_procs):
                job_queue.put("STOP")

            print("File list length: %s" % len(fileList))

            for i in range(num_procs):
                Process(target=worker, args=(options.nolimit, REQUEST_RETRIES, REQUEST_TIMEOUT, SAVE_PATH, SOURCE, return_level, hostname, USE_SSH, BROKER_HOST, SSH_HOST,ext_metadata,options.ephID,)).start()

            # Exactly one queued result is expected for every listed file.
            results_processed = 0
            while results_processed < len(fileList):
                logging.debug("Files left: %s" % ((len(fileList) - results_processed)))
                resultText = result_queue.get()
                # Append and close per result so the log is always current.
                with open('cloudscan.log', 'ab') as fh:
                    fh.write('%s\n' % resultText)
                results_processed += 1

            print('Wrote results to cloudscan.log')

    except KeyboardInterrupt:
        print("Interrupted by user, exiting...")
        sys.exit(1)
Exemple #6
0
def worker(nolimit, REQUEST_RETRIES, REQUEST_TIMEOUT, SAVE_PATH, SOURCE, return_level, hostname, USE_SSH, BROKER_HOST, SSH_HOST, ext_metadata, ephID):
    """Scan files taken from ``job_queue`` until a ``'STOP'`` sentinel arrives.

    For every file name pulled from the module-level ``job_queue`` exactly
    one entry is put on the module-level ``result_queue``: either the JSON
    scan result or a banner explaining why the file was not scanned. The
    consumer counts one result per file, so no code path may leave the loop
    or skip a file without queuing something.

    Parameters:
        nolimit: when true, files larger than 20971520 bytes are scanned too.
        REQUEST_RETRIES, REQUEST_TIMEOUT: passed to ``client.send`` as
            ``retry`` and ``timeout``.
        SAVE_PATH: base directory for output when ``return_level`` is
            ``level_full``.
        SOURCE, hostname: combined with the current user name into the
            ``source`` string sent with each object.
        return_level: level passed to ``ExternalObject``.
        USE_SSH, BROKER_HOST, SSH_HOST: how to construct the ``Client``.
        ext_metadata, ephID: forwarded through ``ExternalVars``.
    """
    # Set up the broker client, optionally through an SSH tunnel
    if USE_SSH:
        try:
            logging.debug("attempting to connect to broker at %s and SSH host %s" % (BROKER_HOST, SSH_HOST))
            client = Client(BROKER_HOST, useSSH=True, sshHost=SSH_HOST)
        except RuntimeError:
            logging.exception("could not set up SSH tunnel to %s" % SSH_HOST)
            sys.exit(1)
    else:
        logging.debug("SSH has been disabled.")
        client = Client(BROKER_HOST)

    # Random tag used only to tell workers apart in console output
    randNum = randint(1, 10000)

    for fname in iter(job_queue.get, 'STOP'):
        print("Worker %s: Starting new request" % randNum)
        try:
            # Try to read the file
            with open(fname, 'rb') as infile:
                file_buffer = infile.read()
            file_len = len(file_buffer)
            logging.debug("opened file %s with len %i" % (fname, file_len))
            if file_len > 20971520 and not nolimit:
                print("You're trying to scan a file larger than 20mb.. Are you sure?")
                print("Use the --remove-limit flag if you really want to do this.")
                print("File has not been scanned: %s" % fname)
                result_queue.put("~~~~~~~~~~~~~~~~~~~~\nFile has not been scanned due to size: %s\n~~~~~~~~~~~~~~~~~~~~" % fname)
                continue
        except IOError:
            print("\nERROR: The file does not exist: %s\n" % (fname,))
            print("Moving to next file...")
            result_queue.put("~~~~~~~~~~~~~~~~~~~~\nFile has not been scanned due to an IO Error: %s\n~~~~~~~~~~~~~~~~~~~~" % fname)
            continue

        try:
            # Construct the object to be sent for scanning
            externalObject = ExternalObject(buffer=file_buffer,
                                            externalVars=ExternalVars(filename=fname,
                                                                      ephID=ephID,
                                                                      extMetaData=ext_metadata,
                                                                      source="%s-%s-%s" % (SOURCE,
                                                                                           hostname,
                                                                                           getpass.getuser())),
                                        level=return_level)

            starttime = time.time()
            result = client.send(externalObject, retry=REQUEST_RETRIES, timeout=REQUEST_TIMEOUT)
            if not result:
                result_queue.put("~~~~~~~~~~~~~~~~~~~~\nFile timed out in the scanner: %s\n~~~~~~~~~~~~~~~~~~~~" % fname)
                continue

            logging.debug("got reply in %s seconds" % str(time.time() - starttime))
            rootObject = getRootObject(result)

            jsonResult = getJSON(result)
            resultText = '%s\n' % jsonResult

            if return_level == level_full:
                # Each scan gets its own directory named after the root UID
                FILE_SAVE_PATH = "%s/%s" % (SAVE_PATH, get_scanObjectUID(rootObject))

                # A directory problem only skips the on-disk output. The
                # worker must keep running and still report this file's
                # result; returning here would end the process with no
                # result queued and leave the consumer waiting forever.
                save_output = False
                if os.path.exists(FILE_SAVE_PATH):
                    print("\nOutput folder already exists! Skipping results output...\n")
                else:
                    try:
                        os.makedirs(FILE_SAVE_PATH)
                        print("Writing results to %s..." % FILE_SAVE_PATH)
                        save_output = True
                    except (OSError, IOError):
                        print("\nERROR: unable to write to %s...\n" % FILE_SAVE_PATH)

                if save_output:
                    for uid, scanObject in result.files.iteritems():
                        # Objects are stored under their UID ...
                        with open("%s/%s" % (FILE_SAVE_PATH, uid), "wb") as f:
                            f.write(scanObject.buffer)
                        # ... and, when a filename is known, symlinked by name.
                        if scanObject.filename and scanObject.depth != 0:
                            linkPath = "%s/%s" % (FILE_SAVE_PATH, scanObject.filename.replace("/","_"))
                            if not os.path.lexists(linkPath):
                                os.symlink("%s" % (uid), linkPath)
                        elif scanObject.filename:
                            filenameParts = scanObject.filename.split("/")
                            linkPath = "%s/%s" % (FILE_SAVE_PATH, filenameParts[-1])
                            if not os.path.lexists(linkPath):
                                os.symlink("%s" % (uid), linkPath)
                    with open("%s/%s" % (FILE_SAVE_PATH, "results.json"), "wb") as f:
                        f.write(jsonResult)

            result_queue.put(resultText)
        except Exception:
            # Keep the worker alive for the remaining files, but record the
            # traceback and still account for this file in the results.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            logging.exception("error occured collecting results")
            result_queue.put("~~~~~~~~~~~~~~~~~~~~\nUNKNOWN ERROR OCCURRED: %s\n~~~~~~~~~~~~~~~~~~~~" % fname)
            continue