def setup(self):
    """Run one scan over self.filelist and cache the raw results plus
    both parsed-report flavors (with and without metadata)."""
    scan_output = multiscanner.multiscan(
        self.filelist, recursive=False, configregen=False,
        configfile='.tmpfile.ini')
    self.result = scan_output
    # Python-dict reports, one without and one with the metadata section.
    self.report = multiscanner.parse_reports(
        scan_output, includeMetadata=False, python=True)
    self.report_m = multiscanner.parse_reports(
        scan_output, includeMetadata=True, python=True)
Beispiel #2
0
def multiscanner_process(work_queue, exit_signal):
    '''Not used in distributed mode.

    Batch metadata tuples off ``work_queue``, scan the files, re-key each
    report by the original filename, mark the tasks complete, and store
    the reports.  Each queue item is a tuple whose slots are used as:
    [0] tmp file path, [1] original filename, [2] task id, [4] scan
    metadata dict — TODO confirm against the producer.

    Relies on module-level globals: ``batch_size``, ``batch_interval``,
    ``delete_after_scan``, ``db``, ``storage_handler`` — presumably
    defined at module scope; verify.
    '''
    metadata_list = []
    time_stamp = None
    # BUG FIX: the original looped on ``while True`` and ignored the
    # ``exit_signal`` parameter, which made ``storage_handler.close()``
    # below unreachable.  Sibling worker loops poll ``exit_signal.value``.
    while not exit_signal.value:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(metadata_list) < batch_size:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            # Queue drained: flush only a full or stale batch; otherwise
            # keep accumulating on the next pass.
            if metadata_list and time_stamp:
                if len(metadata_list) >= batch_size:
                    pass
                elif time.time() - time_stamp > batch_interval:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        # modulelist = [item[5] for item in metadata_list]
        resultlist = multiscanner.multiscan(
            filelist, configfile=multiscanner.CONFIG
            # module_list
        )
        results = multiscanner.parse_reports(resultlist, python=True)

        scan_time = datetime.now().isoformat()

        if delete_after_scan:
            for file_name in results:
                os.remove(file_name)

        for item in metadata_list:
            # Use the original filename as the index instead of the full path
            results[item[1]] = results[item[0]]
            del results[item[0]]

            results[item[1]]['Scan Metadata'] = item[4]
            results[item[1]]['Scan Metadata']['Scan Time'] = scan_time
            results[item[1]]['Scan Metadata']['Task ID'] = item[2]

            db.update_task(
                task_id=item[2],
                task_status='Complete',
                timestamp=scan_time,
            )
        metadata_list = []

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
def celery_task(files, config=multiscanner.CONFIG):
    '''
    Run multiscanner on the given files and store the results in the storage
    handler(s) specified in the storage configuration file.

    ``files`` maps a tmp file path to a dict with keys
    'original_filename', 'task_id' and 'metadata' (shape inferred from
    the lookups below — TODO confirm against the caller).

    Returns the parsed report dict, keyed by original filename.
    '''
    # Get the storage config
    storage_conf = multiscanner.common.get_config_path(config, 'storage')
    storage_handler = multiscanner.storage.StorageHandler(
        configfile=storage_conf)

    resultlist = multiscanner.multiscan(list(files), configfile=config)
    results = multiscanner.parse_reports(resultlist, python=True)

    scan_time = datetime.now().isoformat()

    # PERF FIX: the original re-read and re-parsed the scan config file once
    # per scanned file inside the loop below, although the parsed config is
    # loop-invariant.  Build the per-module ENABLED summary once up front.
    # NOTE(review): SafeConfigParser is a deprecated alias of ConfigParser on
    # Python 3 (removed in 3.12); kept for consistency with the project.
    scan_config_object = configparser.SafeConfigParser()
    scan_config_object.optionxform = str  # preserve option-name case
    scan_config_object.read(config)
    full_conf = common.parse_config(scan_config_object)
    sub_conf = {}
    for key in full_conf:
        if key == 'main':
            continue
        sub_conf[key] = {'ENABLED': full_conf[key]['ENABLED']}

    # Loop through files in a way compatible with Py 2 and 3, and won't be
    # affected by changing keys to original filenames
    for file_ in files:
        original_filename = files[file_]['original_filename']
        task_id = files[file_]['task_id']
        metadata = files[file_]['metadata']

        results[file_]['Scan Metadata'] = {}
        results[file_]['Scan Metadata']['Worker Node'] = gethostname()
        # All reports share one sub_conf dict; nothing mutates it below.
        results[file_]['Scan Metadata']['Scan Config'] = sub_conf

        # Use the original filename as the value for the filename
        # in the report (instead of the tmp path assigned to the file
        # by the REST API)
        results[original_filename] = results[file_]
        del results[file_]

        results[original_filename]['Scan Time'] = scan_time
        results[original_filename]['Metadata'] = metadata

        # Update the task DB to reflect that the task is done
        db.update_task(
            task_id=task_id,
            task_status='Complete',
            timestamp=scan_time,
        )

    # Save the reports to storage
    storage_handler.store(results, wait=False)
    storage_handler.close()

    return results
Beispiel #4
0
def test_none_report():
    """A None module report must be skipped without breaking parsing."""
    scan_output = [
        ([('a', True), ('b', '/tmp/b'), ('c', True), ('/d/d', '/tmp/d')],
         {'Name': 'test_2', 'Include': True, 'Type': 'Test'}),
        None,
    ]
    expected = ('{"/d/d":{"test_2":"/tmp/d"},"a":{"test_2":true},'
                '"b":{"test_2":"/tmp/b"},"c":{"test_2":true}}')
    report = multiscanner.parse_reports(
        scan_output, ugly=True, includeMetadata=False, python=False)
    assert report == expected
def test_invalid_utf8_python():
    """An invalid leading byte either survives or becomes U+FFFD."""
    scan_output = [([('file', '\x97안녕하세요')],
                    {'Name': 'Test', 'Type': 'Test'})]
    parsed = multiscanner.parse_reports(scan_output, python=True)
    raw_ok = {"file": {"Test": "\x97안녕하세요"}}
    replaced_ok = {"file": {"Test": u"\ufffd안녕하세요"}}
    assert parsed in (raw_ok, replaced_ok)
Beispiel #6
0
def test_meta_report():
    """includeMetadata=True wraps output in Files/Metadata sections."""
    scan_output = [
        ([('a', True), ('b', '/tmp/b'), ('c', True), ('/d/d', '/tmp/d')],
         {'Name': 'test_2', 'Include': True, 'Type': 'Test', 'Var': 1}),
    ]
    expected = (
        '{"Files":{"/d/d":{"test_2":"/tmp/d"},"a":{"test_2":true},'
        '"b":{"test_2":"/tmp/b"},"c":{"test_2":true}},'
        '"Metadata":{"test_2":{"Include":true,"Name":"test_2",'
        '"Type":"Test","Var":1}}}'
    )
    report = multiscanner.parse_reports(
        scan_output, ugly=True, includeMetadata=True, python=False)
    assert report == expected
Beispiel #7
0
def multiscanner_process(work_queue, exit_signal):
    '''Batch queue items, scan them, re-key reports by original filename,
    and store the results.

    Queue items are tuples: [0] tmp path, [1] original filename,
    [2] task id, [3] report id — TODO confirm against the producer.
    Relies on module globals ``BATCH_SIZE``, ``WAIT_SECONDS``, ``db``,
    ``storage_handler`` — presumably defined at module scope; verify.
    '''
    metadata_list = []
    time_stamp = None
    # BUG FIX: the original ignored ``exit_signal`` and looped forever,
    # leaving ``storage_handler.close()`` unreachable; sibling loops use
    # ``while not exit_signal.value``.
    while not exit_signal.value:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(metadata_list) < BATCH_SIZE:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            # Flush only full or stale batches.
            if metadata_list and time_stamp:
                if len(metadata_list) >= BATCH_SIZE:
                    pass
                elif time.time() - time_stamp > WAIT_SECONDS:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        resultlist = multiscanner.multiscan(
            filelist, configfile=multiscanner.CONFIG
        )
        results = multiscanner.parse_reports(resultlist, python=True)

        for file_name in results:
            os.remove(file_name)

        for item in metadata_list:

            results[item[1]] = results[item[0]]
            del results[item[0]]

            db.update_task(
                task_id=item[2],
                task_status='Complete',
                report_id=item[3]
            )

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
Beispiel #8
0
def multiscanner_process(work_queue, exit_signal):
    '''Batch queue items, scan them, re-key reports by original filename,
    and store the results.

    Queue items are tuples: [0] tmp path, [1] original filename,
    [2] task id, [3] report id — TODO confirm against the producer.
    Relies on module globals ``BATCH_SIZE``, ``WAIT_SECONDS``, ``db``,
    ``storage_handler`` — presumably defined at module scope; verify.
    '''
    metadata_list = []
    time_stamp = None
    # BUG FIX: the original ignored ``exit_signal`` and looped forever,
    # leaving ``storage_handler.close()`` unreachable; sibling loops use
    # ``while not exit_signal.value``.
    while not exit_signal.value:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(metadata_list) < BATCH_SIZE:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            # Flush only full or stale batches.
            if metadata_list and time_stamp:
                if len(metadata_list) >= BATCH_SIZE:
                    pass
                elif time.time() - time_stamp > WAIT_SECONDS:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        resultlist = multiscanner.multiscan(filelist,
                                            configfile=multiscanner.CONFIG)
        results = multiscanner.parse_reports(resultlist, python=True)

        for file_name in results:
            os.remove(file_name)

        for item in metadata_list:

            results[item[1]] = results[item[0]]
            del results[item[0]]

            db.update_task(task_id=item[2],
                           task_status='Complete',
                           report_id=item[3])

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
Beispiel #9
0
def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete,
                         exit_signal):
    """Worker loop: batch file paths off ``work_queue``, scan them with
    multiscanner, and persist the reports via the storage handler.

    Args:
        work_queue: queue of file paths; drained non-blockingly.
        config: multiscanner config file path.
        batch_size: scan as soon as this many files are collected.
        wait_seconds: also scan a smaller batch once it is this old.
        delete: when truthy, remove each scanned file from disk.
        exit_signal: shared flag read via ``.value`` that stops the loop.
    """
    filelist = []
    time_stamp = None  # set when the first file of the current batch arrives
    storage_conf = multiscanner.common.get_config_path(config, 'storage')
    storage_handler = multiscanner.storage.StorageHandler(
        configfile=storage_conf)
    while not exit_signal.value:
        time.sleep(1)
        try:
            filelist.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(filelist) < batch_size:
                filelist.append(work_queue.get_nowait())
        except queue.Empty:
            # Queue drained: scan now only if the batch is full or stale;
            # otherwise keep accumulating on the next pass.
            if filelist and time_stamp:
                if len(filelist) >= batch_size:
                    pass
                elif time.time() - time_stamp > wait_seconds:
                    pass
                else:
                    continue
            else:
                continue

        resultlist = multiscanner.multiscan(filelist, configfile=config)
        results = multiscanner.parse_reports(resultlist, python=True)
        if delete:
            for file_name in results:
                os.remove(file_name)

        storage_handler.store(results, wait=False)
        print('Scanned', len(results), 'files')

        # Reset batch state for the next round.
        filelist = []
        time_stamp = None
    storage_handler.close()
Beispiel #10
0
def multiscanner_process(work_queue, config, batch_size, wait_seconds, delete, exit_signal):
    """Worker loop: batch file paths off ``work_queue``, scan them, and
    persist the reports via the storage handler.

    Args:
        work_queue: queue of file paths; drained non-blockingly.
        config: multiscanner config file path.
        batch_size: scan as soon as this many files are collected.
        wait_seconds: also scan a smaller batch once it is this old.
        delete: when truthy, remove each scanned file from disk.
        exit_signal: shared flag read via ``.value`` that stops the loop.
    """
    filelist = []
    time_stamp = None  # set when the first file of the current batch arrives
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)
    while not exit_signal.value:
        time.sleep(1)
        try:
            filelist.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(filelist) < batch_size:
                filelist.append(work_queue.get_nowait())
        except queue.Empty:
            # Queue drained: scan now only if the batch is full or stale;
            # otherwise keep accumulating on the next pass.
            if filelist and time_stamp:
                if len(filelist) >= batch_size:
                    pass
                elif time.time() - time_stamp > wait_seconds:
                    pass
                else:
                    continue
            else:
                continue

        resultlist = multiscan(filelist, configfile=config)
        results = parse_reports(resultlist, python=True)
        if delete:
            for file_name in results:
                os.remove(file_name)

        storage_handler.store(results, wait=False)
        print('Scanned', len(results), 'files')

        # Reset batch state for the next round.
        filelist = []
        time_stamp = None
    storage_handler.close()
def test_valid_reports_string():
    """A single well-formed module report serializes to compact JSON."""
    scan_output = [([('file', 'result')], {'Name': 'Test', 'Type': 'Test'})]
    serialized = multiscanner.parse_reports(scan_output, python=False)
    assert serialized == '{"file":{"Test":"result"}}'
 def setup(self):
     """Scan self.filelist once and cache the raw results plus both
     parsed-report flavors (without and with metadata)."""
     self.result = multiscanner.multiscan(
         self.filelist, recursive=False, configregen=False, configfile='.tmpfile.ini')
     self.report = multiscanner.parse_reports(self.result, includeMetadata=False, python=True)
     self.report_m = multiscanner.parse_reports(self.result, includeMetadata=True, python=True)
Beispiel #13
0
def test_none_report():
    """A None module report must be skipped without breaking parsing."""
    scan_output = [
        ([('a', True), ('b', '/tmp/b'), ('c', True), ('/d/d', '/tmp/d')],
         {'Name': 'test_2', 'Include': True, 'Type': 'Test'}),
        None,
    ]
    expected = ('{"/d/d":{"test_2":"/tmp/d"},"a":{"test_2":true},'
                '"b":{"test_2":"/tmp/b"},"c":{"test_2":true}}')
    assert multiscanner.parse_reports(
        scan_output, ugly=True, includeMetadata=False, python=False) == expected
Beispiel #14
0
def test_meta_report():
    """includeMetadata=True wraps output in Files/Metadata sections."""
    scan_output = [
        ([('a', True), ('b', '/tmp/b'), ('c', True), ('/d/d', '/tmp/d')],
         {'Name': 'test_2', 'Include': True, 'Type': 'Test', 'Var': 1}),
    ]
    expected = (
        '{"Files":{"/d/d":{"test_2":"/tmp/d"},"a":{"test_2":true},'
        '"b":{"test_2":"/tmp/b"},"c":{"test_2":true}},'
        '"Metadata":{"test_2":{"Include":true,"Name":"test_2",'
        '"Type":"Test","Var":1}}}'
    )
    assert multiscanner.parse_reports(
        scan_output, ugly=True, includeMetadata=True, python=False) == expected
Beispiel #15
0
def multiscanner_process(work_queue, exit_signal):
    '''Not used in distributed mode.

    Batch metadata tuples off ``work_queue``, scan the files, re-key each
    report by the original filename, split the 'Tags' metadata field into
    a ``tags`` list, mark tasks complete, and store the reports.

    Queue items: [0] tmp path, [1] original filename, [2] task id,
    [4] scan metadata dict — TODO confirm against the producer.
    Relies on module globals ``batch_size``, ``batch_interval``,
    ``delete_after_scan``, ``db``, ``storage_handler`` — presumably
    module scope; verify.
    '''
    metadata_list = []
    time_stamp = None
    # BUG FIX: the original ignored ``exit_signal`` and looped forever,
    # making ``storage_handler.close()`` unreachable; sibling loops use
    # ``while not exit_signal.value``.
    while not exit_signal.value:
        time.sleep(1)
        try:
            metadata_list.append(work_queue.get_nowait())
            if not time_stamp:
                time_stamp = time.time()
            while len(metadata_list) < batch_size:
                metadata_list.append(work_queue.get_nowait())
        except queue.Empty:
            # Flush only full or stale batches.
            if metadata_list and time_stamp:
                if len(metadata_list) >= batch_size:
                    pass
                elif time.time() - time_stamp > batch_interval:
                    pass
                else:
                    continue
            else:
                continue

        filelist = [item[0] for item in metadata_list]
        # modulelist = [item[5] for item in metadata_list]
        resultlist = multiscan(
            filelist, configfile=MS_CONFIG
            # module_list
        )
        results = parse_reports(resultlist, python=True)

        scan_time = datetime.now().isoformat()

        if delete_after_scan:
            for file_name in results:
                os.remove(file_name)

        for item in metadata_list:
            # Use the original filename as the index instead of the full path
            results[item[1]] = results[item[0]]
            del results[item[0]]

            results[item[1]]['Scan Metadata'] = item[4]
            results[item[1]]['Scan Metadata']['Scan Time'] = scan_time
            results[item[1]]['Scan Metadata']['Task ID'] = item[2]
            results[item[1]]['tags'] = results[item[1]]['Scan Metadata'].get('Tags', '').split(',')
            results[item[1]]['Scan Metadata'].pop('Tags', None)

            db.update_task(
                task_id=item[2],
                task_status='Complete',
                timestamp=scan_time,
            )
        metadata_list = []

        storage_handler.store(results, wait=False)

        filelist = []
        time_stamp = None
    storage_handler.close()
Beispiel #16
0
def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata, config=multiscanner.CONFIG, module_list=None):
    '''
    Queue up multiscanner tasks

    Scan one file, attach scan metadata (worker node, scan config,
    enabled-module count), re-key the report from the tmp path to the
    original filename, mark the task complete, and store the report.

    Usage:
    from celery_worker import multiscanner_celery
    multiscanner_celery.delay(full_path, original_filename, task_id,
                              hashed_filename, metadata, config, module_list)
    '''
    # Initialize the connection to the task DB
    db.init_db()

    logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('='*48, '\n', file_hash, original_filename))

    # Get the storage config
    storage_conf = multiscanner.common.get_config_path(config, 'storage')
    storage_handler = multiscanner.storage.StorageHandler(configfile=storage_conf)

    resultlist = multiscanner.multiscan(
        [file_],
        configfile=config,
        module_list=module_list
    )
    results = multiscanner.parse_reports(resultlist, python=True)

    scan_time = datetime.now().isoformat()

    # Get the Scan Config that the task was run with and
    # add it to the task metadata
    # NOTE(review): SafeConfigParser is a deprecated alias of ConfigParser
    # on Python 3 and was removed in 3.12 — consider migrating.
    scan_config_object = configparser.SafeConfigParser()
    scan_config_object.optionxform = str  # preserve option-name case
    scan_config_object.read(config)
    full_conf = common.parse_config(scan_config_object)
    sub_conf = {}
    # Count number of modules enabled out of total possible
    # and add it to the Scan Metadata
    total_enabled = 0
    total_modules = 0
    for key in full_conf:
        if key == 'main':
            continue  # 'main' is global config, not a scan module
        sub_conf[key] = {}
        sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED']
        total_modules += 1
        if sub_conf[key]['ENABLED'] is True:
            total_enabled += 1

    results[file_]['Scan Metadata'] = {}
    results[file_]['Scan Metadata']['Worker Node'] = gethostname()
    results[file_]['Scan Metadata']['Scan Config'] = sub_conf
    results[file_]['Scan Metadata']['Modules Enabled'] = '{} / {}'.format(
        total_enabled, total_modules
    )

    # Use the original filename as the value for the filename
    # in the report (instead of the tmp path assigned to the file
    # by the REST API)
    results[original_filename] = results[file_]
    del results[file_]

    results[original_filename]['Scan Time'] = scan_time
    results[original_filename]['Metadata'] = metadata

    # Update the task DB to reflect that the task is done
    db.update_task(
        task_id=task_id,
        task_status='Complete',
        timestamp=scan_time,
    )

    # Save the reports to storage
    storage_handler.store(results, wait=False)
    storage_handler.close()
    logger.info('Completed Task #{}'.format(task_id))

    return results
Beispiel #17
0
def multiscanner_celery(file_, original_filename, task_id, file_hash, metadata,
                        config=MS_CONFIG, module_list=None):
    '''
    Queue up multiscanner tasks

    Scan one file, attach scan metadata, re-key the report from the tmp
    path to the original filename, store the report, and only then mark
    the task complete (a failed store raises before the DB update).

    Usage:
    from celery_worker import multiscanner_celery
    multiscanner_celery.delay(full_path, original_filename, task_id,
                              hashed_filename, metadata, config, module_list)
    '''

    # Initialize the connection to the task DB
    db.init_db()

    logger.info('\n\n{}{}Got file: {}.\nOriginal filename: {}.\n'.format('=' * 48, '\n', file_hash, original_filename))

    # Get the storage config
    storage_conf = utils.get_config_path(config, 'storage')
    storage_handler = storage.StorageHandler(configfile=storage_conf)

    resultlist = multiscan(
        [file_],
        configfile=config,
        module_list=module_list
    )
    results = parse_reports(resultlist, python=True)

    scan_time = datetime.now().isoformat()

    # Get the Scan Config that the task was run with and
    # add it to the task metadata
    # NOTE(review): SafeConfigParser is a deprecated alias of ConfigParser
    # on Python 3 and was removed in 3.12 — consider migrating.
    scan_config_object = configparser.SafeConfigParser()
    scan_config_object.optionxform = str  # preserve option-name case
    scan_config_object.read(config)
    full_conf = utils.parse_config(scan_config_object)
    sub_conf = {}
    # Count number of modules enabled out of total possible
    # and add it to the Scan Metadata
    total_enabled = 0
    # NOTE(review): this counts every config section including 'main',
    # unlike the sibling implementation which skips 'main' — confirm
    # which total is intended.
    total_modules = len(full_conf.keys())

    # Get the count of modules enabled from the module_list
    # if it exists, else count via the config
    # NOTE(review): when module_list is given, sub_conf stays empty, so
    # 'Scan Config' below is {} — verify this is intentional.
    if module_list:
        total_enabled = len(module_list)
    else:
        for key in full_conf:
            if key == 'main':
                continue
            sub_conf[key] = {}
            sub_conf[key]['ENABLED'] = full_conf[key]['ENABLED']
            if sub_conf[key]['ENABLED'] is True:
                total_enabled += 1

    results[file_]['Scan Metadata'] = metadata
    results[file_]['Scan Metadata']['Worker Node'] = gethostname()
    results[file_]['Scan Metadata']['Scan Config'] = sub_conf
    results[file_]['Scan Metadata']['Modules Enabled'] = '{} / {}'.format(
        total_enabled, total_modules
    )
    results[file_]['Scan Metadata']['Scan Time'] = scan_time
    results[file_]['Scan Metadata']['Task ID'] = task_id

    # Use the original filename as the value for the filename
    # in the report (instead of the tmp path assigned to the file
    # by the REST API)
    results[original_filename] = results[file_]
    del results[file_]

    # Save the reports to storage
    storage_ids = storage_handler.store(results, wait=False)
    storage_handler.close()

    # Only need to raise ValueError here,
    # Further cleanup will be handled by the on_failure method
    # of MultiScannerTask
    if not storage_ids:
        raise ValueError('Report failed to index')

    # Update the task DB to reflect that the task is done
    db.update_task(
        task_id=task_id,
        task_status='Complete',
        timestamp=scan_time,
    )

    logger.info('Completed Task #{}'.format(task_id))

    return results
Beispiel #18
0
    Parses arguments
    """
    import argparse
    import parser
    #argparse stuff
    parser = argparse.ArgumentParser(description="Scan files and store results in elastic search")
    parser.add_argument("-r", "--recursive", action="store_true")
    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument('Files', help="Files and Directories to attach", nargs='+')
    return parser.parse_args()

def results2es(results):
    """
    Takes a dictionary of Filename: {Results} and stores it in elastic search.
    """
    es = elasticsearch.Elasticsearch(hosts=ES_HOSTS)
    # ignore=400: the index may already exist; that is not an error here.
    es.indices.create(index=ES_INDEX, ignore=400)
    for fname, result in results.items():
        result['filename'] = fname
        # The report's SHA256 doubles as the document id, so re-ingesting
        # the same file overwrites its previous document.
        es.index(index=ES_INDEX, doc_type=ES_DOCTYPE,
                 id=result['SHA256'], body=result)

if __name__ == '__main__':
    # Scan the files given on the command line and push the parsed
    # reports into Elasticsearch.
    args = parse_args()
    # BUG FIX: the original used Python-2-only ``print`` statements, a
    # syntax error under Python 3; the parenthesized form works on both.
    print("Starting scan...")
    results = multiscanner.multiscan(args.Files, recursive=args.recursive)
    results = multiscanner.parse_reports(results, python=True, includeMetadata=False)
    print("Storing results...")
    results2es(results)
    print("Done!")
def test_invalid_utf8_python():
    """An invalid leading byte either survives or becomes U+FFFD."""
    scan_output = [([('file', '\x97안녕하세요')],
                    {'Name': 'Test', 'Type': 'Test'})]
    parsed = multiscanner.parse_reports(scan_output, python=True)
    assert parsed in ({"file": {"Test": "\x97안녕하세요"}},
                      {"file": {"Test": u"\ufffd안녕하세요"}})
def test_invalid_utf8_string():
    """String output keeps the invalid byte or carries U+FFFD instead."""
    scan_output = [([('file', '\x97안녕하세요')],
                    {'Name': 'Test', 'Type': 'Test'})]
    serialized = multiscanner.parse_reports(scan_output, python=False)
    assert serialized in (u'{"file":{"Test":"\x97안녕하세요"}}',
                          u'{"file":{"Test":"\ufffd안녕하세요"}}')
def test_invalid_utf8_string():
    """String output keeps the invalid byte or carries U+FFFD instead.

    NOTE(review): exact duplicate of the preceding test of the same
    name — the later definition shadows the earlier one.
    """
    scan_output = [([('file', '\x97안녕하세요')],
                    {'Name': 'Test', 'Type': 'Test'})]
    serialized = multiscanner.parse_reports(scan_output, python=False)
    assert serialized in (u'{"file":{"Test":"\x97안녕하세요"}}',
                          u'{"file":{"Test":"\ufffd안녕하세요"}}')
def test_valid_reports_string():
    """A single well-formed module report serializes to compact JSON."""
    scan_output = [([('file', 'result')], {'Name': 'Test', 'Type': 'Test'})]
    serialized = multiscanner.parse_reports(scan_output, python=False)
    assert serialized == '{"file":{"Test":"result"}}'