def test_extract(self, capsys, filename):
    """Happy path: a supported archive extracts a single 'Mac.pdf' into CWD."""
    # the working directory must start empty so the final listing is unambiguous
    assert os.listdir(os.getcwd()) == []
    extractor = Extractor()
    archive = os.path.join(FIXTURE_DIR, filename)
    ok = extractor.extract(archive_path=archive, output_dir=".")
    assert ok is True
    assert extractor.err is None
    # exactly the one expected payload file was produced
    assert os.listdir(os.getcwd()) == ['Mac.pdf']
def test_archive_does_not_exist(self, capsys):
    """A non-existent archive path fails cleanly without touching CWD."""
    assert os.listdir(os.getcwd()) == []
    extractor = Extractor()
    ok = extractor.extract(archive_path="/foobar97839872")
    assert ok is False
    # the recorded error must mention the missing file
    assert re.search("file does not exist", extractor.err)
    # nothing was written into the working directory
    assert os.listdir(os.getcwd()) == []
def test_file_with_unknown_extension(self, capsys, filename):
    """A file whose extension no extractor claims is rejected, CWD untouched."""
    assert os.listdir(os.getcwd()) == []
    extractor = Extractor()
    archive = os.path.join(FIXTURE_DIR, filename)
    ok = extractor.extract(archive_path=archive, output_dir=".")
    assert ok is False
    assert extractor.err is not None
    # the error identifies the archive path as the problem
    assert re.search("invalid archive_path", extractor.err)
    assert os.listdir(os.getcwd()) == []
def test_specified_cls_name_does_not_exist(self, capsys, filename):
    """Requesting an unknown extractor class name fails, CWD untouched."""
    assert os.listdir(os.getcwd()) == []
    extractor = Extractor()
    archive = os.path.join(FIXTURE_DIR, filename)
    ok = extractor.extract(archive_path=archive, output_dir=".",
                           cls_name="ThisDoesNotExistExtractor")
    assert ok is False
    assert extractor.err is not None
    # the error identifies the extractor class as the problem
    assert re.search("invalid Extractor", extractor.err)
    assert os.listdir(os.getcwd()) == []
def test_extract_password_umlaut_fail(self, capsys, filename):
    """Wrong (umlaut) password: extraction fails and nothing is written."""
    assert os.listdir(os.getcwd()) == []
    extractor = Extractor()
    # NOTE: on Python 2 this needs `from __future__ import unicode_literals`!
    ok = extractor.extract(archive_path=os.path.join(FIXTURE_DIR, filename),
                           output_dir=".", password="******")
    assert ok is False
    assert extractor.err is not None
    assert os.listdir(os.getcwd()) == []
def test_extract_password_exclamation_fail(self, capsys, filename):
    """Wrong (exclamation-mark) password: extraction fails, nothing written.

    Bugfix: the previous ``assert pytest.raises(subprocess.CalledProcessError)``
    was a no-op — calling ``pytest.raises`` without a ``with`` block (or a
    callable argument) merely returns a truthy context-manager object, so the
    assertion could never fail and verified nothing.  The failure mode is
    already pinned by ``res is False`` and ``instance.err is not None``.
    """
    assert os.listdir(os.getcwd()) == []
    instance = Extractor()
    res = instance.extract(archive_path=os.path.join(FIXTURE_DIR, filename),
                           output_dir=".", password="******")
    assert res is False
    assert instance.err is not None
    # nothing was extracted into the working directory
    assert os.listdir(os.getcwd()) == []
def test_extract_tmp(self, capsys, filename):
    """Without output_dir the extractor unpacks into a fresh dir under /tmp."""
    assert os.listdir(os.getcwd()) == []
    extractor = Extractor()
    ok = extractor.extract(archive_path=os.path.join(FIXTURE_DIR, filename))
    assert ok is True
    assert extractor.err is None
    # CWD stays untouched; the payload landed in the auto-created output dir
    assert os.listdir(os.getcwd()) == []
    assert os.listdir(extractor.output_path) == ['Mac.pdf']
    assert os.path.dirname(extractor.output_path).startswith("/tmp")
    # clean up the temporary output directory this test created
    shutil.rmtree(extractor.output_path)
def test_extract_input_file_special_chars(self, capsys, filename, rename_to):
    """Archives whose names contain special characters extract fine (Py3 only)."""
    assert os.listdir(os.getcwd()) == []
    # copy the fixture into CWD under the special-character name
    shutil.copy(os.path.join(FIXTURE_DIR, filename), rename_to)
    is_py2 = sys.version_info < (3, 0)
    if is_py2:
        pass  # not working
    else:
        assert os.listdir(os.getcwd()) == [rename_to]
        extractor = Extractor()
        ok = extractor.extract(archive_path=rename_to, output_dir=".")
        assert ok is True
        assert extractor.err is None
        # archive plus the extracted payload are now present
        contents = os.listdir(os.getcwd())
        assert len(contents) == 2
        assert 'Mac.pdf' in contents
        if is_py2:
            pass  # not working
        else:
            assert rename_to in contents
def upload(self, request, project=None, db=None, *args, **kwargs):
    """Upload file as new Malware instance

    Validates the incoming request, optionally runs an archive extractor
    ("auto" detection or an explicitly named extractor class) over each
    uploaded file, stores every resulting file via ``self._process_uploaded``
    and returns a 201 ``Response`` serializing the stored Malware records.
    Raises a plain ``Exception`` when nothing could be processed.
    """
    session = db.Session()
    # validate the incoming payload; raise_exception=True aborts the request
    # on invalid data (DRF turns this into an error response)
    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    log.debug("Validated Data: {}".format(serializer.validated_data))

    store_archive = serializer.validated_data.get("store_archive", None)
    archive_pass = serializer.validated_data.get("archive_pass", None)
    extractor = serializer.validated_data.get("extractor", None)
    tag_list = serializer.validated_data.get("tag_list", None)
    note_title = serializer.validated_data.get("note_title", None)
    note_body = serializer.validated_data.get("note_body", None)
    uploaded_files = serializer.validated_data.get("file", None)
    uploaded_file_name = serializer.validated_data.get("file_name", None)

    to_process = list()  # (path_on_disk, display_name) tuples to store
    tmp_dirs = list()    # temporary directories to clean up at the end
    for uploaded_file in uploaded_files:
        log.debug("Working on: {}".format(uploaded_file))
        # if a file name was provided (to override the name of the uploaded
        # file) then use it
        uploaded_file_path = uploaded_file.temporary_file_path()
        log.debug("Working on (Path): {}".format(uploaded_file_path))
        if not uploaded_file_name:
            uploaded_file_name = "{}".format(uploaded_file)
        if extractor:
            log.debug("Extractor: {}".format(extractor))
            if extractor == "auto":
                # copy the upload into a private temp dir under its (possibly
                # overridden) name so detection can work off the extension
                tmp_dir = tempfile.mkdtemp(prefix="viper_tmp_")
                tmp_dirs.append(tmp_dir)
                new_uploaded_file = os.path.join(tmp_dir, uploaded_file_name)
                # os.rename(uploaded_file_path, new_uploaded_file)  # TODO(frennkie) renaming causes Django to raise an error because it can't delete tmp file
                shutil.copy(
                    uploaded_file_path, new_uploaded_file
                )  # TODO(frennkie) copying temporary file seems a bit wasteful (I/O, disk space)
                ext = Extractor()
                _, uploaded_file_extension = ext.auto_discover_ext(
                    new_uploaded_file)
                if uploaded_file_extension in ext.extensions:
                    res = ext.extract(archive_path=new_uploaded_file,
                                      password=archive_pass)
                else:
                    # extension not handled by any extractor: store as-is
                    to_process.append(
                        (new_uploaded_file, uploaded_file_name))
                    continue
            else:
                # explicitly named extractor class: run it directly on the
                # uploaded temporary file
                new_uploaded_file = uploaded_file_path
                ext = Extractor()
                res = ext.extract(archive_path=new_uploaded_file,
                                  cls_name=extractor,
                                  password=archive_pass)
            if res:
                log.debug("Extract Result: {} - Path: {}".format(
                    res, ext.output_path))
                if not os.path.isdir(ext.output_path):
                    # make sure to only add directories to tmp_dirs list
                    ext.output_path = os.path.dirname(ext.output_path)
                tmp_dirs.append(ext.output_path)
                for dir_name, dir_names, file_names in walk(
                        ext.output_path):
                    # Add each collected file.
                    for file_name in file_names:
                        to_process.append(
                            (os.path.join(dir_name, file_name), file_name))
                if store_archive:
                    log.debug("need to store the Archive too")
                    # archive itself goes to the front of the queue
                    to_process.insert(
                        0, (new_uploaded_file,
                            os.path.basename(new_uploaded_file)))  # TODO(frennkie) Parent Child relation?!
            else:
                log.debug("Extract Result: {}".format(res))  # TODO(frennkie) raise?!
        else:
            log.debug(
                "No Extractor will be used, just store uploaded file")
            if uploaded_file_name:
                to_process.append((uploaded_file_path, uploaded_file_name))
            else:
                to_process.append(
                    (uploaded_file_path, "{}".format(uploaded_file)))
        # reset uploaded_file_name so an explicit override only applies to
        # the first file of the batch
        uploaded_file_name = None

    processed = list()
    for item in to_process:
        processed.append(
            self._process_uploaded(db, item[0], item[1], tag_list,
                                   note_title, note_body)
        )  # TODO(frennkie) Error handling (e.g. duplicate hashes?!)

    log.debug("Tmp Dirs: {}".format(tmp_dirs))
    # best-effort cleanup of every temporary directory created above
    for item in tmp_dirs:
        try:
            shutil.rmtree(item)
        except OSError as err:
            log.error("failed to delete temporary dir: {}".format(err))

    if not len(processed):
        log.error("failed..")
        raise Exception("Something went wrong")
    elif len(processed) == 1:
        # re-query the stored sample so the response reflects the DB state
        obj = session.query(Malware).filter(
            Malware.sha256 == processed[0].sha256).one_or_none()
        serializer = self.get_serializer([obj], many=True)
        headers = self.get_success_headers(serializer.data)
        return Response(serializer.data, status=201, headers=headers)
    else:
        obj_list = [
            session.query(Malware).filter(
                Malware.sha256 == x.sha256).one_or_none()
            for x in processed
        ]
        serializer = self.get_serializer(obj_list, many=True)
        headers = self.get_success_headers(serializer.data)
        return Response(serializer.data, status=201, headers=headers)
def upload(self, request, project=None, db=None, *args, **kwargs):
    """Upload file as new Malware instance

    Validates the incoming request, optionally runs an archive extractor
    ("auto" detection or an explicitly named extractor class) over each
    uploaded file, stores every resulting file via ``self._process_uploaded``
    and returns a 201 ``Response`` serializing the stored Malware records.
    Raises a plain ``Exception`` when nothing could be processed.
    """
    session = db.Session()
    # validate the incoming payload; raise_exception=True aborts the request
    # on invalid data (DRF turns this into an error response)
    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    log.debug("Validated Data: {}".format(serializer.validated_data))

    store_archive = serializer.validated_data.get("store_archive", None)
    archive_pass = serializer.validated_data.get("archive_pass", None)
    extractor = serializer.validated_data.get("extractor", None)
    tag_list = serializer.validated_data.get("tag_list", None)
    note_title = serializer.validated_data.get("note_title", None)
    note_body = serializer.validated_data.get("note_body", None)
    uploaded_files = serializer.validated_data.get("file", None)
    uploaded_file_name = serializer.validated_data.get("file_name", None)

    to_process = list()  # (path_on_disk, display_name) tuples to store
    tmp_dirs = list()    # temporary directories to clean up at the end
    for uploaded_file in uploaded_files:
        log.debug("Working on: {}".format(uploaded_file))
        # if a file name was provided (to override the name of the uploaded
        # file) then use it
        uploaded_file_path = uploaded_file.temporary_file_path()
        log.debug("Working on (Path): {}".format(uploaded_file_path))
        if not uploaded_file_name:
            uploaded_file_name = "{}".format(uploaded_file)
        if extractor:
            log.debug("Extractor: {}".format(extractor))
            if extractor == "auto":
                # copy the upload into a private temp dir under its (possibly
                # overridden) name so detection can work off the extension
                tmp_dir = tempfile.mkdtemp(prefix="viper_tmp_")
                tmp_dirs.append(tmp_dir)
                new_uploaded_file = os.path.join(tmp_dir, uploaded_file_name)
                # os.rename(uploaded_file_path, new_uploaded_file)  # TODO(frennkie) renaming causes Django to raise an error because it can't delete tmp file
                shutil.copy(uploaded_file_path, new_uploaded_file)  # TODO(frennkie) copying temporary file seems a bit wasteful (I/O, disk space)
                ext = Extractor()
                _, uploaded_file_extension = ext.auto_discover_ext(new_uploaded_file)
                if uploaded_file_extension in ext.extensions:
                    res = ext.extract(archive_path=new_uploaded_file, password=archive_pass)
                else:
                    # extension not handled by any extractor: store as-is
                    to_process.append((new_uploaded_file, uploaded_file_name))
                    continue
            else:
                # explicitly named extractor class: run it directly on the
                # uploaded temporary file
                new_uploaded_file = uploaded_file_path
                ext = Extractor()
                res = ext.extract(archive_path=new_uploaded_file, cls_name=extractor, password=archive_pass)
            if res:
                log.debug("Extract Result: {} - Path: {}".format(res, ext.output_path))
                if not os.path.isdir(ext.output_path):
                    # make sure to only add directories to tmp_dirs list
                    ext.output_path = os.path.dirname(ext.output_path)
                tmp_dirs.append(ext.output_path)
                for dir_name, dir_names, file_names in walk(ext.output_path):
                    # Add each collected file.
                    for file_name in file_names:
                        to_process.append((os.path.join(dir_name, file_name), file_name))
                if store_archive:
                    log.debug("need to store the Archive too")
                    # archive itself goes to the front of the queue
                    to_process.insert(0, (new_uploaded_file, os.path.basename(new_uploaded_file)))  # TODO(frennkie) Parent Child relation?!
            else:
                log.debug("Extract Result: {}".format(res))  # TODO(frennkie) raise?!
        else:
            log.debug("No Extractor will be used, just store uploaded file")
            if uploaded_file_name:
                to_process.append((uploaded_file_path, uploaded_file_name))
            else:
                to_process.append((uploaded_file_path, "{}".format(uploaded_file)))
        # reset uploaded_file_name so an explicit override only applies to
        # the first file of the batch
        uploaded_file_name = None

    processed = list()
    for item in to_process:
        processed.append(self._process_uploaded(db, item[0], item[1], tag_list, note_title, note_body))  # TODO(frennkie) Error handling (e.g. duplicate hashes?!)

    log.debug("Tmp Dirs: {}".format(tmp_dirs))
    # best-effort cleanup of every temporary directory created above
    for item in tmp_dirs:
        try:
            shutil.rmtree(item)
        except OSError as err:
            log.error("failed to delete temporary dir: {}".format(err))

    if not len(processed):
        log.error("failed..")
        raise Exception("Something went wrong")
    elif len(processed) == 1:
        # re-query the stored sample so the response reflects the DB state
        obj = session.query(Malware).filter(Malware.sha256 == processed[0].sha256).one_or_none()
        serializer = self.get_serializer([obj], many=True)
        headers = self.get_success_headers(serializer.data)
        return Response(serializer.data, status=201, headers=headers)
    else:
        obj_list = [session.query(Malware).filter(Malware.sha256 == x.sha256).one_or_none() for x in processed]
        serializer = self.get_serializer(obj_list, many=True)
        headers = self.get_success_headers(serializer.data)
        return Response(serializer.data, status=201, headers=headers)