def _new_file(fileobj, session):
    """Return the File record matching ``fileobj``, creating it if needed.

    The sha256 of ``fileobj`` is looked up through
    ``File.load_from_sha256``. When a record exists but its ``path`` is
    None, the sample is written again and the path restored. When no
    record exists, a new File is built and added to ``session``.

    :param fileobj: file-like object holding the sample (``read`` is used)
    :param session: database session used for the lookup and ``add``
    :return: the existing or newly created File object
    """
    sha256 = sha256sum(fileobj)
    # split files between subdirs
    path = build_sha256_path(sha256)
    try:
        log.debug("try opening file with sha256: %s", sha256)
        # Only the lookup is guarded: nothing else here is expected to
        # signal "not found".
        record = File.load_from_sha256(sha256, session)
    except IrmaDatabaseResultNotFound:
        # Unknown file: gather its metadata and store it.
        now = compat.timestamp()
        sha1 = sha1sum(fileobj)
        md5 = md5sum(fileobj)
        # determine file mimetype
        magic = Magic()
        # magic only deals with buffers, feed it with a 4MB buffer
        # NOTE(review): assumes the hash helpers leave fileobj positioned
        # at its start; otherwise this read may return nothing - confirm.
        mimetype = magic.from_buffer(fileobj.read(2**22))
        size = save_to_file(fileobj, path)
        log.debug("not present, saving, sha256 %s sha1 %s "
                  "md5 %s size %s mimetype: %s",
                  sha256, sha1, md5, size, mimetype)
        record = File(sha256, sha1, md5, size, mimetype, path, now, now)
        session.add(record)
        return record

    # The file is known; rewrite the sample if it is no longer on disk.
    if record.path is None:
        log.debug("file sample missing writing it")
        save_to_file(fileobj, path)
        record.path = path
    return record
def test005_classmethod_load_from_sha256_raise_MultipleResultNotFound(self):  # nopep8
    """MultipleResultsFound from the query must surface as IrmaDatabaseError.

    The raised error is expected to carry the original message.
    """
    message = "test"
    m_session = MagicMock()
    m_session.query.side_effect = MultipleResultsFound(message)

    with self.assertRaises(IrmaDatabaseError) as caught:
        File.load_from_sha256("whatever", m_session)

    self.assertEqual(str(caught.exception), message)
def test004_classmethod_load_from_sha256_raise_NoResultFound(self):
    """NoResultFound from the query must surface as IrmaDatabaseResultNotFound.

    The raised error keeps the original message and no sample is written
    to disk.
    """
    message = "test"
    m_session = MagicMock()
    m_session.query.side_effect = NoResultFound(message)

    with self.assertRaises(IrmaDatabaseResultNotFound) as caught:
        File.load_from_sha256("whatever", m_session)

    self.assertEqual(str(caught.exception), message)
    self.assertFalse(module.write_sample_on_disk.called)
def test010_remove_old_files(self):
    """remove_old_files removes each matching file and returns the count."""
    m_session = MagicMock()
    m_file = MagicMock()
    # The double filter() chain ending in all() yields a single file.
    m_session.query().filter().filter().all.return_value = [m_file]

    res = File.remove_old_files(10, m_session)

    # assert_called_once_with() is used instead of assert_called_once():
    # on older mock releases the latter is just an auto-created attribute
    # and never fails.
    m_file.remove_file_from_fs.assert_called_once_with()
    self.assertEqual(res, 1)
def set_result(scanid, file_hash, probe, result):
    """Store the result sent by ``probe`` for a file of scan ``scanid``.

    Every FileWeb of the scan sharing ``file_hash`` gets its probe result
    filled with the sanitized result, and the file's last scan timestamp
    is refreshed. ``is_finished`` is then called for the scan.

    :param scanid: external id of the scan
    :param file_hash: sha256 of the scanned file
    :param probe: name of the probe that produced ``result``
    :param result: raw probe result (a dict once sanitized)
    """
    with session_transaction() as session:
        scan = Scan.load_from_ext_id(scanid, session=session)
        file_webs = scan.get_filewebs_by_sha256(file_hash)
        if len(file_webs) == 0:
            log.error("file %s not found in scan", file_hash)
            return

        scanned_file = File.load_from_sha256(file_hash, session)
        scanned_file.timestamp_last_scan = compat.timestamp()
        scanned_file.update(['timestamp_last_scan'], session=session)
        clean_result = _sanitize_res(result)

        # update results for all files with same sha256
        for file_web in file_webs:
            # Update main reference results with fresh results
            probe_result = _fetch_probe_result(file_web, probe)
            _update_ref_results(file_web, file_web.file, probe_result)
            file_web.file.update(session=session)

            # fill ProbeResult with probe raw results
            probe_result.doc = clean_result
            probe_result.status = clean_result.get('status', None)
            probe_result.type = IrmaProbeType.normalize(
                clean_result.get('type', None))
            probe_result.update(session=session)

            # names of the probes that already delivered a document
            probedone = [done.name
                         for done in file_web.probe_results
                         if done.doc is not None]
            log.info("scanid: %s result from %s probedone %s",
                     scanid, probe, probedone)
    is_finished(scanid)
def get(sha256, db):
    """ Detail about one file and a page of the known scans where the
        file was present (identified by sha256).

    If the ``alt`` query parameter equals "media", the request is handed
    to ``_download`` and its result is returned instead.

    :param sha256: sha256 of the requested file
    :param db: database session passed to the model queries
    :param offset: (query string) index of first item, default 0
    :param limit: (query string) maximum number of items, default 25
    :rtype: dict of 'file_infos': dict, 'total': int, 'offset': int,
        'limit': int, 'items': list
    :return:
        on success 'file_infos' contains file information
        on success 'items' contains a list of files found
        on error the exception is logged and given to process_error
    """
    try:
        log.debug("h_value %s", sha256)
        # A download attempt short-circuits the JSON answer
        if request.query.alt == "media":
            return _download(sha256, db)

        # Pagination values come from the query string, else defaults
        offset = int(request.query.get("offset", default=0))
        limit = int(request.query.get("limit", default=25))

        file_obj = File.load_from_sha256(sha256, db)
        # query all known results not only those with different names
        base_query = FileWeb.query_find_by_hash(
            "sha256", sha256, None, db, distinct_name=False)

        # TODO: Find a way to move pagination as a BaseQuery like in
        #       flask_sqlalchemy.
        # https://github.com/mitsuhiko/flask-sqlalchemy/blob/master/flask_sqlalchemy/__init__.py#L422
        items = base_query.limit(limit).offset(offset).all()

        # A first page that is not full already holds every result, so the
        # extra count query is only issued otherwise.
        first_page_not_full = offset == 0 and len(items) < limit
        total = len(items) if first_page_not_full else base_query.count()

        log.debug("offset %d limit %d total %d", offset, limit, total)

        file_web_schema = FileWebSchema_v1_1(
            exclude=('probe_results', 'file_infos'))
        fileinfo_schema = FileSchema_v1_1()
        # TODO: allow formatted to be a parameter
        formatted = True
        fileinfo_schema.context = {'formatted': formatted}

        response.content_type = "application/json; charset=UTF-8"
        return {
            'file_infos': fileinfo_schema.dump(file_obj).data,
            'total': total,
            'offset': offset,
            'limit': limit,
            'items': file_web_schema.dump(items, many=True).data,
        }
    except Exception as err:
        log.exception(err)
        process_error(err)
def test019_load_from_sha256_no_more_exists(self, m_os):
    """A loaded File whose path no longer exists gets its path reset to None.

    ``m_os`` is the mocked ``os`` module; ``os.path.exists`` reports False.
    """
    self.file.path = "RandomPath"
    m_os.path.exists.return_value = False
    m_session = MagicMock()
    m_session.query().filter().one.return_value = self.file

    loaded = File.load_from_sha256("sha256", m_session)

    self.assertEqual(loaded, self.file)
    self.assertIsNone(self.file.path)
def remove_tag(sha256, tagid, db):
    """ Detach tag ``tagid`` from the file identified by ``sha256``.

    The change is committed on ``db``. Any exception is logged and
    handed to ``process_error``.
    """
    try:
        log.debug("h_value %s tagid %s", sha256, tagid)
        tagged_file = File.load_from_sha256(sha256, db)
        tagged_file.remove_tag(tagid, db)
        db.commit()
    except Exception as err:
        log.exception(err)
        process_error(err)
def add_tag(sha256, tagid, db):
    """ Attach tag ``tagid`` to the file identified by ``sha256``.

    The change is committed on ``db``. Any exception is logged and
    handed to ``process_error``.
    """
    try:
        log.debug("h_value %s tagid %s", sha256, tagid)
        tagged_file = File.load_from_sha256(sha256, db)
        tagged_file.add_tag(tagid, db)
        db.commit()
    except Exception as err:
        log.exception(err)
        process_error(err)
def setUp(self):
    """Create the File fixture from fixed placeholder values."""
    self.sha256, self.sha1, self.md5 = "sha256", "sha1", "md5"
    self.size = 1024
    self.mimetype = "MimeType"
    self.path = "path"
    self.first_ts, self.last_ts = 0, 1
    self.file = File(
        self.sha256, self.sha1, self.md5,
        self.size, self.mimetype, self.path,
        self.first_ts, self.last_ts,
    )
def test006_classmethod_load_from_sha256_True(self):
    """load_from_sha256 runs query(File).filter(...).one() and returns it."""
    digest = "sha_test"
    m_session = MagicMock()
    m_session.query().filter().one().path = None
    # With File.sha256 set to the digest, the filter expression compares
    # two equal strings, hence the expected filter argument of True.
    File.sha256 = digest

    loaded = File.load_from_sha256(digest, m_session)

    # Keep this order: each m_session.query() call below re-records the
    # mock's last call, so query.call_args has to be checked first.
    self.assertTrue(m_session.query.called)
    self.assertEqual(m_session.query.call_args, ((File,),))
    self.assertTrue(m_session.query().filter.called)
    self.assertEqual(m_session.query().filter.call_args, ((True,),))
    self.assertTrue(m_session.query().filter().one.called)
    self.assertEqual(m_session.query().filter().one.call_args, (tuple(),))
    self.assertEqual(loaded, m_session.query().filter().one())
def _download(sha256, db):
    """Retrieve a file's content based on its sha256.

    Sets the response headers to force a download named after the sha256.

    :param sha256: sha256 of the requested file
    :param db: database session used to load the File
    :return: the raw content of the stored sample
    :raises IrmaDatabaseResultNotFound: when the File has no path left
    """
    log.debug("h_value %s", sha256)
    fobj = File.load_from_sha256(sha256, db)
    # check if file is still present
    if fobj.path is None:
        raise IrmaDatabaseResultNotFound("downloading a removed file")
    # Force download
    ctype = 'application/octet-stream; charset=UTF-8'
    # Suggest Filename to sha256
    cdisposition = "attachment; filename={}".format(sha256)
    response.headers["Content-Type"] = ctype
    response.headers["Content-Disposition"] = cdisposition
    # Samples are arbitrary binaries: read them in binary mode and make
    # sure the handle is closed once the content is in memory.
    with open(fobj.path, "rb") as sample:
        return sample.read()
def handle_output_files(scanid, parent_file_hash, probe, result):
    """Attach the files uploaded by a probe to the scan and resubmit them.

    Nothing is done when ``result`` has no 'uploaded_files' entry or when
    the scan has ``resubmit_files`` disabled.

    :param scanid: external id of the scan
    :param parent_file_hash: sha256 of the file the probe analysed
    :param probe: probe name (not used in this function)
    :param result: probe result dict, read for its 'uploaded_files' key
    """
    with session_transaction() as session:
        scan = Scan.load_from_ext_id(scanid, session=session)
        uploaded_files = result.get('uploaded_files', None)

        resubmit_wanted = (uploaded_files is not None and
                           scan.resubmit_files)
        if not resubmit_wanted:
            log.debug("scanid: %s Nothing to resubmit or resubmit disabled",
                      scanid)
            return

        log.info("scanid: %s appending new uploaded files %s",
                 scanid, uploaded_files.keys())
        parent_file = File.load_from_sha256(parent_file_hash, session)

        # filter already present file in current scan: hashes are
        # collected before the new files are appended to the scan
        hash_uploaded = [known.sha256 for known in scan.files]
        new_fws = _append_new_files_to_scan(scan, uploaded_files, session)
        for new_fw in new_fws:
            parent_file.children.append(new_fw)
        _resubmit_files(scan, parent_file, new_fws, hash_uploaded, session)
def test007_classmethod_load_from_sha256_path_is_None(self):
    """With a None path and data supplied, the sample is written to disk.

    Also checks the query/filter/one call chain and the returned object.
    """
    sha, data = "sha_test", "data_test"
    session = MagicMock()
    session.query().filter().one().path = None
    File.sha256 = sha
    File.data = data

    result = File.load_from_sha256(sha, session, data)

    # Keep this order: each session.query() call below re-records the
    # mock's last call, so query.call_args has to be checked first.
    self.assertTrue(session.query.called)
    self.assertEqual(session.query.call_args, ((File,),))
    self.assertTrue(session.query().filter.called)
    self.assertEqual(session.query().filter.call_args, ((True,),))
    self.assertTrue(session.query().filter().one.called)
    self.assertEqual(session.query().filter().one.call_args, (tuple(),))
    self.assertEqual(result, session.query().filter().one())
    self.assertTrue(module.write_sample_on_disk.called)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(module.write_sample_on_disk.call_args,
                     ((sha, data),))
def _download_zip(hash_list, db, infos):
    """Build a zip archive of samples plus one ".info" entry per info dict.

    Sets the response headers to force a download named
    "irma_archive.zip".

    :param hash_list: iterable of sha256 values of the files to archive
    :param db: database session used to load each File
    :param infos: iterable of dicts holding at least 'scan_date' (a
        timestamp) and 'file_sha256'; the dicts are left untouched
    :return: the archive content as returned by ``StringIO.getvalue``
    :raises IrmaDatabaseResultNotFound: when a File has no path left
    """
    buf = StringIO.StringIO()
    # The context manager closes the archive even when a missing file
    # aborts the loop below.
    with zipfile.ZipFile(buf, 'w') as zf:
        for sha256 in hash_list:
            # Retrieve a file based on its sha256
            fobj = File.load_from_sha256(sha256, db)
            if fobj.path is None:
                raise IrmaDatabaseResultNotFound(
                    "downloading a removed file")
            # Add file to archive, named after its sha256
            zf.write(fobj.path, fobj.sha256)

        for info in infos:
            # Work on a copy so the caller's dict keeps its numeric
            # timestamp and can be passed to this function again.
            entry = dict(info)
            # Timestamp to readable date
            entry['scan_date'] = str(
                datetime.datetime.fromtimestamp(info['scan_date']))
            # Write file info in zip archive
            zf.writestr(entry['file_sha256'] + ".info", str(entry))

    ctype = 'application/zip'
    # Suggest Filename to "irma_archive"
    # Todo: generate archive name dynamically.
    cdisposition = "attachment; filename={}".format('irma_archive.zip')
    response.headers["Content-Type"] = ctype
    response.headers["Content-Disposition"] = cdisposition
    return buf.getvalue()
def remove_files(max_age_sec):
    """Remove files older than ``max_age_sec`` via File.remove_old_files.

    :param max_age_sec: maximum age, handed to ``File.remove_old_files``
    :return: the value returned by ``File.remove_old_files``
    """
    with session_transaction() as session:
        nb_deleted = File.remove_old_files(max_age_sec, session)
        # Both placeholders need a value; logging cannot format the
        # message when nb_deleted is left out.
        log.debug("Max_age_sec: %s Nb_deleted: %s", max_age_sec, nb_deleted)
        return nb_deleted
class TestFile(TestCase):
    """Unit tests for the File SQL object, run against mocked sessions."""

    def setUp(self):
        """Create the File fixture from fixed placeholder values."""
        self.sha256 = "sha256"
        self.sha1 = "sha1"
        self.md5 = "md5"
        self.size = 1024
        self.mimetype = "MimeType"
        self.path = "path"
        self.first_ts = 0
        self.last_ts = 1
        self.file = File(self.sha256, self.sha1, self.md5, self.size,
                         self.mimetype, self.path, self.first_ts,
                         self.last_ts)

    def tearDown(self):
        del self.file

    def test001___init__(self):
        """The constructor stores both timestamps and starts without tags."""
        self.assertEqual(self.file.timestamp_first_scan, self.first_ts)
        self.assertEqual(self.file.timestamp_last_scan, self.last_ts)
        self.assertEqual(self.file.tags, list())
        self.assertIsInstance(self.file, File)
        self.assertIsInstance(self.file, SQLDatabaseObject)

    def test002_to_json(self):
        """to_json exposes the hashes, the size and both timestamps."""
        expected = {'md5': self.md5,
                    "sha1": self.sha1,
                    "sha256": self.sha256,
                    "size": self.size,
                    "timestamp_first_scan": self.first_ts,
                    "timestamp_last_scan": self.last_ts}
        self.assertEqual(self.file.to_json(), expected)

    def test003_to_json_more_stuff(self):
        """to_json reflects attribute values changed after construction."""
        base = {"md5": "m_test",
                "sha1": "s_test",
                "sha256": "s256_test",
                "size": "si_test"}
        for key, value in base.items():
            setattr(self.file, key, value)
        base["timestamp_first_scan"] = self.first_ts
        base["timestamp_last_scan"] = self.last_ts
        result = self.file.to_json()
        self.assertEqual(result, base)

    def test004_classmethod_load_from_sha256_raise_NoResultFound(self):
        """NoResultFound surfaces as IrmaDatabaseResultNotFound."""
        sample = "test"
        session = MagicMock()
        session.query.side_effect = NoResultFound(sample)
        with self.assertRaises(IrmaDatabaseResultNotFound) as context:
            File.load_from_sha256("whatever", session)
        self.assertEqual(str(context.exception), sample)

    def test005_classmethod_load_from_sha256_raise_MultipleResultNotFound(self):  # nopep8
        """MultipleResultsFound surfaces as IrmaDatabaseError."""
        sample = "test"
        session = MagicMock()
        session.query.side_effect = MultipleResultsFound(sample)
        with self.assertRaises(IrmaDatabaseError) as context:
            File.load_from_sha256("whatever", session)
        self.assertEqual(str(context.exception), sample)

    def test006_classmethod_load_from_sha256_True(self):
        """load_from_sha256 runs query(File).filter(...).one()."""
        sha = "sha_test"
        session = MagicMock()
        session.query().filter().one().path = None
        # NOTE(review): this overwrites the class attribute and is never
        # restored, so it may leak into later tests - confirm.
        File.sha256 = sha
        result = File.load_from_sha256(sha, session)
        # Keep this order: each session.query() call below re-records the
        # mock's last call, so query.call_args has to be checked first.
        self.assertTrue(session.query.called)
        self.assertEqual(session.query.call_args, ((File,),))
        self.assertTrue(session.query().filter.called)
        self.assertEqual(session.query().filter.call_args, ((True,),))
        self.assertTrue(session.query().filter().one.called)
        self.assertEqual(session.query().filter().one.call_args, (tuple(),))
        self.assertEqual(result, session.query().filter().one())

    def test007_classmethod_load_from_sha256_path_is_None(self):
        """Same call chain as test006 with a data attribute set on File."""
        sha, data = "sha_test", TemporaryFile()
        # Close the temporary file once the test is over.
        self.addCleanup(data.close)
        session = MagicMock()
        session.query().filter().one().path = None
        File.sha256 = sha
        File.data = data
        result = File.load_from_sha256(sha, session)
        # Keep this order, see test006.
        self.assertTrue(session.query.called)
        self.assertEqual(session.query.call_args, ((File,),))
        self.assertTrue(session.query().filter.called)
        self.assertEqual(session.query().filter.call_args, ((True,),))
        self.assertTrue(session.query().filter().one.called)
        self.assertEqual(session.query().filter().one.call_args, (tuple(),))
        self.assertEqual(result, session.query().filter().one())

    def test008_get_file_names_empty(self):
        """A file without files_web has no names."""
        self.assertEqual(self.file.get_file_names(), list())

    def test009_get_file_names_some(self):
        """get_file_names returns the name of every attached files_web."""
        a, b, c = MagicMock(), MagicMock(), MagicMock()
        a.name, b.name, c.name = str(a), str(b), str(c)
        self.file.files_web = [a, b, c]
        res = self.file.get_file_names()
        self.assertItemsEqual(res, [str(a), str(b), str(c)])

    def test010_remove_old_files(self):
        """remove_old_files removes each matching file, returns the count."""
        m_session = MagicMock()
        m_file = MagicMock()
        m_session.query().filter().filter().all.return_value = [m_file]
        res = File.remove_old_files(10, m_session)
        # assert_called_once_with() also fails on older mock releases,
        # where assert_called_once() is an auto-created attribute that
        # never fails.
        m_file.remove_file_from_fs.assert_called_once_with()
        self.assertEqual(res, 1)

    def test011_get_tags(self):
        """get_tags returns a list holding each tag's to_json() value."""
        m_tag = MagicMock()
        self.file.tags = [m_tag]
        res = self.file.get_tags()
        self.assertIs(type(res), list)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(res, [m_tag.to_json()])

    def test012_add_tag(self):
        """add_tag appends the tag returned by the session query."""
        text = "whatever"
        t = Tag(text=text)
        m_session = MagicMock()
        m_session.query(Tag).filter().one.return_value = t
        self.assertEqual(len(self.file.tags), 0)
        self.file.add_tag("id", m_session)
        self.assertEqual(len(self.file.tags), 1)
        self.assertItemsEqual(self.file.tags, [t])

    def test013_add_tag_error(self):
        """Adding the same tag twice raises and keeps a single tag."""
        text = "whatever"
        t = Tag(text=text)
        m_session = MagicMock()
        m_session.query(Tag).filter().one.return_value = t
        self.file.add_tag("id", m_session)
        with self.assertRaises(IrmaDatabaseError):
            self.file.add_tag("id", m_session)
        self.assertItemsEqual(self.file.tags, [t])

    def test014_remove_tag(self):
        """remove_tag drops a tag that was previously added."""
        text = "whatever"
        t = Tag(text=text)
        m_session = MagicMock()
        m_session.query(Tag).filter().one.return_value = t
        self.assertEqual(len(self.file.tags), 0)
        self.file.add_tag("id", m_session)
        self.file.remove_tag("id", m_session)
        self.assertEqual(len(self.file.tags), 0)

    def test015_remove_tag_error(self):
        """Removing a tag that is not attached raises IrmaDatabaseError."""
        text = "whatever"
        t = Tag(text=text)
        m_session = MagicMock()
        m_session.query(Tag).filter().one.return_value = t
        with self.assertRaises(IrmaDatabaseError):
            self.file.remove_tag("id", m_session)
        self.assertEqual(len(self.file.tags), 0)

    @patch("frontend.models.sqlobjects.os")
    def test016_remove_file_from_fs_path_none(self, m_os):
        """Nothing is removed from disk when the path is None."""
        self.file.path = None
        self.file.remove_file_from_fs()
        m_os.remove.assert_not_called()

    @patch("frontend.models.sqlobjects.os")
    def test017_remove_file_from_fs(self, m_os):
        """The path is removed through os.remove and then reset to None."""
        path = "RandomPath"
        self.file.path = path
        self.file.remove_file_from_fs()
        m_os.remove.assert_called_once_with(path)
        self.assertIsNone(self.file.path)

    @patch("frontend.models.sqlobjects.os")
    def test018_remove_file_from_fs_error(self, m_os):
        """An OSError from os.remove surfaces as IrmaFileSystemError."""
        path = "RandomPath"
        self.file.path = path
        m_os.remove.side_effect = OSError
        with self.assertRaises(IrmaFileSystemError):
            self.file.remove_file_from_fs()

    @patch("frontend.models.sqlobjects.os")
    def test019_load_from_sha256_no_more_exists(self, m_os):
        """A loaded File whose path no longer exists gets path set to None."""
        path = "RandomPath"
        self.file.path = path
        m_os.path.exists.return_value = False
        m_session = MagicMock()
        m_session.query().filter().one.return_value = self.file
        ret_file = File.load_from_sha256("sha256", m_session)
        self.assertEqual(ret_file, self.file)
        self.assertIsNone(self.file.path)
def setUp(self):
    """Create the File fixture and swap write_sample_on_disk for a mock.

    The real function is kept on ``self.old_write_sample_on_disk``.
    """
    self.first_ts, self.last_ts = 0, 1
    self.file = File(self.first_ts, self.last_ts)

    # Remember the real helper before replacing it.
    self.old_write_sample_on_disk = module.write_sample_on_disk
    module.write_sample_on_disk = MagicMock()
class TestFile(TestCase):
    """Unit tests for the File SQL object with write_sample_on_disk mocked."""

    def setUp(self):
        """Create the File fixture and mock module.write_sample_on_disk."""
        self.first_ts = 0
        self.last_ts = 1
        self.file = File(self.first_ts, self.last_ts)
        self.old_write_sample_on_disk = module.write_sample_on_disk
        module.write_sample_on_disk = MagicMock()

    def tearDown(self):
        """Drop the fixture and restore the real write_sample_on_disk."""
        del self.file
        module.write_sample_on_disk = self.old_write_sample_on_disk

    def test001___init__(self):
        """The constructor stores both timestamps and starts without tags."""
        self.assertEqual(self.file.timestamp_first_scan, self.first_ts)
        self.assertEqual(self.file.timestamp_last_scan, self.last_ts)
        self.assertEqual(self.file.tags, list())
        self.assertIsInstance(self.file, File)
        self.assertIsInstance(self.file, SQLDatabaseObject)

    def test002_to_json(self):
        """to_json of a fresh File only holds the two timestamps."""
        expected = {"timestamp_first_scan": self.first_ts,
                    "timestamp_last_scan": self.last_ts}
        self.assertEqual(self.file.to_json(), expected)

    def test003_to_json_more_stuff(self):
        """to_json reflects hash and size attributes set afterwards."""
        base = {"md5": "m_test",
                "sha1": "s_test",
                "sha256": "s256_test",
                "size": "si_test"}
        for key, value in base.items():
            setattr(self.file, key, value)
        base["timestamp_first_scan"] = self.first_ts
        base["timestamp_last_scan"] = self.last_ts
        result = self.file.to_json()
        self.assertEqual(result, base)

    def test004_classmethod_load_from_sha256_raise_NoResultFound(self):
        """NoResultFound surfaces as IrmaDatabaseResultNotFound, no write."""
        sample = "test"
        session = MagicMock()
        session.query.side_effect = NoResultFound(sample)
        with self.assertRaises(IrmaDatabaseResultNotFound) as context:
            File.load_from_sha256("whatever", session)
        self.assertEqual(str(context.exception), sample)
        self.assertFalse(module.write_sample_on_disk.called)

    def test005_classmethod_load_from_sha256_raise_MultipleResultNotFound(self):  # nopep8
        """MultipleResultsFound surfaces as IrmaDatabaseError, no write."""
        sample = "test"
        session = MagicMock()
        session.query.side_effect = MultipleResultsFound(sample)
        with self.assertRaises(IrmaDatabaseError) as context:
            File.load_from_sha256("whatever", session)
        self.assertEqual(str(context.exception), sample)
        self.assertFalse(module.write_sample_on_disk.called)

    def test006_classmethod_load_from_sha256_True(self):
        """Without data, the File is loaded and nothing is written to disk."""
        sha = "sha_test"
        session = MagicMock()
        session.query().filter().one().path = None
        # NOTE(review): this overwrites the class attribute and is never
        # restored, so it may leak into later tests - confirm.
        File.sha256 = sha
        result = File.load_from_sha256(sha, session)
        # Keep this order: each session.query() call below re-records the
        # mock's last call, so query.call_args has to be checked first.
        self.assertTrue(session.query.called)
        self.assertEqual(session.query.call_args, ((File,),))
        self.assertTrue(session.query().filter.called)
        self.assertEqual(session.query().filter.call_args, ((True,),))
        self.assertTrue(session.query().filter().one.called)
        self.assertEqual(session.query().filter().one.call_args, (tuple(),))
        self.assertEqual(result, session.query().filter().one())
        self.assertFalse(module.write_sample_on_disk.called)

    def test007_classmethod_load_from_sha256_path_is_None(self):
        """With a None path and data supplied, the sample is written."""
        sha, data = "sha_test", "data_test"
        session = MagicMock()
        session.query().filter().one().path = None
        File.sha256 = sha
        File.data = data
        result = File.load_from_sha256(sha, session, data)
        # Keep this order, see test006.
        self.assertTrue(session.query.called)
        self.assertEqual(session.query.call_args, ((File,),))
        self.assertTrue(session.query().filter.called)
        self.assertEqual(session.query().filter.call_args, ((True,),))
        self.assertTrue(session.query().filter().one.called)
        self.assertEqual(session.query().filter().one.call_args, (tuple(),))
        self.assertEqual(result, session.query().filter().one())
        self.assertTrue(module.write_sample_on_disk.called)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(module.write_sample_on_disk.call_args,
                         ((sha, data),))

    def test008_get_file_names_empty(self):
        """A file without files_web has no names."""
        self.assertEqual(self.file.get_file_names(), list())

    def test009_get_file_names_some(self):
        # TODO: finish this test
        # NOTE(review): this sets files_web on the test case rather than
        # on self.file and asserts nothing yet.
        self.files_web = list(MagicMock())
def remove_files(max_age_sec):
    """Remove files older than ``max_age_sec`` via File.remove_old_files.

    :param max_age_sec: maximum age, handed to ``File.remove_old_files``
    :return: the value returned by ``File.remove_old_files``
    """
    with session_transaction() as db_session:
        removed = File.remove_old_files(max_age_sec, db_session)
        return removed
def remove_files(max_age_sec):
    """Remove files older than ``max_age_sec`` and log the outcome.

    :param max_age_sec: maximum age, handed to ``File.remove_old_files``
    :return: the value returned by ``File.remove_old_files``
    """
    with session_transaction() as db_session:
        removed = File.remove_old_files(max_age_sec, db_session)
        log.debug("Max_age_sec: %s Nb_deleted: %s", max_age_sec, removed)
        return removed