def test_string_split(self):
    """Check that '_', '-', '.' and ' ' are all treated as chunk separators.

    The separator is swapped first in the candidate and then in the input
    name; in both cases the differing last chunk must come back as
    ``[RANDOM]``.
    """
    expected = 'foo-bar-[RANDOM]'
    dashed_name = 'foo-bar-baz'
    dashed_choice = 'foo-bar-bat'
    for separator in ('_', '-', '.', ' '):
        # Candidate written with the alternative separator.
        swapped_choice = dashed_choice.replace('-', separator)
        result = patterns.calculate(dashed_name, [(swapped_choice, None)])
        assert result == expected

        # Input name written with the alternative separator.
        swapped_name = dashed_name.replace('-', separator)
        result = patterns.calculate(swapped_name, [(dashed_choice, None)])
        assert result == expected
def test_too_many_different_chunks(self):
    """Check that no pattern is returned when more than one chunk differs."""
    name = 'abc-def-ghi'

    # The candidate shares no chunk with the name.
    nothing_shared = patterns.calculate(name, [('foo-bar-baz', None)])
    assert nothing_shared is None

    # The candidate shares only the first chunk, so two chunks still differ.
    one_shared = patterns.calculate(name, [('abc-jkl-mno', None)])
    assert one_shared is None
def test_no_matching_choice(self):
    """Check that no pattern is returned when no candidate has the name's chunk count."""
    name = 'abc-def-ghi'

    # A single candidate with fewer chunks than the name.
    too_short = [('foo', None)]
    assert patterns.calculate(name, too_short) is None

    # Candidates with one, two and four chunks; none has three like the name.
    mismatched = [
        ('foo', None),
        ('foo-bar', None),
        ('foo-bar-baz-bat', None),
    ]
    assert patterns.calculate(name, mismatched) is None
def test_random_not_long_enough(self):
    """Check that a very short differing chunk is not replaced by a placeholder.

    The names differ from the candidate only in a one- or two-character
    chunk ('1' / '11' versus '2'); the expected result keeps that chunk
    literally and joins all chunks with '-'.
    """
    choices = (('foobar-2.zip', None),)

    one_char = patterns.calculate('foobar-1.zip', choices)
    assert one_char == 'foobar-1-zip'

    two_chars = patterns.calculate('foobar-11.zip', choices)
    assert two_chars == 'foobar-11-zip'
def test_string_split(self):
    """Check that '_', '-', '.' and ' ' all split a name into the same chunks.

    For every separator two cases are checked: the candidate uses the
    separator, then the input name uses it. Both must yield
    ``'foo-bar-[RANDOM]'``.
    """
    for sep in ('_', '-', '.', ' '):
        cases = (
            # (input name, candidate filename)
            ('foo-bar-baz', 'foo-bar-bat'.replace('-', sep)),
            ('foo-bar-baz'.replace('-', sep), 'foo-bar-bat'),
        )
        for name, choice in cases:
            pattern = patterns.calculate(name, [(choice, None)])
            assert pattern == 'foo-bar-[RANDOM]'
def test_too_many_different_chunks(self):
    """Check that a candidate differing in more than one chunk gives no pattern."""
    candidates = (
        'foo-bar-baz',  # all chunks are different
        'abc-jkl-mno',  # more than one chunk is different
    )
    for candidate in candidates:
        pattern = patterns.calculate('abc-def-ghi', [(candidate, None)])
        assert pattern is None
def test_no_matching_choice(self):
    """Check that candidates with a different chunk count never produce a pattern."""
    choice_sets = (
        # not enough chunks in the only candidate
        [('foo', None)],
        # no candidate has the same number of chunks as the name
        [('foo', None), ('foo-bar', None), ('foo-bar-baz-bat', None)],
    )
    for choices in choice_sets:
        pattern = patterns.calculate('abc-def-ghi', choices)
        assert pattern is None
def add_resource(self, resource, vtresult=None, localpart=None, domain=None):
    """Store a resource in the ``filenames`` table, or refresh its existing row.

    A pattern is calculated for ``resource.filename`` against the values
    returned by ``self.get_filename_localparts()``. A new row is inserted;
    if the insert raises ``sqlite3.IntegrityError`` the row with the same
    filename gets its pattern and timestamp updated instead. Afterwards all
    rows with the same ``sha256`` that are still marked ``infected=0``
    receive the current infected flag.

    Args:
        resource: object providing ``filename`` and ``sha256`` attributes.
        vtresult: optional object whose ``infected`` attribute is stored;
            when falsy the resource is stored as not infected.
        localpart: value stored in the ``localpart`` column and passed on
            to the pattern calculation.
        domain: value stored in the ``domain`` column.
    """
    logger.debug(
        "Adding resource %s with result %s and to (%s, %s) to database",
        resource, vtresult, localpart, domain)
    insert_sql = (
        'INSERT INTO filenames (filename, pattern, infected, "timestamp", '
        'sha256, localpart, domain, chunks) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
    )
    # NOTE(review): the fallback UPDATE refreshes pattern and timestamp but
    # leaves the stored chunks value untouched - confirm this is intended.
    update_sql = 'UPDATE filenames SET pattern = ?, timestamp = ? WHERE filename=?'
    infected = vtresult.infected if vtresult else False
    pattern = patterns.calculate(
        resource.filename, self.get_filename_localparts(), localpart=localpart)
    # Rows without a pattern are stored with a chunk count of 0.
    no_chunks = len(patterns.split_chunks(pattern, localpart)) if pattern else 0
    values = (
        resource.filename,
        pattern,
        infected,
        datetime.datetime.utcnow(),
        resource.sha256,
        localpart,
        domain,
        no_chunks,
    )
    with AutoDB(self.config.database_path) as db:
        # Create the cursor before entering ``try`` so the ``finally`` clause
        # never calls ``close()`` on ``None`` and hides the real error.
        cursor = db.connection.cursor()
        try:
            cursor.execute(insert_sql, values)
        except sqlite3.IntegrityError:
            # The insert was rejected by a constraint: refresh the row
            # carrying the same filename instead.
            cursor.execute(
                update_sql,
                (pattern, datetime.datetime.utcnow(), resource.filename))
        finally:
            db.connection.commit()
            cursor.close()

        # Propagate the infected flag to every row with the same hash that
        # is not yet marked as infected.
        cursor = db.connection.cursor()
        try:
            cursor.execute(
                "UPDATE filenames SET infected=? WHERE sha256=? AND infected=0",
                (int(infected), resource.sha256))
            db.connection.commit()
        finally:
            cursor.close()
def update_patterns(self):
    """Calculate patterns for recent rows of ``filenames`` that have none.

    Rows are selected when their pattern is NULL, their timestamp lies
    within the last 14 days and their chunk count is at least
    ``patterns.MIN_CHUNKS``. For every row that now yields a pattern, the
    pattern and its chunk count are written back.
    """
    logger.info("Updating patterns")
    # add_resource() stores timestamps with utcnow(), so the cutoff has to
    # be computed in UTC too; a local-time cutoff shifts the 14-day window
    # by the UTC offset.
    min_date = datetime.datetime.utcnow() - datetime.timedelta(days=14)
    select_sql = (
        'SELECT id, filename, localpart FROM filenames '
        'WHERE pattern IS NULL AND timestamp >= ? AND chunks >= ?'
    )
    with AutoDB(self.config.database_path) as db:
        cursor = db.connection.cursor()
        try:
            cursor.execute(select_sql, (min_date, patterns.MIN_CHUNKS))
            rows = cursor.fetchall()
            logger.debug("%s filenames without pattern", len(rows))
            db.connection.commit()
        finally:
            cursor.close()

    update_sql = 'UPDATE filenames SET pattern=?, chunks=? WHERE id=?'
    other_filename_localparts = self.get_filename_localparts()
    updates = []
    for row_id, filename, localpart in rows:
        pattern = patterns.calculate(
            filename, other_filename_localparts, localpart=localpart)
        if not pattern:
            continue
        # Store the number of chunks, computed the same way as in
        # add_resource(); len(pattern) would be the character count.
        no_chunks = len(patterns.split_chunks(pattern, localpart))
        updates.append((pattern, no_chunks, row_id))

    with AutoDB(self.config.database_path) as db:
        cursor = db.connection.cursor()
        try:
            cursor.executemany(update_sql, updates)
            db.connection.commit()
        finally:
            cursor.close()
def update_patterns(self):
    """Fill in missing patterns for recently stored filenames.

    Selects rows of ``filenames`` with a NULL pattern, a timestamp within
    the last 14 days and at least ``patterns.MIN_CHUNKS`` chunks,
    recalculates their pattern and writes back the pattern together with
    its chunk count for every row where a pattern was found.
    """
    logger.info("Updating patterns")
    # Timestamps are written with utcnow() by add_resource(); use UTC here
    # as well so the 14-day window is not shifted by the local UTC offset.
    min_date = datetime.datetime.utcnow() - datetime.timedelta(days=14)
    select_sql = (
        'SELECT id, filename, localpart FROM filenames '
        'WHERE pattern IS NULL AND timestamp >= ? AND chunks >= ?'
    )
    with AutoDB(self.config.database_path) as db:
        cursor = db.connection.cursor()
        try:
            cursor.execute(select_sql, (min_date, patterns.MIN_CHUNKS))
            rows = cursor.fetchall()
            logger.debug("%s filenames without pattern", len(rows))
            db.connection.commit()
        finally:
            cursor.close()

    update_sql = 'UPDATE filenames SET pattern=?, chunks=? WHERE id=?'
    other_filename_localparts = self.get_filename_localparts()
    updates = []
    for row_id, filename, localpart in rows:
        pattern = patterns.calculate(
            filename, other_filename_localparts, localpart=localpart)
        if pattern:
            # The chunks column holds a chunk count (see add_resource()),
            # not the length of the pattern string.
            updates.append(
                (pattern, len(patterns.split_chunks(pattern, localpart)), row_id))

    with AutoDB(self.config.database_path) as db:
        cursor = db.connection.cursor()
        try:
            cursor.executemany(update_sql, updates)
            db.connection.commit()
        finally:
            cursor.close()
def test_pattern_ok(self):
    """Positive test of pattern.calculate - a simple pattern can be calculated."""
    name = 'foo-bar-baz'
    random_tail = 'foo-bar-[RANDOM]'

    # One candidate that differs only in the last chunk -> [RANDOM].
    single = [('foo-bar-bat', None)]
    assert patterns.calculate(name, single) == random_tail

    # Several such candidates give the same result.
    several = [
        ('foo-bar-bat', None),
        ('foo-bar-bar', None),
        ('foo-bar-123', None),
    ]
    assert patterns.calculate(name, several) == random_tail

    # With localpart 'foo' the first chunk is reported as [LOCALPART].
    with_localparts = [
        ('nah-bar-bat', 'nah'),
        ('bah-bar-bar', 'bah'),
        ('tah-bar-123', 'tah'),
    ]
    result = patterns.calculate(name, with_localparts, localpart='foo')
    assert result == '[LOCALPART]-bar-[RANDOM]'
def test_pattern_ok(self):
    """Positive test of pattern.calculate - expect a simple pattern to be found."""
    # Should detect [RANDOM], with one candidate and with several.
    plain_choice_sets = (
        [('foo-bar-bat', None)],
        [
            ('foo-bar-bat', None),
            ('foo-bar-bar', None),
            ('foo-bar-123', None),
        ],
    )
    for choices in plain_choice_sets:
        pattern = patterns.calculate('foo-bar-baz', choices)
        assert pattern == 'foo-bar-[RANDOM]'

    # Should detect localpart 'foo' in addition to the random chunk.
    localpart_choices = [
        ('nah-bar-bat', 'nah'),
        ('bah-bar-bar', 'bah'),
        ('tah-bar-123', 'tah'),
    ]
    pattern = patterns.calculate(
        'foo-bar-baz', localpart_choices, localpart='foo')
    assert pattern == '[LOCALPART]-bar-[RANDOM]'
def test_pattern_localpart_with_splitchar(self):
    """Check that a localpart containing '.' is still reported as [LOCALPART].

    Each filename embeds the localpart ``foo.bar``; the expected pattern
    shows it as a single ``[LOCALPART]`` chunk.
    """
    localpart = "foo.bar"
    expected_by_name = (
        ('foo.bar-invoice.zip', '[LOCALPART]-[STATIC]-zip'),
        ('invoice-foo.bar-invoice.zip', '[STATIC]-[LOCALPART]-[STATIC]-zip'),
    )
    for name, expected in expected_by_name:
        # A fresh candidate list is built for every call.
        choices = [
            ('webmaster-spreadsheet.zip', 'webmaster'),
            ('invoice-webmaster-invoice.zip', 'webmaster'),
        ]
        result = patterns.calculate(name, choices, localpart=localpart)
        assert result == expected
def filename_pattern_match(self, resource, localpart=None):
    """Decide whether the resource's filename pattern is known as mostly infected.

    The pattern calculated for ``resource.filename`` is looked up in the
    ``filenames`` table together with the number of rows sharing it and the
    number of those rows marked ``infected=1``.

    Args:
        resource: object providing a ``filename`` attribute; a falsy value
            short-circuits to ``False``.
        localpart: passed on to the pattern calculation.

    Returns:
        ``True`` when the total count reaches
        ``self.config.min_filename_patterns`` and the infected/total ratio
        reaches ``self.config.min_infected_percent``; ``False`` otherwise,
        including when there is no resource, no pattern or no database row.
    """
    if not resource:
        return False

    pattern = patterns.calculate(
        resource.filename, self.get_filename_localparts(), localpart=localpart)
    if not pattern:
        logger.debug("No pattern for filename '%s'.", resource)
        # Return an explicit False (was a bare ``return``) so every
        # negative outcome has the same type.
        return False

    logger.debug("Checking database for pattern: %s", pattern)
    sql = """SELECT DISTINCT f.pattern, (SELECT COUNT(*) FROM filenames f2 WHERE f2.pattern = f.pattern) AS total_cnt, (SELECT COUNT(*) FROM filenames f3 WHERE f3.pattern = f.pattern AND f3.infected=1) AS infected_cnt FROM filenames f WHERE f.pattern = ?"""
    with AutoDB(self.config.database_path) as db:
        cursor = db.connection.cursor()
        try:
            cursor.execute(sql, (pattern, ))
            result = cursor.fetchone()
            db.connection.commit()
        finally:
            # Release the cursor even when the query fails.
            cursor.close()

    if not result:
        return False

    pattern, total, infected = result
    logger.info("Database result for '%s': total: %s, infected: %s",
                pattern, total, infected)
    # Despite its name this is a ratio (infected / total), not a percentage.
    infected_percent = infected / total
    logger.debug(
        "Requirements: %s total matches, %s total (is: %s, %s infected)",
        self.config.min_filename_patterns, self.config.min_infected_percent,
        total, infected_percent)
    return (total >= self.config.min_filename_patterns
            and infected_percent >= self.config.min_infected_percent)
def add_resource(self, resource, vtresult=None, localpart=None, domain=None):
    """Insert a resource into ``filenames`` or update the row that already exists.

    The pattern for ``resource.filename`` is calculated against the values
    from ``self.get_filename_localparts()`` and stored with the resource.
    When the INSERT raises ``sqlite3.IntegrityError`` the pattern and
    timestamp of the row with the same filename are updated instead.
    Finally the infected flag is written to all rows with the same
    ``sha256`` that still have ``infected=0``.

    Args:
        resource: object providing ``filename`` and ``sha256`` attributes.
        vtresult: optional object with an ``infected`` attribute; a falsy
            value means the resource is stored as not infected.
        localpart: stored in the ``localpart`` column and used for the
            pattern calculation.
        domain: stored in the ``domain`` column.
    """
    logger.debug(
        "Adding resource %s with result %s and to (%s, %s) to database",
        resource, vtresult, localpart, domain)
    insert_sql = (
        'INSERT INTO filenames (filename, pattern, infected, "timestamp", '
        'sha256, localpart, domain, chunks) VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
    )
    # NOTE(review): this UPDATE does not touch the chunks column even though
    # the pattern may change - confirm whether that is intended.
    update_sql = 'UPDATE filenames SET pattern = ?, timestamp = ? WHERE filename=?'
    infected = vtresult.infected if vtresult else False
    pattern = patterns.calculate(
        resource.filename, self.get_filename_localparts(), localpart=localpart)
    # No pattern means a stored chunk count of 0.
    no_chunks = len(patterns.split_chunks(pattern, localpart)) if pattern else 0
    values = (
        resource.filename,
        pattern,
        infected,
        datetime.datetime.utcnow(),
        resource.sha256,
        localpart,
        domain,
        no_chunks,
    )
    with AutoDB(self.config.database_path) as db:
        # The cursor is obtained outside ``try``: if obtaining it fails
        # there is nothing to close, and ``finally`` must not call
        # ``close()`` on ``None``.
        cursor = db.connection.cursor()
        try:
            cursor.execute(insert_sql, values)
        except sqlite3.IntegrityError:
            # Constraint violation on insert: update the existing row for
            # this filename instead.
            cursor.execute(
                update_sql,
                (pattern, datetime.datetime.utcnow(), resource.filename))
        finally:
            db.connection.commit()
            cursor.close()

        # Mark other rows with the same hash that are not flagged yet.
        cursor = db.connection.cursor()
        try:
            cursor.execute(
                "UPDATE filenames SET infected=? WHERE sha256=? AND infected=0",
                (int(infected), resource.sha256))
            db.connection.commit()
        finally:
            cursor.close()
def filename_pattern_match(self, resource, localpart=None):
    """Check the resource's filename pattern against the stored infection statistics.

    Calculates the pattern of ``resource.filename`` and queries the
    ``filenames`` table for how many rows share that pattern and how many
    of them are marked ``infected=1``.

    Args:
        resource: object providing a ``filename`` attribute; when falsy the
            method returns ``False`` immediately.
        localpart: forwarded to the pattern calculation.

    Returns:
        ``True`` if the number of rows with the pattern is at least
        ``self.config.min_filename_patterns`` and the infected/total ratio
        is at least ``self.config.min_infected_percent``. ``False`` in all
        other cases (no resource, no pattern, pattern not in the database).
    """
    if not resource:
        return False

    pattern = patterns.calculate(
        resource.filename, self.get_filename_localparts(), localpart=localpart)
    if not pattern:
        logger.debug("No pattern for filename '%s'.", resource)
        # Explicit False instead of the former bare ``return`` (None), in
        # line with the other negative branches.
        return False

    logger.debug("Checking database for pattern: %s", pattern)
    sql = """SELECT DISTINCT f.pattern, (SELECT COUNT(*) FROM filenames f2 WHERE f2.pattern = f.pattern) AS total_cnt, (SELECT COUNT(*) FROM filenames f3 WHERE f3.pattern = f.pattern AND f3.infected=1) AS infected_cnt FROM filenames f WHERE f.pattern = ?"""
    with AutoDB(self.config.database_path) as db:
        cursor = db.connection.cursor()
        try:
            cursor.execute(sql, (pattern, ))
            result = cursor.fetchone()
            db.connection.commit()
        finally:
            # Always close the cursor, also when the query raises.
            cursor.close()

    if not result:
        return False

    pattern, total, infected = result
    logger.info("Database result for '%s': total: %s, infected: %s",
                pattern, total, infected)
    # The value is the infected/total ratio; it is not scaled to percent.
    infected_percent = infected / total
    logger.debug(
        "Requirements: %s total matches, %s total (is: %s, %s infected)",
        self.config.min_filename_patterns, self.config.min_infected_percent,
        total, infected_percent)
    return (total >= self.config.min_filename_patterns
            and infected_percent >= self.config.min_infected_percent)
def test_too_few_chunks(self):
    """Check that a two-chunk name with no candidates yields no pattern."""
    pattern = patterns.calculate('abc-def', [])
    assert pattern is None
def test_empty_or_null(self):
    """Check that ``None`` and the empty string both yield no pattern."""
    for name in (None, ''):
        pattern = patterns.calculate(name, [])
        assert pattern is None
def test_all_chunks_equal(self):
    """Check that a candidate identical to the name yields no pattern."""
    name = 'foo-bar-baz'
    choices = ((name, None),)
    pattern = patterns.calculate(name, choices, localpart=None)
    assert pattern is None
def test_no_choices(self):
    """Check that a three-chunk name with an empty candidate list yields no pattern."""
    no_choices = []
    pattern = patterns.calculate('abc-def-ghi', no_choices)
    assert pattern is None
def test_all_chunks_equal(self):
    """Check that no pattern is returned when the only candidate equals the name."""
    identical_choice = ('foo-bar-baz', None)
    result = patterns.calculate(
        'foo-bar-baz', (identical_choice,), localpart=None)
    assert result is None
def test_no_chunks(self):
    """Check that a name without any separator yields no pattern."""
    pattern = patterns.calculate('abc', [])
    assert pattern is None
def test_random_not_long_enough(self):
    """Check that a one- or two-character differing chunk stays literal.

    Both names differ from the candidate ``foobar-2.zip`` only in the short
    numeric chunk; the expected result keeps that chunk as-is and joins the
    chunks with '-'.
    """
    expectations = (
        ('foobar-1.zip', 'foobar-1-zip'),
        ('foobar-11.zip', 'foobar-11-zip'),
    )
    for name, expected in expectations:
        result = patterns.calculate(name, (('foobar-2.zip', None),))
        assert result == expected