def random_sample_by_year(year):
    """Return a JSON-serialized random sample built in the given year.

    Raises InvalidUsage for a non-integer or out-of-range year, and
    NotFound when no sample has a build_timestamp in that year.
    """
    try:
        year = int(year)
    except ValueError:
        raise InvalidUsage('Given year is not an integer')
    if year < 1970:
        raise InvalidUsage('Given year should be above 1970')
    if year > 3000:
        raise InvalidUsage('Given year should be below 3000')
    # Year boundaries as timestamp strings, passed as bound query
    # parameters instead of being %-interpolated into the SQL text.
    lower = '%i-01-01 00:00:00' % year
    upper = '%i-01-01 00:00:00' % (year + 1)
    with get_db().cursor() as cursor:
        cursor.execute(
            '''
            SELECT COUNT(*) FROM sample
            WHERE (%s <= build_timestamp) AND (build_timestamp < %s)
            ''', (lower, upper))
        count = cursor.fetchall()[0][0]
        if count == 0:
            # Without this guard, random.randint(0, -1) below raised
            # ValueError for a year with no samples.
            raise NotFound()
        rand = random.randint(0, count - 1)
        cursor.execute(
            '''
            SELECT hash_sha256 FROM sample
            WHERE (%s <= build_timestamp) AND (build_timestamp < %s)
            LIMIT 1 OFFSET %s
            ''', (lower, upper, rand))
        random_sha256 = cursor.fetchall()[0][0]
        return jsonify(JsonFactory().from_sample(
            get_sample_repository().by_hash_sha256(random_sha256)))
def bulk():
    """Resolve the uploaded "hashes" file to samples and return them as JSON.

    Requires a valid API key in the X-ApiKey request header.
    """
    key = request.headers.get('X-ApiKey')
    if not key:
        raise InvalidUsage('API key required for this action')
    validate_api_key(key)
    if 'hashes' not in request.files:
        raise InvalidUsage('Field "hashes" does not exist')
    ids = get_sample_repository().ids_by_hashes(request.files['hashes'])
    matched = get_sample_repository().by_ids(ids)
    return jsonify([JsonFactory().from_sample(s) for s in matched])
def get_sample(hash):
    """Look up a sample by its MD5, SHA1 or SHA256 hex digest.

    Raises InvalidUsage (400) for an empty, non-hex or wrong-length
    hash, NotFound when no sample matches.
    """
    if not hash:
        raise InvalidUsage('Hash empty', status_code=400)
    if any(c not in string.hexdigits for c in hash):
        raise InvalidUsage('Hash may only contain hex chars', status_code=400)
    # Digest length determines which hash column to query.
    length = len(hash)
    if length not in (32, 40, 64):
        raise InvalidUsage(
            'Hash is not of any of the following lengths: 64, 32, 40',
            status_code=400)
    repository = get_sample_repository()
    lookup = {
        64: repository.by_hash_sha256,
        32: repository.by_hash_md5,
        40: repository.by_hash_sha1,
    }[length]
    sample = lookup(hash)
    if sample is None:
        raise NotFound()
    return jsonify(JsonFactory().from_sample(sample))
def random_samples(count):
    """Return JSON for *count* random samples.

    Counts above 50 require a valid, existing API key in the
    X-ApiKey request header.
    """
    try:
        count = int(count)
    except ValueError:
        raise InvalidUsage('Given count is not an integer')
    if count <= 0:
        raise InvalidUsage('Given count should be above 0')
    if count > 50:
        # Large requests are gated behind an API key.
        key = request.headers.get('X-ApiKey')
        if not key:
            raise InvalidUsage(
                'Given count should be below 50 or you have to pass an API key'
            )
        validate_api_key(key)
        if not get_api_key_repository().exists(key):
            raise InvalidUsage('API key does not exist')
    picked = get_sample_repository().random_by_id(count)
    return jsonify([JsonFactory().from_sample(s) for s in picked])
def newest_samples():
    """Return JSON for the ten most recently added samples."""
    latest = get_sample_repository().newest(10)
    return jsonify([JsonFactory().from_sample(s) for s in latest])
def get_samples_by_section(sha256):
    """Return JSON for all samples containing a section with this SHA256."""
    validate_sha256(sha256)
    matches = get_sample_repository().by_section_hash(sha256)
    return jsonify([JsonFactory().from_sample(m) for m in matches])
timer.mark('read_extractors') sample = Sample() extractors = get_extractors(args.r2 or (task and task.type == 'R2Disassembly')).values() logger.debug('Enabled Extractors: %s' % extractors) for extractor in extractors: timer.mark('extractor_%s' % extractor.__class__.__name__) try: extractor.extract(sample) except Exception as e: logger.error('%s' % e) if sentry: sentry.captureException() timer.mark('output') out = JsonFactory(args.filter).from_sample(sample) if task: out['task_id'] = task.id if 'source_id' in task.payload.keys(): out['source_id'] = task.payload['source_id'] if 'tags' in task.payload.keys(): out['tags'] = task.payload['tags'] if 'file_names' in task.payload.keys(): out['file_names'] = task.payload['file_names'] if args.server: import requests r = requests.post( kurasuta_api.get_sha256_url(sample.hash_sha256), data=json.dumps(out, cls=DateTimeEncoder),
# Import-run bookkeeping (updated further down in the script).
imported = 0
skipped = 0
# Connection settings come exclusively from environment variables.
logger.debug('Connecting to postgres database...')
db = psycopg2.connect(os.environ['POSTGRES_DATABASE_LINK'])
logger.debug('Connecting to arango database...')
arango_connection = Connection(
    arangoURL=os.environ['ARANGODB_URL'],
    username=os.environ['ARANGODB_USERNAME'],
    password=os.environ['ARANGODB_PASSWORD']
)
arango_database = arango_connection['kurasuta']
sample_repository = SampleRepository(db)
json_factory = JsonFactory()
# Read the hash list, skipping blank lines.
with open(hash_file) as fp:
    hashes = [line.strip() for line in fp if line.strip()]
logger.info('Found %i hashes' % len(hashes))


def exists_in_collection(key, collection):
    """Return True if *key* exists as a document key in *collection*.

    pyArango raises KeyError for a missing document; we translate that
    into a boolean.
    """
    # this can go away as soon as https://github.com/tariqdaouda/pyArango/issues/119 is resolved
    try:
        doc = collection[key]
    except KeyError:
        return False
    return True
parser.add_argument('target_file_name')
args = parser.parse_args()
target_file_name = args.target_file_name

# Ids already dumped to the target file, so interrupted runs can resume.
# Stored in a set: the per-id membership test below is O(1) instead of
# O(n) over a potentially huge list (the loop runs once per sample row).
existing_ids = set()
if os.path.exists(target_file_name):
    with open(target_file_name, 'r') as fp:
        for line in fp:
            line = line.strip()
            if not line:
                continue
            existing_ids.add(json.loads(line)['id'])

db = psycopg2.connect(os.environ['POSTGRES_DATABASE_LINK'])
sample_repository = SampleRepository(db)
json_factory = JsonFactory()
with db.cursor() as cursor:
    logger.info('Selecting all ids...')
    cursor.execute('SELECT id FROM sample')
    logger.info('Found %i ids.' % cursor.rowcount)
    for row in cursor:
        sample_id = row[0]  # renamed from `id` to avoid shadowing the builtin
        if sample_id in existing_ids:
            continue
        samples = sample_repository.by_ids([sample_id])
        logger.info('Dumping sample with id %s...' % sample_id)
        # Reopened in append mode per sample so progress survives a crash.
        with open(target_file_name, 'a') as fp:
            for sample in samples:
                fp.write('%s\n' % json.dumps(json_factory.from_sample(sample)))
logger.info('All done.')