def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a debug information file (DIF).

    Stitches the chunks together via ``assemble_file``, checks that the
    payload contains exactly one architecture, registers the DIF for the
    project and records the final assemble status.
    """
    from sentry.models import ChunkFileState, debugfile, Project, \
        set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    # Tag the error-reporting scope so failures from this task carry the
    # project id.
    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Stitch the chunks together into one file.
    assembled = assemble_file(project, name, checksum, chunks,
                              file_type='project.dif')

    # A None result means assembling failed because of bad input data;
    # the error status has already been recorded, so just bail out.
    if assembled is None:
        return

    file_obj, temp_file = assembled
    cleanup_file = True
    try:
        with temp_file:
            # Only split DIFs may hit this endpoint; the client has to
            # split fat binaries up first or we report an error.
            try:
                detected = debugfile.detect_dif_from_path(temp_file.name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(detected) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(detected))
                return

            dif_type, cpu, file_id, filename, data = detected[0]
            dif, created = debugfile.create_dif_from_id(
                project, dif_type, cpu, file_id, data,
                os.path.basename(name), file=file_obj)
            indicate_success = True
            # Ownership of the file has passed to the DIF record; do not
            # delete it in the cleanup below.
            cleanup_file = False

            if created:
                # Bump the reprocessing revision since the symbol has changed
                # and might resolve processing issues. If the file was not
                # created, someone else has created it and will bump the
                # revision instead.
                bump_reprocessing_revision(project)

            if indicate_success:
                set_assemble_status(project, checksum, ChunkFileState.OK)
    finally:
        if cleanup_file:
            file_obj.delete()
def assemble_file(project, name, checksum, chunks, file_type):
    """Assemble multiple uploaded chunks into one File.

    :param project: the project that owns the chunks
    :param name: name for the created ``File``
    :param checksum: expected checksum of the fully assembled file
    :param chunks: chunk checksums, in the order sent by the client
    :param file_type: value for ``File.type``
    :returns: a ``(file, temp_file)`` tuple on success, or ``None`` after
        recording an ERROR assemble status when chunks are missing or the
        assembled checksum does not match.
    """
    from sentry.models import File, ChunkFileState, AssembleChecksumMismatch, \
        FileBlob, set_assemble_status

    # Load all FileBlobs from db since we can be sure here we already own
    # all chunks needed to build the file.
    file_blobs = FileBlob.objects.filter(checksum__in=chunks).values_list(
        'id', 'checksum')

    # Sanity check. In case not all blobs exist at this point we have a
    # race condition.
    if set(x[1] for x in file_blobs) != set(chunks):
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='Not all chunks available for assembling')
        return

    # We need to make sure the blobs are in the order in which we received
    # them from the request; otherwise the file would be assembled from
    # chunks in the wrong order and yield a garbage file. Precompute the
    # first position of each checksum instead of calling list.index() per
    # blob, which is quadratic.
    chunk_order = {}
    for position, chunk_checksum in enumerate(chunks):
        chunk_order.setdefault(chunk_checksum, position)

    file_blob_ids = [
        blob_id for blob_id, blob_checksum in
        sorted(file_blobs, key=lambda blob: chunk_order[blob[1]])
    ]

    file = File.objects.create(
        name=name,
        checksum=checksum,
        type=file_type,
    )

    try:
        temp_file = file.assemble_from_file_blob_ids(file_blob_ids, checksum)
    except AssembleChecksumMismatch:
        # The assembled bytes do not hash to the reported checksum; the
        # upload is corrupt, so discard the File record.
        file.delete()
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='Reported checksum mismatch')
    else:
        file.save()
        return file, temp_file
def assemble_file(project, name, checksum, chunks, file_type):
    """Stitch the given chunks together into a single ``File``.

    Returns ``(file, temp_file)`` on success. When chunks are missing or
    the resulting checksum mismatches, an ERROR assemble status is
    recorded and ``None`` is returned.
    """
    from sentry.models import File, ChunkFileState, AssembleChecksumMismatch, \
        FileBlob, set_assemble_status

    # All chunks should already be owned at this point, so fetch the
    # matching blobs straight from the database.
    blobs = FileBlob.objects.filter(
        checksum__in=chunks
    ).values_list('id', 'checksum')

    # Restore the order in which the chunks arrived in the request;
    # assembling them in any other order would produce a garbage file.
    ordered_blob_ids = [
        blob_id
        for blob_id, blob_checksum in sorted(
            blobs, key=lambda blob: chunks.index(blob[1]))
    ]

    # Sanity check: if any blob is missing at this point we hit a race
    # condition and cannot assemble.
    if set(blob_checksum for _, blob_checksum in blobs) != set(chunks):
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='Not all chunks available for assembling')
        return

    assembled = File.objects.create(
        name=name,
        checksum=checksum,
        type=file_type,
    )

    try:
        temp_file = assembled.assemble_from_file_blob_ids(
            ordered_blob_ids, checksum)
    except AssembleChecksumMismatch:
        assembled.delete()
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='Reported checksum mismatch')
    else:
        assembled.save()
        return assembled, temp_file
def post(self, request, project):
    """
    Assemble one or multiple chunks (FileBlob) into debug files
    ```````````````````````````````````````````````````````````

    :auth: required
    """
    schema = {
        "type": "object",
        "patternProperties": {
            "^[0-9a-f]{40}$": {
                "type": "object",
                "required": ["name", "chunks"],
                "properties": {
                    "name": {"type": "string"},
                    "chunks": {
                        "type": "array",
                        "items": {"type": "string"}
                    }
                },
                "additionalProperties": False
            }
        },
        "additionalProperties": False
    }

    try:
        files = json.loads(request.body)
        jsonschema.validate(files, schema)
    except jsonschema.ValidationError as e:
        return Response({'error': str(e).splitlines()[0]}, status=400)
    except Exception:
        # Catch Exception, not BaseException: a bad payload should yield
        # a 400, but SystemExit/KeyboardInterrupt must propagate.
        return Response({'error': 'Invalid json body'}, status=400)

    file_response = {}

    from sentry.tasks.assemble import assemble_dif
    for checksum, file_to_assemble in six.iteritems(files):
        name = file_to_assemble.get('name', None)
        chunks = file_to_assemble.get('chunks', [])

        # First, check the cached assemble status. During assembling, a
        # ProjectDebugFile will be created and we need to prevent a race
        # condition.
        state, detail = get_assemble_status(project, checksum)
        if state is not None:
            file_response[checksum] = {
                'state': state,
                'detail': detail,
                'missingChunks': [],
            }
            continue

        # Next, check if this project already owns the ProjectDebugFile.
        # This can under rare circumstances yield more than one file
        # which is why we use first() here instead of get().
        dif = ProjectDebugFile.objects.filter(
            project=project,
            file__checksum=checksum
        ).select_related('file').first()
        if dif is not None:
            file_response[checksum] = {
                'state': ChunkFileState.OK,
                'detail': None,
                'missingChunks': [],
                'dif': serialize(dif),
            }
            continue

        # There is neither a known file nor a cached state, so we will
        # have to create a new file. Assure that there are checksums.
        # If not, we assume this is a poll and report NOT_FOUND
        if not chunks:
            file_response[checksum] = {
                'state': ChunkFileState.NOT_FOUND,
                'missingChunks': [],
            }
            continue

        # Check if all requested chunks have been uploaded.
        missing_chunks = find_missing_chunks(project.organization, chunks)
        if missing_chunks:
            file_response[checksum] = {
                'state': ChunkFileState.NOT_FOUND,
                'missingChunks': missing_chunks,
            }
            continue

        # We don't have a state yet, this means we can now start an
        # assemble job in the background. Record CREATED (matching the
        # response below) so concurrent polls don't schedule the same job
        # again -- the previous code passed ``state``, which is always
        # None on this path and therefore never recorded anything.
        set_assemble_status(project, checksum, ChunkFileState.CREATED)
        assemble_dif.apply_async(
            kwargs={
                'project_id': project.id,
                'name': name,
                'checksum': checksum,
                'chunks': chunks,
            }
        )

        file_response[checksum] = {
            'state': ChunkFileState.CREATED,
            'missingChunks': [],
        }

    return Response(file_response, status=200)
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a dSYM file and register it.

    Also attempts to write a symcache for symcache-capable files,
    reporting any conversion error through the assemble status.
    """
    from sentry.models import ChunkFileState, dsymfile, Project, \
        ProjectDSymFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Stitch the chunks together into one file.
    assembled = assemble_file(project, name, checksum, chunks,
                              file_type='project.dsym')

    # A None result means assembling failed because of bad input data;
    # the error status has already been recorded.
    if assembled is None:
        return

    file_obj, temp_file = assembled
    cleanup_file = True
    try:
        with temp_file:
            # Only split difs may hit this endpoint; the client has to
            # split fat binaries up first or we report an error.
            try:
                detected = dsymfile.detect_dif_from_path(temp_file.name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(detected) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(detected))
                return

            dsym_type, cpu, file_id, filename = detected[0]
            dsym, created = dsymfile.create_dsym_from_id(
                project, dsym_type, cpu, file_id,
                os.path.basename(name), file=file_obj)
            # Ownership of the file has passed to the dsym record.
            cleanup_file = False
            bump_reprocessing_revision(project)

            indicate_success = True

            # If we need to write a symcache we can use the
            # `generate_symcache` method to attempt to write one. This way
            # we can also capture down the error if we need to.
            if dsym.supports_symcache:
                symcache, error = ProjectDSymFile.dsymcache.generate_symcache(
                    project, dsym, temp_file)
                if error is not None:
                    set_assemble_status(project, checksum,
                                        ChunkFileState.ERROR, detail=error)
                    indicate_success = False

            if indicate_success:
                set_assemble_status(project, checksum, ChunkFileState.OK)
    finally:
        if cleanup_file:
            file_obj.delete()
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble chunks into a dSYM file, register it and fetch its symcache.

    Background task; progress and failures are reported to the uploader
    through ``set_assemble_status``.
    """
    from sentry.models import ChunkFileState, dsymfile, Project, \
        ProjectDSymFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    # NOTE(review): indentation reconstructed from a flattened source; the
    # atomic block appears to span the entire assemble, including file I/O
    # and symcache conversion -- confirm this transaction scope is intended.
    with transaction.atomic():
        project = Project.objects.filter(id=project_id).get()
        set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

        # Assemble the chunks into files
        file = assemble_file(project, name, checksum, chunks,
                             file_type='project.dsym')

        # If no file has been created this means that the file failed to
        # assemble because of bad input data. Return.
        if file is None:
            return

        delete_file = True
        try:
            with file.getfile(as_tempfile=True) as tf:
                # We only permit split difs to hit this endpoint. The
                # client is required to split them up first or we error.
                try:
                    result = dsymfile.detect_dif_from_path(tf.name)
                except BadDif as e:
                    set_assemble_status(project, checksum,
                                        ChunkFileState.ERROR,
                                        detail=e.args[0])
                    return

                if len(result) != 1:
                    set_assemble_status(
                        project, checksum, ChunkFileState.ERROR,
                        detail='Contained wrong number of '
                        'architectures (expected one, got %s)' % len(result))
                    return

                dsym_type, cpu, file_uuid, filename = result[0]
                dsym, created = dsymfile.create_dsym_from_uuid(
                    project, dsym_type, cpu, file_uuid,
                    os.path.basename(name), file=file)
                # The File is now owned by the dsym record; skip the
                # cleanup delete below.
                delete_file = False
                bump_reprocessing_revision(project)

                # XXX: this should only be done for files that
                # support symcaches -- presumably; the original comment is
                # truncated in the source. TODO confirm intent.
                symcache, error = ProjectDSymFile.dsymcache.get_symcache(
                    project, file_uuid, with_conversion_errors=True)
                if error is not None:
                    set_assemble_status(project, checksum,
                                        ChunkFileState.ERROR, detail=error)
                else:
                    set_assemble_status(project, checksum, ChunkFileState.OK)
        finally:
            if delete_file:
                file.delete()
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a debug information file (DIF).

    Registers the resulting ProjectDebugFile, bumps the reprocessing
    revision for newly created files and eagerly generates caches,
    reporting progress and errors via ``set_assemble_status``.
    """
    from sentry.models import ChunkFileState, debugfile, Project, \
        ProjectDebugFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Stitch the chunks together into one file.
    assembled = assemble_file(project, name, checksum, chunks,
                              file_type='project.dif')

    # A None result means assembling failed because of bad input data;
    # the error status has already been recorded.
    if assembled is None:
        return

    file_obj, temp_file = assembled
    cleanup_file = True
    try:
        with temp_file:
            # Only split DIFs may hit this endpoint; the client has to
            # split fat binaries up first or we report an error.
            try:
                detected = debugfile.detect_dif_from_path(temp_file.name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(detected) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(detected))
                return

            dif_type, cpu, file_id, filename, data = detected[0]
            dif, created = debugfile.create_dif_from_id(
                project, dif_type, cpu, file_id, data,
                os.path.basename(name), file=file_obj)
            indicate_success = True
            # Ownership of the file has passed to the DIF record.
            cleanup_file = False

            if created:
                # Bump the reprocessing revision since the symbol has changed
                # and might resolve processing issues. If the file was not
                # created, someone else has created it and will bump the
                # revision instead.
                bump_reprocessing_revision(project)

            # Try to generate caches from this DIF immediately. If this
            # fails, capture the error and report it to the uploader; also
            # remove the DIF to prevent it from erroring again.
            error = ProjectDebugFile.difcache.generate_caches(
                project, dif, temp_file.name)
            if error is not None:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=error)
                indicate_success = False
                dif.delete()

            if indicate_success:
                set_assemble_status(project, checksum, ChunkFileState.OK)
    finally:
        if cleanup_file:
            file_obj.delete()
def post(self, request, project):
    """
    Assemble one or multiple chunks (FileBlob) into dsym files
    ``````````````````````````````````````````````````````````

    :auth: required
    """
    schema = {
        "type": "object",
        "patternProperties": {
            "^[0-9a-f]{40}$": {
                "type": "object",
                "required": ["name", "chunks"],
                "properties": {
                    "name": {"type": "string"},
                    "chunks": {
                        "type": "array",
                        "items": {"type": "string"}
                    }
                },
                "additionalProperties": False
            }
        },
        "additionalProperties": False
    }

    try:
        files = json.loads(request.body)
        jsonschema.validate(files, schema)
    except jsonschema.ValidationError as e:
        return Response({'error': str(e).splitlines()[0]}, status=400)
    except Exception:
        # Catch Exception, not BaseException: a bad payload should yield
        # a 400, but SystemExit/KeyboardInterrupt must propagate.
        return Response({'error': 'Invalid json body'}, status=400)

    file_response = {}

    from sentry.tasks.assemble import assemble_dif
    for checksum, file_to_assemble in six.iteritems(files):
        name = file_to_assemble.get('name', None)
        chunks = file_to_assemble.get('chunks', [])

        # First, check the cached assemble status. During assembling, a
        # ProjectDSymFile will be created and we need to prevent a race
        # condition.
        state, detail = get_assemble_status(project, checksum)
        if state is not None:
            file_response[checksum] = {
                'state': state,
                'detail': detail,
                'missingChunks': [],
            }
            continue

        # Next, check if this project already owns the DSymFile.
        # This can under rare circumstances yield more than one file
        # which is why we use first() here instead of get().
        dif = ProjectDSymFile.objects.filter(
            project=project,
            file__checksum=checksum
        ).select_related('file').first()
        if dif is not None:
            file_response[checksum] = {
                'state': ChunkFileState.OK,
                'detail': None,
                'missingChunks': [],
                'dif': serialize(dif),
            }
            continue

        # There is neither a known file nor a cached state, so we will
        # have to create a new file. Assure that there are checksums.
        # If not, we assume this is a poll and report NOT_FOUND
        if not chunks:
            file_response[checksum] = {
                'state': ChunkFileState.NOT_FOUND,
                'missingChunks': [],
            }
            continue

        # Check if all requested chunks have been uploaded.
        missing_chunks = find_missing_chunks(project.organization, chunks)
        if missing_chunks:
            file_response[checksum] = {
                'state': ChunkFileState.NOT_FOUND,
                'missingChunks': missing_chunks,
            }
            continue

        # We don't have a state yet, this means we can now start an
        # assemble job in the background. Record CREATED (matching the
        # response below) so concurrent polls don't schedule the same job
        # again -- the previous code passed ``state``, which is always
        # None on this path and therefore never recorded anything.
        set_assemble_status(project, checksum, ChunkFileState.CREATED)
        assemble_dif.apply_async(
            kwargs={
                'project_id': project.id,
                'name': name,
                'checksum': checksum,
                'chunks': chunks,
            }
        )

        file_response[checksum] = {
            'state': ChunkFileState.CREATED,
            'missingChunks': [],
        }

    return Response(file_response, status=200)
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a debug information file (DIF).

    Registers the resulting ProjectDebugFile, eagerly generates caches,
    and records the final status (including the serialized DIF on
    success) via ``set_assemble_status``.
    """
    from sentry.models import ChunkFileState, debugfile, Project, \
        ProjectDebugFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    # Tag the error-reporting scope so failures from this task carry the
    # project id.
    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Stitch the chunks together into one file.
    assembled = assemble_file(project, name, checksum, chunks,
                              file_type='project.dif')

    # A None result means assembling failed because of bad input data;
    # the error status has already been recorded.
    if assembled is None:
        return

    file_obj, temp_file = assembled
    cleanup_file = True
    try:
        with temp_file:
            # Only split DIFs may hit this endpoint; the client has to
            # split fat binaries up first or we report an error.
            try:
                detected = debugfile.detect_dif_from_path(
                    temp_file.name, name=name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(detected) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(detected))
                return

            dif, created = debugfile.create_dif_from_id(
                project, detected[0], file=file_obj)
            indicate_success = True
            # Ownership of the file has passed to the DIF record.
            cleanup_file = False

            if created:
                # Bump the reprocessing revision since the symbol has changed
                # and might resolve processing issues. If the file was not
                # created, someone else has created it and will bump the
                # revision instead.
                bump_reprocessing_revision(project)

            # Try to generate caches from this DIF immediately. If this
            # fails, capture the error and report it to the uploader; also
            # remove the DIF to prevent it from erroring again.
            error = ProjectDebugFile.difcache.generate_caches(
                project, dif, temp_file.name)
            if error is not None:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=error)
                indicate_success = False
                dif.delete()

            if indicate_success:
                set_assemble_status(project, checksum, ChunkFileState.OK,
                                    detail=serialize(dif))
    finally:
        if cleanup_file:
            file_obj.delete()
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a debug information file (DIF).

    Registers the DIF for the project and, for symcache-capable files,
    attempts to write a symcache, deleting the DIF again if conversion
    fails.
    """
    from sentry.models import ChunkFileState, debugfile, Project, \
        ProjectDebugFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Stitch the chunks together into one file.
    assembled = assemble_file(project, name, checksum, chunks,
                              file_type='project.dif')

    # A None result means assembling failed because of bad input data;
    # the error status has already been recorded.
    if assembled is None:
        return

    file_obj, temp_file = assembled
    cleanup_file = True
    try:
        with temp_file:
            # Only split difs may hit this endpoint; the client has to
            # split fat binaries up first or we report an error.
            try:
                detected = debugfile.detect_dif_from_path(temp_file.name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(detected) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(detected))
                return

            dif_type, cpu, file_id, filename = detected[0]
            dif, created = debugfile.create_dif_from_id(
                project, dif_type, cpu, file_id,
                os.path.basename(name), file=file_obj)
            # Ownership of the file has passed to the DIF record.
            cleanup_file = False
            bump_reprocessing_revision(project)

            indicate_success = True

            # If we need to write a symcache we can use the
            # `generate_symcache` method to attempt to write one. This way
            # we can also capture down the error if we need to.
            if dif.supports_symcache:
                symcache, error = ProjectDebugFile.difcache.generate_symcache(
                    project, dif, temp_file)
                if error is not None:
                    set_assemble_status(project, checksum,
                                        ChunkFileState.ERROR, detail=error)
                    indicate_success = False
                    dif.delete()

            if indicate_success:
                set_assemble_status(project, checksum, ChunkFileState.OK)
    finally:
        if cleanup_file:
            file_obj.delete()