Esempio n. 1
0
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a debug information file (DIF).

    The chunks are stitched together with ``assemble_file``, the resulting
    file is inspected with ``debugfile.detect_dif_from_path`` and, when it
    contains exactly one object, registered for the project through
    ``debugfile.create_dif_from_id``.  Every outcome is reported with
    ``set_assemble_status`` for ``checksum``.

    :param project_id: id of the project that receives the file.
    :param name: client supplied file name; only its basename is passed on.
    :param checksum: checksum of the complete file, also used to report the
        assemble status.
    :param chunks: checksums of the chunks that make up the file.
    :param kwargs: accepted for call compatibility and ignored.
    :raises Exception: unexpected errors are re-raised after the status has
        been switched to ``ChunkFileState.ERROR``.
    """
    from sentry.models import ChunkFileState, debugfile, Project, \
        set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Assemble the chunks into files
    rv = assemble_file(project, name, checksum, chunks,
                       file_type='project.dif')

    # If no file has been created this means that the file failed to
    # assemble because of bad input data.  Return.
    if rv is None:
        return

    file, temp_file = rv
    # The assembled file is removed again unless a DIF takes ownership.
    delete_file = True
    try:
        with temp_file:
            # We only permit split difs to hit this endpoint.  The
            # client is required to split them up first or we error.
            try:
                result = debugfile.detect_dif_from_path(temp_file.name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(result) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(result))
                return

            dif_type, cpu, file_id, _filename, data = result[0]
            _dif, created = debugfile.create_dif_from_id(
                project, dif_type, cpu, file_id, data,
                os.path.basename(name),
                file=file)
            delete_file = False

            if created:
                # Bump the reprocessing revision since the symbol has changed
                # and might resolve processing issues. If the file was not
                # created, someone else has created it and will bump the
                # revision instead.
                bump_reprocessing_revision(project)

            set_assemble_status(project, checksum, ChunkFileState.OK)
    except Exception:
        # Without this the status would stay at ASSEMBLING and the failure
        # would never be reported for this checksum.
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='internal server error')
        raise
    finally:
        if delete_file:
            file.delete()
Esempio n. 2
0
def assemble_file(project, name, checksum, chunks, file_type):
    """Assemble multiple uploaded chunks into one ``File``.

    :param project: project the assemble status is reported for.
    :param name: name given to the created ``File``.
    :param checksum: checksum the assembled file is expected to have.
    :param chunks: chunk checksums in the order in which they make up the
        file; a checksum may appear more than once.
    :param file_type: type stored on the created ``File``.
    :returns: ``(file, temp_file)`` on success, ``None`` when assembling
        failed (the error has then been reported via
        ``set_assemble_status``).
    """
    from sentry.models import File, ChunkFileState, AssembleChecksumMismatch, \
        FileBlob, set_assemble_status

    # Load all FileBlobs from db since we can be sure here we already own all
    # chunks needed to build the file
    file_blobs = FileBlob.objects.filter(checksum__in=chunks).values_list(
        'id', 'checksum')
    ids_by_checksum = {
        blob_checksum: blob_id
        for blob_id, blob_checksum in file_blobs
    }

    # Sanity check.  In case not all blobs exist at this point we have a
    # race condition.
    if set(ids_by_checksum) != set(chunks):
        set_assemble_status(project,
                            checksum,
                            ChunkFileState.ERROR,
                            detail='Not all chunks available for assembling')
        return None

    # The blobs have to follow the order AND the repetition of the request.
    # Sorting the database rows instead would drop chunks that legitimately
    # occur several times and yield a garbage file.
    file_blob_ids = [ids_by_checksum[chunk] for chunk in chunks]

    file = File.objects.create(
        name=name,
        checksum=checksum,
        type=file_type,
    )
    try:
        temp_file = file.assemble_from_file_blob_ids(file_blob_ids, checksum)
    except AssembleChecksumMismatch:
        file.delete()
        set_assemble_status(project,
                            checksum,
                            ChunkFileState.ERROR,
                            detail='Reported checksum mismatch')
        return None
    else:
        file.save()
        return file, temp_file
Esempio n. 3
0
def assemble_file(project, name, checksum, chunks, file_type):
    """Assemble multiple uploaded chunks into one ``File``.

    :param project: project the assemble status is reported for.
    :param name: name given to the created ``File``.
    :param checksum: checksum the assembled file is expected to have.
    :param chunks: ordered chunk checksums; repeated entries are allowed and
        are assembled as often as they are listed.
    :param file_type: type stored on the created ``File``.
    :returns: a ``(file, temp_file)`` tuple, or ``None`` if a chunk is
        missing or the checksum does not match (both cases are reported
        through ``set_assemble_status``).
    """
    from sentry.models import File, ChunkFileState, AssembleChecksumMismatch, \
        FileBlob, set_assemble_status

    # Load all FileBlobs from db since we can be sure here we already own all
    # chunks needed to build the file
    file_blobs = FileBlob.objects.filter(
        checksum__in=chunks
    ).values_list('id', 'checksum')
    blob_id_for = {chks: blob_id for blob_id, chks in file_blobs}

    # Sanity check.  In case not all blobs exist at this point we have a
    # race condition.
    if set(blob_id_for) != set(chunks):
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='Not all chunks available for assembling')
        return None

    # Walk the request's chunk list so that the blob ids keep both the
    # order and the duplication the client sent.  Deriving the list from the
    # database rows would include every distinct chunk only once.
    file_blob_ids = [blob_id_for[chks] for chks in chunks]

    file = File.objects.create(
        name=name,
        checksum=checksum,
        type=file_type,
    )
    try:
        temp_file = file.assemble_from_file_blob_ids(file_blob_ids, checksum)
    except AssembleChecksumMismatch:
        file.delete()
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='Reported checksum mismatch')
        return None
    else:
        file.save()
        return file, temp_file
Esempio n. 4
0
    def post(self, request, project):
        """
        Assemble one or multiple chunks (FileBlob) into debug files
        ```````````````````````````````````````````````````````````

        The request body is a JSON object that maps the 40 character hex
        checksum of each file to an object with the file ``name`` and the
        list of its ``chunks``.  The response maps every checksum to its
        assemble ``state`` and the ``missingChunks``; an invalid body is
        answered with status 400.

        :auth: required
        """
        schema = {
            "type": "object",
            "patternProperties": {
                "^[0-9a-f]{40}$": {
                    "type": "object",
                    "required": ["name", "chunks"],
                    "properties": {
                        "name": {"type": "string"},
                        "chunks": {
                            "type": "array",
                            "items": {"type": "string"}
                        }
                    },
                    "additionalProperties": False
                }
            },
            "additionalProperties": False
        }

        try:
            files = json.loads(request.body)
            jsonschema.validate(files, schema)
        except jsonschema.ValidationError as e:
            return Response({'error': str(e).splitlines()[0]},
                            status=400)
        except Exception:
            # Deliberately not BaseException: interpreter exits and
            # keyboard interrupts must not be turned into a 400 response.
            return Response({'error': 'Invalid json body'},
                            status=400)

        file_response = {}

        from sentry.tasks.assemble import assemble_dif
        for checksum, file_to_assemble in six.iteritems(files):
            name = file_to_assemble.get('name', None)
            chunks = file_to_assemble.get('chunks', [])

            # First, check the cached assemble status. During assembling, a
            # ProjectDebugFile will be created and we need to prevent a race
            # condition.
            state, detail = get_assemble_status(project, checksum)
            if state is not None:
                file_response[checksum] = {
                    'state': state,
                    'detail': detail,
                    'missingChunks': [],
                }
                continue

            # Next, check if this project already owns the ProjectDebugFile.
            # This can under rare circumstances yield more than one file
            # which is why we use first() here instead of get().
            dif = ProjectDebugFile.objects.filter(
                project=project,
                file__checksum=checksum
            ).select_related('file').first()
            if dif is not None:
                file_response[checksum] = {
                    'state': ChunkFileState.OK,
                    'detail': None,
                    'missingChunks': [],
                    'dif': serialize(dif),
                }
                continue

            # There is neither a known file nor a cached state, so we will
            # have to create a new file.  Assure that there are checksums.
            # If not, we assume this is a poll and report NOT_FOUND
            if not chunks:
                file_response[checksum] = {
                    'state': ChunkFileState.NOT_FOUND,
                    'missingChunks': [],
                }
                continue

            # Check if all requested chunks have been uploaded.
            missing_chunks = find_missing_chunks(project.organization, chunks)
            if missing_chunks:
                file_response[checksum] = {
                    'state': ChunkFileState.NOT_FOUND,
                    'missingChunks': missing_chunks,
                }
                continue

            # We don't have a state yet, this means we can now start
            # an assemble job in the background.  Record CREATED first:
            # ``state`` is always None at this point, so passing it on
            # would not mark the file as being worked on.
            set_assemble_status(project, checksum, ChunkFileState.CREATED)
            assemble_dif.apply_async(
                kwargs={
                    'project_id': project.id,
                    'name': name,
                    'checksum': checksum,
                    'chunks': chunks,
                }
            )

            file_response[checksum] = {
                'state': ChunkFileState.CREATED,
                'missingChunks': [],
            }

        return Response(file_response, status=200)
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a dSYM/debug file and build its symcache.

    The chunks are combined with ``assemble_file``; the result is inspected
    with ``dsymfile.detect_dif_from_path`` and has to contain exactly one
    object, which is then registered via ``dsymfile.create_dsym_from_id``.
    If the new file supports symcaches, one is generated right away so a
    conversion error can be reported.  Every outcome is reported with
    ``set_assemble_status`` for ``checksum``.

    :param project_id: id of the project that receives the file.
    :param name: client supplied file name; only its basename is passed on.
    :param checksum: checksum of the complete file, also used to report the
        assemble status.
    :param chunks: checksums of the chunks that make up the file.
    :param kwargs: accepted for call compatibility and ignored.
    :raises Exception: unexpected errors are re-raised after the status has
        been switched to ``ChunkFileState.ERROR``.
    """
    from sentry.models import ChunkFileState, dsymfile, Project, \
        ProjectDSymFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Assemble the chunks into files
    rv = assemble_file(project, name, checksum, chunks,
                       file_type='project.dsym')

    # If no file has been created this means that the file failed to
    # assemble because of bad input data.  Return.
    if rv is None:
        return

    file, temp_file = rv
    # The assembled file is removed again unless a dsym takes ownership.
    delete_file = True
    try:
        with temp_file:
            # We only permit split difs to hit this endpoint.  The
            # client is required to split them up first or we error.
            try:
                result = dsymfile.detect_dif_from_path(temp_file.name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(result) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(result))
                return

            dsym_type, cpu, file_id, _filename = result[0]
            dsym, _created = dsymfile.create_dsym_from_id(
                project, dsym_type, cpu, file_id,
                os.path.basename(name),
                file=file)
            delete_file = False
            bump_reprocessing_revision(project)

            # If we need to write a symcache we can use the
            # `generate_symcache` method to attempt to write one.
            # This way we can also capture the error if we need to.
            if dsym.supports_symcache:
                _symcache, error = ProjectDSymFile.dsymcache.generate_symcache(
                    project, dsym, temp_file)
                if error is not None:
                    set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                        detail=error)
                    return

            set_assemble_status(project, checksum, ChunkFileState.OK)
    except Exception:
        # Without this the status would stay at ASSEMBLING and the failure
        # would never be reported for this checksum.
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='internal server error')
        raise
    finally:
        if delete_file:
            file.delete()
Esempio n. 6
0
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a dsym file inside one transaction.

    Runs entirely within ``transaction.atomic()``: the chunks are combined
    with ``assemble_file``, the file is materialised as a temporary file and
    inspected with ``dsymfile.detect_dif_from_path``, and a single detected
    object is registered via ``dsymfile.create_dsym_from_uuid``.  Finally the
    symcache is requested so conversion errors can be reported.  Each outcome
    is reported with ``set_assemble_status`` for ``checksum``.

    :param project_id: id of the project that receives the file.
    :param name: client supplied file name; only its basename is passed on.
    :param checksum: checksum of the complete file, also used to report the
        assemble status.
    :param chunks: checksums of the chunks that make up the file.
    :param kwargs: accepted for call compatibility and ignored.
    """
    from sentry.models import ChunkFileState, dsymfile, Project, \
        ProjectDSymFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    with transaction.atomic():
        project = Project.objects.filter(id=project_id).get()
        set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

        # Build the file out of its chunks.
        file = assemble_file(
            project, name, checksum, chunks, file_type='project.dsym')

        # Nothing was created: assembling failed because of bad input data.
        if file is None:
            return

        # Stays set until a dsym has taken over the assembled file.
        delete_file = True
        try:
            with file.getfile(as_tempfile=True) as temp_file:
                # Only split difs may hit this endpoint; the client has to
                # split them up first, otherwise an error is reported.
                try:
                    detected = dsymfile.detect_dif_from_path(temp_file.name)
                except BadDif as exc:
                    set_assemble_status(
                        project, checksum, ChunkFileState.ERROR,
                        detail=exc.args[0])
                    return

                object_count = len(detected)
                if object_count != 1:
                    set_assemble_status(
                        project, checksum, ChunkFileState.ERROR,
                        detail='Contained wrong number of '
                        'architectures (expected one, got %s)' % object_count)
                    return

                dsym_type, cpu, file_uuid, filename = detected[0]
                dsym, created = dsymfile.create_dsym_from_uuid(
                    project, dsym_type, cpu, file_uuid,
                    os.path.basename(name), file=file)
                delete_file = False
                bump_reprocessing_revision(project)

                # XXX: the original note here was cut off ("this should only
                # be done for files that ...") -- TODO confirm which files
                # the symcache lookup is meant to be limited to.
                symcache, error = ProjectDSymFile.dsymcache.get_symcache(
                    project, file_uuid, with_conversion_errors=True)
                if error is None:
                    set_assemble_status(project, checksum, ChunkFileState.OK)
                else:
                    set_assemble_status(
                        project, checksum, ChunkFileState.ERROR,
                        detail=error)
        finally:
            if delete_file:
                file.delete()
Esempio n. 7
0
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a debug file and generate its caches.

    The chunks are combined with ``assemble_file``; the result is inspected
    with ``debugfile.detect_dif_from_path`` and has to contain exactly one
    object, which is registered via ``debugfile.create_dif_from_id``.  For a
    newly created DIF the caches are generated immediately; if that fails
    the error is reported and the DIF is deleted again.  Every outcome is
    reported with ``set_assemble_status`` for ``checksum``.

    :param project_id: id of the project that receives the file.
    :param name: client supplied file name; only its basename is passed on.
    :param checksum: checksum of the complete file, also used to report the
        assemble status.
    :param chunks: checksums of the chunks that make up the file.
    :param kwargs: accepted for call compatibility and ignored.
    :raises Exception: unexpected errors are re-raised after the status has
        been switched to ``ChunkFileState.ERROR``.
    """
    from sentry.models import ChunkFileState, debugfile, Project, \
        ProjectDebugFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Assemble the chunks into files
    rv = assemble_file(project, name, checksum, chunks,
                       file_type='project.dif')

    # If no file has been created this means that the file failed to
    # assemble because of bad input data.  Return.
    if rv is None:
        return

    file, temp_file = rv
    # The assembled file is removed again unless a DIF takes ownership.
    delete_file = True
    try:
        with temp_file:
            # We only permit split difs to hit this endpoint.  The
            # client is required to split them up first or we error.
            try:
                result = debugfile.detect_dif_from_path(temp_file.name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(result) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(result))
                return

            dif_type, cpu, file_id, _filename, data = result[0]
            dif, created = debugfile.create_dif_from_id(
                project, dif_type, cpu, file_id, data,
                os.path.basename(name),
                file=file)
            delete_file = False

            if created:
                # Bump the reprocessing revision since the symbol has changed
                # and might resolve processing issues. If the file was not
                # created, someone else has created it and will bump the
                # revision instead.
                bump_reprocessing_revision(project)

                # Try to generate caches from this DIF immediately. If this
                # fails, we can capture the error and report it to the uploader.
                # Also, we remove the file to prevent it from erroring again.
                error = ProjectDebugFile.difcache.generate_caches(
                    project, dif, temp_file.name)
                if error is not None:
                    set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                        detail=error)
                    dif.delete()
                    return

            set_assemble_status(project, checksum, ChunkFileState.OK)
    except Exception:
        # Without this the status would stay at ASSEMBLING and the failure
        # would never be reported for this checksum.
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='internal server error')
        raise
    finally:
        if delete_file:
            file.delete()
Esempio n. 8
0
    def post(self, request, project):
        """
        Assemble one or multiple chunks (FileBlob) into dsym files
        ``````````````````````````````````````````````````````````

        The request body is a JSON object that maps the 40 character hex
        checksum of each file to an object with the file ``name`` and the
        list of its ``chunks``.  The response maps every checksum to its
        assemble ``state`` and the ``missingChunks``; an invalid body is
        answered with status 400.

        :auth: required
        """
        schema = {
            "type": "object",
            "patternProperties": {
                "^[0-9a-f]{40}$": {
                    "type": "object",
                    "required": ["name", "chunks"],
                    "properties": {
                        "name": {"type": "string"},
                        "chunks": {
                            "type": "array",
                            "items": {"type": "string"}
                        }
                    },
                    "additionalProperties": False
                }
            },
            "additionalProperties": False
        }

        try:
            files = json.loads(request.body)
            jsonschema.validate(files, schema)
        except jsonschema.ValidationError as e:
            return Response({'error': str(e).splitlines()[0]},
                            status=400)
        except Exception:
            # Deliberately not BaseException: interpreter exits and
            # keyboard interrupts must not be turned into a 400 response.
            return Response({'error': 'Invalid json body'},
                            status=400)

        file_response = {}

        from sentry.tasks.assemble import assemble_dif
        for checksum, file_to_assemble in six.iteritems(files):
            name = file_to_assemble.get('name', None)
            chunks = file_to_assemble.get('chunks', [])

            # First, check the cached assemble status. During assembling, a
            # ProjectDSymFile will be created and we need to prevent a race
            # condition.
            state, detail = get_assemble_status(project, checksum)
            if state is not None:
                file_response[checksum] = {
                    'state': state,
                    'detail': detail,
                    'missingChunks': [],
                }
                continue

            # Next, check if this project already owns the DSymFile.
            # This can under rare circumstances yield more than one file
            # which is why we use first() here instead of get().
            dif = ProjectDSymFile.objects.filter(
                project=project,
                file__checksum=checksum
            ).select_related('file').first()
            if dif is not None:
                file_response[checksum] = {
                    'state': ChunkFileState.OK,
                    'detail': None,
                    'missingChunks': [],
                    'dif': serialize(dif),
                }
                continue

            # There is neither a known file nor a cached state, so we will
            # have to create a new file.  Assure that there are checksums.
            # If not, we assume this is a poll and report NOT_FOUND
            if not chunks:
                file_response[checksum] = {
                    'state': ChunkFileState.NOT_FOUND,
                    'missingChunks': [],
                }
                continue

            # Check if all requested chunks have been uploaded.
            missing_chunks = find_missing_chunks(project.organization, chunks)
            if missing_chunks:
                file_response[checksum] = {
                    'state': ChunkFileState.NOT_FOUND,
                    'missingChunks': missing_chunks,
                }
                continue

            # We don't have a state yet, this means we can now start
            # an assemble job in the background.  Record CREATED first:
            # ``state`` is always None at this point, so passing it on
            # would not mark the file as being worked on.
            set_assemble_status(project, checksum, ChunkFileState.CREATED)
            assemble_dif.apply_async(
                kwargs={
                    'project_id': project.id,
                    'name': name,
                    'checksum': checksum,
                    'chunks': chunks,
                }
            )

            file_response[checksum] = {
                'state': ChunkFileState.CREATED,
                'missingChunks': [],
            }

        return Response(file_response, status=200)
Esempio n. 9
0
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a debug file and generate its caches.

    The chunks are combined with ``assemble_file``; the result is inspected
    with ``debugfile.detect_dif_from_path`` and has to contain exactly one
    object, which is registered via ``debugfile.create_dif_from_id``.  For a
    newly created DIF the caches are generated immediately; if that fails
    the error is reported and the DIF is deleted again.  On success the
    serialized DIF is stored as the status detail.  Every outcome is
    reported with ``set_assemble_status`` for ``checksum``.

    :param project_id: id of the project that receives the file; also set
        as the ``project`` tag on the configured scope.
    :param name: client supplied file name, forwarded to the DIF detection.
    :param checksum: checksum of the complete file, also used to report the
        assemble status.
    :param chunks: checksums of the chunks that make up the file.
    :param kwargs: accepted for call compatibility and ignored.
    :raises Exception: unexpected errors are re-raised after the status has
        been switched to ``ChunkFileState.ERROR``.
    """
    from sentry.models import ChunkFileState, debugfile, Project, \
        ProjectDebugFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Assemble the chunks into files
    rv = assemble_file(project, name, checksum, chunks,
                       file_type='project.dif')

    # If no file has been created this means that the file failed to
    # assemble because of bad input data.  Return.
    if rv is None:
        return

    file, temp_file = rv
    # The assembled file is removed again unless a DIF takes ownership.
    delete_file = True
    try:
        with temp_file:
            # We only permit split difs to hit this endpoint.  The
            # client is required to split them up first or we error.
            try:
                result = debugfile.detect_dif_from_path(temp_file.name, name=name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(result) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(result))
                return

            dif, created = debugfile.create_dif_from_id(project, result[0], file=file)
            delete_file = False

            if created:
                # Bump the reprocessing revision since the symbol has changed
                # and might resolve processing issues. If the file was not
                # created, someone else has created it and will bump the
                # revision instead.
                bump_reprocessing_revision(project)

                # Try to generate caches from this DIF immediately. If this
                # fails, we can capture the error and report it to the uploader.
                # Also, we remove the file to prevent it from erroring again.
                error = ProjectDebugFile.difcache.generate_caches(
                    project, dif, temp_file.name)
                if error is not None:
                    set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                        detail=error)
                    dif.delete()
                    return

            set_assemble_status(project, checksum, ChunkFileState.OK,
                                detail=serialize(dif))
    except Exception:
        # Without this the status would stay at ASSEMBLING and the failure
        # would never be reported for this checksum.
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='internal server error')
        raise
    finally:
        if delete_file:
            file.delete()
Esempio n. 10
0
def assemble_dif(project_id, name, checksum, chunks, **kwargs):
    """Assemble uploaded chunks into a debug file and build its symcache.

    The chunks are combined with ``assemble_file``; the result is inspected
    with ``debugfile.detect_dif_from_path`` and has to contain exactly one
    object, which is registered via ``debugfile.create_dif_from_id``.  If
    the DIF supports symcaches, one is generated right away; on failure the
    error is reported and the DIF is deleted again.  Every outcome is
    reported with ``set_assemble_status`` for ``checksum``.

    :param project_id: id of the project that receives the file.
    :param name: client supplied file name; only its basename is passed on.
    :param checksum: checksum of the complete file, also used to report the
        assemble status.
    :param chunks: checksums of the chunks that make up the file.
    :param kwargs: accepted for call compatibility and ignored.
    :raises Exception: unexpected errors are re-raised after the status has
        been switched to ``ChunkFileState.ERROR``.
    """
    from sentry.models import ChunkFileState, debugfile, Project, \
        ProjectDebugFile, set_assemble_status, BadDif
    from sentry.reprocessing import bump_reprocessing_revision

    project = Project.objects.filter(id=project_id).get()
    set_assemble_status(project, checksum, ChunkFileState.ASSEMBLING)

    # Assemble the chunks into files
    rv = assemble_file(project, name, checksum, chunks,
                       file_type='project.dif')

    # If no file has been created this means that the file failed to
    # assemble because of bad input data.  Return.
    if rv is None:
        return

    file, temp_file = rv
    # The assembled file is removed again unless a DIF takes ownership.
    delete_file = True
    try:
        with temp_file:
            # We only permit split difs to hit this endpoint.  The
            # client is required to split them up first or we error.
            try:
                result = debugfile.detect_dif_from_path(temp_file.name)
            except BadDif as e:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail=e.args[0])
                return

            if len(result) != 1:
                set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                    detail='Contained wrong number of '
                                    'architectures (expected one, got %s)'
                                    % len(result))
                return

            dif_type, cpu, file_id, _filename = result[0]
            dif, _created = debugfile.create_dif_from_id(
                project, dif_type, cpu, file_id,
                os.path.basename(name),
                file=file)
            delete_file = False
            bump_reprocessing_revision(project)

            # If we need to write a symcache we can use the
            # `generate_symcache` method to attempt to write one.
            # This way we can also capture the error if we need to.
            if dif.supports_symcache:
                _symcache, error = ProjectDebugFile.difcache.generate_symcache(
                    project, dif, temp_file)
                if error is not None:
                    set_assemble_status(project, checksum, ChunkFileState.ERROR,
                                        detail=error)
                    dif.delete()
                    return

            set_assemble_status(project, checksum, ChunkFileState.OK)
    except Exception:
        # Without this the status would stay at ASSEMBLING and the failure
        # would never be reported for this checksum.
        set_assemble_status(project, checksum, ChunkFileState.ERROR,
                            detail='internal server error')
        raise
    finally:
        if delete_file:
            file.delete()