Example no. 1
0
    def parse_mag(self, data, **kwargs):
        """Return a `Photometry` object from a `PhotometryMag` marshmallow
        schema.

        Parameters
        ----------
        data : dict
            The instance of the PhotometryMag schema to convert to Photometry.

        Returns
        -------
        Photometry
            The Photometry object generated from the PhotometryMag dict.

        Raises
        ------
        ValidationError
            If mag/magerr nullability is inconsistent, or the instrument ID,
            object ID, or filter is invalid.
        """

        from skyportal.models import Instrument, Obj, PHOT_SYS, PHOT_ZP, Photometry
        from sncosmo.photdata import PhotometricData

        # mag and magerr must both be null (a non-detection) or both be
        # non-null (a detection) -- never a mix.
        if (data['mag'] is None) != (data['magerr'] is None):
            raise ValidationError(f'Error parsing packet "{data}": mag '
                                  f'and magerr must both be null, or both be '
                                  f'not null.')

        # get the instrument
        instrument = Instrument.query.get(data['instrument_id'])
        if not instrument:
            raise ValidationError(
                f'Invalid instrument ID: {data["instrument_id"]}')

        # get the object
        obj = Obj.query.get(
            data['obj_id'])  # TODO: implement permissions checking
        if not obj:
            raise ValidationError(f'Invalid object ID: {data["obj_id"]}')

        if data["filter"] not in instrument.filters:
            raise ValidationError(
                f"Instrument {instrument.name} has no filter "
                f"{data['filter']}.")

        # determine if this is a limit or a measurement
        hasmag = data['mag'] is not None

        if hasmag:
            # detection: convert mag -> flux at the internal zeropoint and
            # propagate the magnitude error to a flux error
            flux = 10**(-0.4 * (data['mag'] - PHOT_ZP))
            fluxerr = data['magerr'] / (2.5 / np.log(10)) * flux
        else:
            # non-detection: treat limiting_mag as an n-sigma upper limit and
            # back out the 1-sigma flux error
            nsigflux = 10**(-0.4 * (data['limiting_mag'] - PHOT_ZP))
            flux = None
            fluxerr = nsigflux / PHOT_DETECTION_THRESHOLD

        # convert flux to microJanskies.
        table = Table([{
            'flux': flux,
            'fluxerr': fluxerr,
            'magsys': data['magsys'],
            'zp': PHOT_ZP,
            'filter': data['filter'],
            'mjd': data['mjd'],
        }])
        if flux is None:
            # this needs to be non-null for the conversion step
            # will be replaced later with null
            table['flux'] = 0.0

        # conversion happens here
        photdata = PhotometricData(table).normalized(zp=PHOT_ZP,
                                                     zpsys=PHOT_SYS)

        # replace with null if needed
        final_flux = None if flux is None else photdata.flux[0]

        p = Photometry(
            obj_id=data['obj_id'],
            mjd=data['mjd'],
            flux=final_flux,
            fluxerr=photdata.fluxerr[0],
            instrument_id=data['instrument_id'],
            assignment_id=data['assignment_id'],
            filter=data['filter'],
            ra=data['ra'],
            dec=data['dec'],
            ra_unc=data['ra_unc'],
            dec_unc=data['dec_unc'],
        )
        if 'alert_id' in data and data['alert_id'] is not None:
            p.alert_id = data['alert_id']
        return p
Example no. 2
0
    def standardize_photometry_data(self):
        """Parse the photometry JSON of the current request into a validated,
        flux-standardized `pandas.DataFrame`.

        The payload is validated against `PhotMagFlexible` first and, failing
        that, `PhotFluxFlexible`. Magnitude-space packets are converted to
        fluxes; all fluxes are then normalized via sncosmo to the PHOT_ZP
        zeropoint in the AB system and stored in new `standardized_flux` /
        `standardized_fluxerr` columns.

        Returns
        -------
        tuple of (pandas.DataFrame, dict)
            The standardized photometry table, and a cache mapping
            instrument ID -> `Instrument` instance for every instrument
            referenced by the payload.

        Raises
        ------
        ValidationError
            If the payload cannot be parsed, a required field is missing or
            non-finite, or an instrument/object ID is invalid.
        """

        data = self.get_json()

        if not isinstance(data, dict):
            raise ValidationError(
                'Top level JSON must be an instance of `dict`, got '
                f'{type(data)}.')

        # drop an empty altdata so it does not interfere with schema
        # validation or the DataFrame construction below
        if "altdata" in data and not data["altdata"]:
            del data["altdata"]

        # quick validation - just to make sure things have the right fields
        try:
            data = PhotMagFlexible.load(data)
        except ValidationError as e1:
            try:
                data = PhotFluxFlexible.load(data)
            except ValidationError as e2:
                raise ValidationError(
                    'Invalid input format: Tried to parse data '
                    f'in mag space, got: '
                    f'"{e1.normalized_messages()}." Tried '
                    f'to parse data in flux space, got:'
                    f' "{e2.normalized_messages()}."')
            else:
                kind = 'flux'
        else:
            kind = 'mag'

        # not used here
        _ = data.pop('group_ids', None)

        # a single all-scalar packet was posted; wrap it so it becomes one row
        if allscalar(data):
            data = [data]

        try:
            df = pd.DataFrame(data)
        except ValueError as e:
            if "altdata" in data and "Mixing dicts with non-Series" in str(e):
                # altdata was given as a dict of lists; transpose it into a
                # list of dicts (one per packet) and retry
                try:
                    data["altdata"] = [{
                        key: value[i]
                        for key, value in data["altdata"].items()
                    } for i in range(
                        len(data["altdata"][list(data["altdata"].keys())[-1]]))
                                       ]
                    df = pd.DataFrame(data)
                except ValueError:
                    raise ValidationError(
                        'Unable to coerce passed JSON to a series of packets. '
                        f'Error was: "{e}"')
            else:
                raise ValidationError(
                    'Unable to coerce passed JSON to a series of packets. '
                    f'Error was: "{e}"')

        # `to_numeric` coerces numbers written as strings to numeric types
        #  (int, float)

        #  errors='ignore' means if something is actually an alphanumeric
        #  string, just leave it alone and dont error out

        #  apply is used to apply it to each column
        # (https://stackoverflow.com/questions/34844711/convert-entire-pandas
        # -dataframe-to-integers-in-pandas-0-17-0/34844867
        df = df.apply(pd.to_numeric, errors='ignore')

        # set origin to '' where it is None.
        df.loc[df['origin'].isna(), 'origin'] = ''

        if kind == 'mag':
            # ensure that neither or both mag and magerr are null
            magnull = df['mag'].isna()
            magerrnull = df['magerr'].isna()
            # detection mask: rows with a measured magnitude
            magdet = ~magnull

            # https://en.wikipedia.org/wiki/Bitwise_operation#XOR
            bad = magerrnull ^ magnull  # bitwise exclusive or -- returns true
            #  if A and not B or B and not A

            # coerce to numpy array
            bad = bad.values

            if any(bad):
                # find the first offending packet
                first_offender = np.argwhere(bad)[0, 0]
                packet = df.iloc[first_offender].to_dict()

                # coerce nans to nones
                for key in packet:
                    if key != 'standardized_flux':
                        packet[key] = nan_to_none(packet[key])

                raise ValidationError(
                    f'Error parsing packet "{packet}": mag '
                    f'and magerr must both be null, or both be '
                    f'not null.')

            # infinite mags cannot be converted to meaningful fluxes
            for field in ['mag', 'magerr', 'limiting_mag']:
                infinite = np.isinf(df[field].values)
                if any(infinite):
                    first_offender = np.argwhere(infinite)[0, 0]
                    packet = df.iloc[first_offender].to_dict()

                    # coerce nans to nones
                    for key in packet:
                        packet[key] = nan_to_none(packet[key])

                    raise ValidationError(f'Error parsing packet "{packet}": '
                                          f'field {field} must be finite.')

            # ensure nothing is null for the required fields
            for field in PhotMagFlexible.required_keys:
                missing = df[field].isna()
                if any(missing):
                    first_offender = np.argwhere(missing)[0, 0]
                    packet = df.iloc[first_offender].to_dict()

                    # coerce nans to nones
                    for key in packet:
                        packet[key] = nan_to_none(packet[key])

                    raise ValidationError(f'Error parsing packet "{packet}": '
                                          f'missing required field {field}.')

            # convert the mags to fluxes
            # detections
            detflux = 10**(-0.4 * (df[magdet]['mag'] - PHOT_ZP))
            detfluxerr = df[magdet]['magerr'] / (2.5 / np.log(10)) * detflux

            # non-detections
            limmag_flux = 10**(-0.4 * (df[magnull]['limiting_mag'] - PHOT_ZP))
            ndetfluxerr = limmag_flux / df[magnull]['limiting_mag_nsigma']

            # initialize flux to be none
            phot_table = Table.from_pandas(df[['mjd', 'magsys', 'filter']])

            phot_table['zp'] = PHOT_ZP
            phot_table['flux'] = np.nan
            phot_table['fluxerr'] = np.nan
            # non-detection rows keep flux = nan but get an error from the limit
            phot_table['flux'][magdet] = detflux
            phot_table['fluxerr'][magdet] = detfluxerr
            phot_table['fluxerr'][magnull] = ndetfluxerr

        else:
            # flux-space payload: only null/finiteness checks are needed
            for field in PhotFluxFlexible.required_keys:
                missing = df[field].isna().values
                if any(missing):
                    first_offender = np.argwhere(missing)[0, 0]
                    packet = df.iloc[first_offender].to_dict()

                    for key in packet:
                        packet[key] = nan_to_none(packet[key])

                    raise ValidationError(f'Error parsing packet "{packet}": '
                                          f'missing required field {field}.')

            for field in ['flux', 'fluxerr']:
                infinite = np.isinf(df[field].values)
                if any(infinite):
                    first_offender = np.argwhere(infinite)[0, 0]
                    packet = df.iloc[first_offender].to_dict()

                    # coerce nans to nones
                    for key in packet:
                        packet[key] = nan_to_none(packet[key])

                    raise ValidationError(f'Error parsing packet "{packet}": '
                                          f'field {field} must be finite.')

            phot_table = Table.from_pandas(
                df[['mjd', 'magsys', 'filter', 'zp']])
            phot_table['flux'] = df['flux'].fillna(np.nan)
            phot_table['fluxerr'] = df['fluxerr'].fillna(np.nan)

        # convert to microjanskies, AB for DB storage as a vectorized operation
        pdata = PhotometricData(phot_table)
        standardized = pdata.normalized(zp=PHOT_ZP, zpsys='ab')

        df['standardized_flux'] = standardized.flux
        df['standardized_fluxerr'] = standardized.fluxerr

        # validate every referenced instrument once and cache the instances
        # for the caller
        instrument_cache = {}
        for iid in df['instrument_id'].unique():
            instrument = Instrument.query.get(int(iid))
            if not instrument:
                raise ValidationError(f'Invalid instrument ID: {iid}')
            instrument_cache[iid] = instrument

        # validate every referenced object ID
        for oid in df['obj_id'].unique():
            obj = Obj.query.get(oid)
            if not obj:
                raise ValidationError(f'Invalid object ID: {oid}')

        return df, instrument_cache
Example no. 3
0
    def parse_flux(self, data, **kwargs):
        """Build a `Photometry` model instance from a deserialized
        `PhotometryFlux` marshmallow schema.

        Parameters
        ----------
        data : dict
            The instance of the PhotometryFlux schema to convert to Photometry.

        Returns
        -------
        Photometry
            The Photometry object generated from the PhotometryFlux object.
        """

        from skyportal.models import Instrument, Obj, PHOT_SYS, PHOT_ZP, Photometry
        from sncosmo.photdata import PhotometricData

        # look up and validate the instrument
        instrument = Instrument.query.get(data['instrument_id'])
        if not instrument:
            raise ValidationError(
                f'Invalid instrument ID: {data["instrument_id"]}')

        # look up and validate the target object
        # TODO : implement permissions checking
        obj = Obj.query.get(data['obj_id'])
        if not obj:
            raise ValidationError(f'Invalid object ID: {data["obj_id"]}')

        if data["filter"] not in instrument.filters:
            raise ValidationError(
                f"Instrument {instrument.name} has no filter "
                f"{data['filter']}.")

        flux_is_null = data['flux'] is None

        # single-row table for sncosmo's microJansky conversion
        conversion_table = Table([data])
        if flux_is_null:
            # the converter requires a non-null value; the placeholder is
            # swapped back to null after normalization
            conversion_table['flux'] = 0.0

        # normalization to the internal zeropoint / magnitude system
        normalized = PhotometricData(conversion_table).normalized(
            zp=PHOT_ZP, zpsys=PHOT_SYS)

        p = Photometry(
            obj_id=data['obj_id'],
            mjd=data['mjd'],
            # restore null for non-detections
            flux=None if flux_is_null else normalized.flux[0],
            fluxerr=normalized.fluxerr[0],
            instrument_id=data['instrument_id'],
            assignment_id=data['assignment_id'],
            filter=data['filter'],
            ra=data['ra'],
            dec=data['dec'],
            ra_unc=data['ra_unc'],
            dec_unc=data['dec_unc'],
        )
        if data.get('alert_id') is not None:
            p.alert_id = data['alert_id']
        return p
Example no. 4
0
    def post(self):
        """
        ---
        description: Upload photometry
        requestBody:
          content:
            application/json:
              schema:
                oneOf:
                  - $ref: "#/components/schemas/PhotMagFlexible"
                  - $ref: "#/components/schemas/PhotFluxFlexible"
        responses:
          200:
            content:
              application/json:
                schema:
                  allOf:
                    - $ref: '#/components/schemas/Success'
                    - type: object
                      properties:
                        data:
                          type: object
                          properties:
                            ids:
                              type: array
                              items:
                                type: integer
                              description: List of new photometry IDs
                            upload_id:
                              type: string
                              description: |
                                Upload ID associated with all photometry points
                                added in request. Can be used to later delete all
                                points in a single request.
        """

        data = self.get_json()

        if not isinstance(data, dict):
            return self.error(
                'Top level JSON must be an instance of `dict`, got '
                f'{type(data)}.')

        # drop an empty altdata so it does not trip up schema validation or
        # the DataFrame construction below
        if "altdata" in data and not data["altdata"]:
            del data["altdata"]

        # quick validation - just to make sure things have the right fields
        try:
            data = PhotMagFlexible.load(data)
        except ValidationError as e1:
            try:
                data = PhotFluxFlexible.load(data)
            except ValidationError as e2:
                return self.error('Invalid input format: Tried to parse data '
                                  f'in mag space, got: '
                                  f'"{e1.normalized_messages()}." Tried '
                                  f'to parse data in flux space, got:'
                                  f' "{e2.normalized_messages()}."')
            else:
                kind = 'flux'
        else:
            kind = 'mag'

        try:
            group_ids = data.pop("group_ids")
        except KeyError:
            return self.error("Missing required field: group_ids")
        groups = Group.query.filter(Group.id.in_(group_ids)).all()
        if not groups:
            return self.error("Invalid group_ids field. "
                              "Specify at least one valid group ID.")
        # non-admins may only post to groups they belong to
        if "Super admin" not in [
                r.id for r in self.associated_user_object.roles
        ]:
            if not all([group in self.current_user.groups
                        for group in groups]):
                return self.error(
                    "Cannot upload photometry to groups that you "
                    "are not a member of.")
        # if this alert's photometry already exists, just update its groups
        if "alert_id" in data:
            phot = Photometry.query.filter(
                Photometry.alert_id == data["alert_id"]).filter(
                    Photometry.alert_id.isnot(None)).first()
            if phot is not None:
                phot.groups = groups
                DBSession().commit()
                return self.success(data={
                    "ids": [phot.id],
                    "upload_id": phot.upload_id
                })

        # a single all-scalar packet was posted; wrap it so it becomes one row
        if allscalar(data):
            data = [data]

        upload_id = str(uuid.uuid4())

        try:
            df = pd.DataFrame(data)
        except ValueError as e:
            if "altdata" in data and "Mixing dicts with non-Series" in str(e):
                # altdata was given as a dict of lists; transpose it into a
                # list of dicts (one per packet) and retry
                try:
                    data["altdata"] = [{
                        key: value[i]
                        for key, value in data["altdata"].items()
                    } for i in range(
                        len(data["altdata"][list(data["altdata"].keys())[-1]]))
                                       ]
                    df = pd.DataFrame(data)
                except ValueError:
                    return self.error(
                        'Unable to coerce passed JSON to a series of packets. '
                        f'Error was: "{e}"')
            else:
                return self.error(
                    'Unable to coerce passed JSON to a series of packets. '
                    f'Error was: "{e}"')

        # `to_numeric` coerces numbers written as strings to numeric types
        #  (int, float)

        #  errors='ignore' means if something is actually an alphanumeric
        #  string, just leave it alone and dont error out

        #  apply is used to apply it to each column
        # (https://stackoverflow.com/questions/34844711/convert-entire-pandas
        # -dataframe-to-integers-in-pandas-0-17-0/34844867
        df = df.apply(pd.to_numeric, errors='ignore')

        if kind == 'mag':
            # ensure that neither or both mag and magerr are null
            magnull = df['mag'].isna()
            magerrnull = df['magerr'].isna()
            magdet = ~magnull

            # https://en.wikipedia.org/wiki/Bitwise_operation#XOR
            bad = magerrnull ^ magnull  # bitwise exclusive or -- returns true
            #  if A and not B or B and not A

            if any(bad):
                # find the first offending packet
                first_offender = np.argwhere(bad)[0, 0]
                packet = df.iloc[first_offender].to_dict()

                # coerce nans to nones
                for key in packet:
                    packet[key] = nan_to_none(packet[key])

                return self.error(f'Error parsing packet "{packet}": mag '
                                  f'and magerr must both be null, or both be '
                                  f'not null.')

            # ensure nothing is null for the required fields
            for field in PhotMagFlexible.required_keys:
                missing = df[field].isna()
                if any(missing):
                    first_offender = np.argwhere(missing)[0, 0]
                    packet = df.iloc[first_offender].to_dict()

                    # coerce nans to nones
                    for key in packet:
                        packet[key] = nan_to_none(packet[key])

                    return self.error(f'Error parsing packet "{packet}": '
                                      f'missing required field {field}.')

            # convert the mags to fluxes
            # detections
            detflux = 10**(-0.4 * (df[magdet]['mag'] - PHOT_ZP))
            detfluxerr = df[magdet]['magerr'] / (2.5 / np.log(10)) * detflux

            # non-detections
            limmag_flux = 10**(-0.4 * (df[magnull]['limiting_mag'] - PHOT_ZP))
            ndetfluxerr = limmag_flux / df[magnull]['limiting_mag_nsigma']

            # initialize flux to be none
            phot_table = Table.from_pandas(df[['mjd', 'magsys', 'filter']])

            phot_table['zp'] = PHOT_ZP
            phot_table['flux'] = np.nan
            phot_table['fluxerr'] = np.nan
            # non-detection rows keep flux = nan but get an error from the limit
            phot_table['flux'][magdet] = detflux
            phot_table['fluxerr'][magdet] = detfluxerr
            phot_table['fluxerr'][magnull] = ndetfluxerr

        else:
            for field in PhotFluxFlexible.required_keys:
                missing = df[field].isna()
                if any(missing):
                    first_offender = np.argwhere(missing)[0, 0]
                    packet = df.iloc[first_offender].to_dict()

                    for key in packet:
                        packet[key] = nan_to_none(packet[key])

                    return self.error(f'Error parsing packet "{packet}": '
                                      f'missing required field {field}.')

            phot_table = Table.from_pandas(
                df[['mjd', 'magsys', 'filter', 'zp']])
            phot_table['flux'] = df['flux'].fillna(np.nan)
            phot_table['fluxerr'] = df['fluxerr'].fillna(np.nan)

        # convert to microjanskies, AB for DB storage as a vectorized operation
        pdata = PhotometricData(phot_table)
        standardized = pdata.normalized(zp=PHOT_ZP, zpsys='ab')

        df['standardized_flux'] = standardized.flux
        df['standardized_fluxerr'] = standardized.fluxerr

        # validate every referenced instrument once and cache the instances
        instcache = {}
        for iid in df['instrument_id'].unique():
            instrument = Instrument.query.get(int(iid))
            if not instrument:
                return self.error(f'Invalid instrument ID: {iid}')
            instcache[iid] = instrument

        for oid in df['obj_id'].unique():
            obj = Obj.query.get(oid)
            if not obj:
                return self.error(f'Invalid object ID: {oid}')

        # pre-fetch the photometry PKs. these are not guaranteed to be
        # gapless (e.g., 1, 2, 3, 4, 5, ...) but they are guaranteed
        # to be unique in the table and thus can be used to "reserve"
        # PK slots for uninserted rows
        pkq = f"SELECT nextval('photometry_id_seq') FROM " \
              f"generate_series(1, {len(df)})"

        proxy = DBSession().execute(pkq)

        # cache this as list for response
        ids = [i[0] for i in proxy]
        df['id'] = ids
        rows = df.where(pd.notnull(df), None).to_dict('records')

        params = []
        for packet in rows:
            # look up this packet's instrument from the cache so the error
            # message names the right instrument (previously this reported
            # whichever instrument the validation loop above saw last)
            instrument = instcache[packet['instrument_id']]
            if packet["filter"] not in instrument.filters:
                # return a 400 like every other validation failure in this
                # handler, instead of raising an uncaught ValidationError
                return self.error(
                    f"Instrument {instrument.name} has no filter "
                    f"{packet['filter']}.")

            flux = packet.pop('standardized_flux')
            fluxerr = packet.pop('standardized_fluxerr')

            # reduce the DB size by ~2x
            keys = ['limiting_mag', 'magsys', 'limiting_mag_nsigma']
            original_user_data = {
                key: packet[key]
                for key in keys if key in packet
            }
            if original_user_data == {}:
                original_user_data = None

            phot = dict(id=packet['id'],
                        original_user_data=original_user_data,
                        upload_id=upload_id,
                        flux=flux,
                        fluxerr=fluxerr,
                        obj_id=packet['obj_id'],
                        altdata=packet['altdata'],
                        instrument_id=packet['instrument_id'],
                        ra_unc=packet['ra_unc'],
                        dec_unc=packet['dec_unc'],
                        mjd=packet['mjd'],
                        filter=packet['filter'],
                        ra=packet['ra'],
                        dec=packet['dec'])

            params.append(phot)

        #  actually do the insert
        query = Photometry.__table__.insert()
        DBSession().execute(query, params)

        # associate each new point with every requested group
        groupquery = GroupPhotometry.__table__.insert()
        params = []
        for id in ids:
            for group_id in group_ids:
                params.append({'photometr_id': id, 'group_id': group_id})

        DBSession().execute(groupquery, params)
        DBSession().commit()

        return self.success(data={"ids": ids, "upload_id": upload_id})