Python pivot_valuesの例、openfda.parallel.pivot_values Pythonの例

コード例 #1

0

ファイルを表示

    def _join(self, key, values):
        """Fold the pivoted MDR source rows of one report into a single record.

        Args:
            key: Report key; only used in the log message for skipped reports.
            values: Reducer values handed to ``parallel.pivot_values``.

        Returns:
            The merged record dict, or ``None`` when there is no ``mdrfoi``
            row or when ``int()`` raises ``TypeError`` on the record's
            ``mdr_report_key`` value (e.g. the key is absent).
        """
        by_source = parallel.pivot_values(values)
        master_rows = by_source.get('mdrfoi', [])
        if not master_rows:
            logging.info('MDR REPORT %s: Missing mdrfoi record, Skipping join',
                         key)
            return

        record = {'device': [], 'mdr_text': [], 'patient': []}
        # Fields of later mdrfoi rows overwrite those of earlier ones.
        for master in master_rows:
            record.update(master)

        try:
            int(record.get('mdr_report_key', None))
        except TypeError:
            logging.info('%s', '*' * 2400)
            return

        # self.join_map maps a pivoted source name to the record key that
        # collects that source's rows.
        for source_name, nested_key in self.join_map.items():
            for child in by_source.get(source_name, []):
                # The join key is redundant on nested rows, so drop it.
                child.pop('mdr_report_key', 0)
                record[nested_key].append(child)

        return record

コード例 #2

0

ファイルを表示

ファイル: pipeline.py プロジェクト: FDA/openfda

  def reduce(self, key, values, output):
    """Emit at most one record for ``key`` via ``output.put``.

    With exactly one value, its payload (``values[0][1]``) is emitted when
    truthy. With several values they are pivoted and the first truthy
    candidate in the order change, add, init is emitted; when none is truthy
    the ``init`` candidate is emitted as-is. With no values nothing happens.
    """
    def _first_or_self(candidate):
      # Lists are unwrapped to their first element (None when empty);
      # anything else passes through untouched.
      if not isinstance(candidate, list):
        return candidate
      return candidate[0] if len(candidate) > 0 else None

    count = len(values)
    if count < 1:
      return

    if count == 1:
      only = _first_or_self(values[0][1])
      if only:
        output.put(key, only)
      return

    # We are merging the additions and updates with last week's run, which is
    # where the existing ('init') records come from. A record can exist in all
    # three places, so pick by priority: changed, then added, then existing.
    by_source = parallel.pivot_values(values)
    change = _first_or_self(by_source.get('change', []))
    add = _first_or_self(by_source.get('add', []))
    init = _first_or_self(by_source.get('init', []))

    output.put(key, change or add or init)

コード例 #3

0

ファイルを表示

ファイル: pipeline.py プロジェクト: FDA/openfda

    def _join(self, key, values):
        """Fold the pivoted MDR source rows of one report into a single record.

        On top of the plain nested join this de-duplicates ``mdr_text`` rows
        by ``mdr_text_key``, copies device problem codes into
        ``product_problems`` and attaches patient problem codes to the
        matching patient (creating a placeholder patient when needed).

        Args:
            key: Report key; not used by this implementation.
            values: Reducer values handed to ``parallel.pivot_values``.

        Returns:
            The merged record dict, or ``None`` when there is no ``mdrfoi``
            row or when ``int()`` raises ``TypeError`` on the record's
            ``mdr_report_key`` value (e.g. the key is absent).
        """
        by_source = parallel.pivot_values(values)
        master_rows = by_source.get('mdrfoi', [])
        if not master_rows:
            # Reports without an mdrfoi row are skipped without logging.
            return

        record = {'device': [], 'mdr_text': [], 'patient': []}
        # Fields of later mdrfoi rows overwrite those of earlier ones.
        for master in master_rows:
            record.update(master)

        try:
            int(record.get('mdr_report_key', None))
        except TypeError:
            logging.info('%s', '*' * 2400)
            return

        for source_name, nested_key in self.join_map.items():
            for child in by_source.get(source_name, []):
                # The join key is redundant on nested rows, so drop it.
                child.pop('mdr_report_key', 0)
                if nested_key == 'mdr_text':
                    # Keep only the first text row seen for each mdr_text_key.
                    duplicates = [
                        seen for seen in record[nested_key]
                        if seen['mdr_text_key'] == child['mdr_text_key']
                    ]
                    if duplicates:
                        continue
                record[nested_key].append(child)

        # Tuck the device problem codes onto the final record.
        device_problems = by_source.get('foidevproblem', [])
        if device_problems:
            record['product_problems'] = [
                entry['product_problem'] for entry in device_problems
            ]

        # https://github.com/FDA/openfda/issues/179
        # Some reports carry patient problem codes without the patient itself;
        # add an otherwise empty placeholder patient to hold those codes.
        for code in by_source.get('patientproblemcode', []):
            matching = [
                known for known in record['patient']
                if known['patient_sequence_number'] ==
                code['patient_sequence_number']
            ]
            if not matching:
                record['patient'].append({
                    'patient_sequence_number':
                    code['patient_sequence_number']
                })

        # Collect, per patient, every problem code with the same sequence
        # number, in the order the codes were streamed.
        for patient in record['patient']:
            for code in by_source.get('patientproblemcode', []):
                if patient['patient_sequence_number'] == code[
                        'patient_sequence_number']:
                    current = patient.get('patient_problems')
                    addition = [code['patient_problem']]
                    if current is None:
                        patient['patient_problems'] = addition
                    else:
                        patient['patient_problems'] = current + addition

        return record

コード例 #4

0

ファイルを表示

    def reduce(self, key, values, output):
        """Emit at most one record for ``key`` via ``output.put``.

        A single value is emitted when its payload (``values[0][1]``) is
        truthy. Several values are pivoted and resolved by priority: the
        changed record, then the added one, then the existing ('init') one,
        which is emitted even when it is falsy. No values means no output.
        """
        def _unwrap(candidate):
            # A list stands for its first element, or None when it is empty.
            if isinstance(candidate, list):
                return candidate[0] if len(candidate) > 0 else None
            return candidate

        if len(values) == 1:
            payload = _unwrap(values[0][1])
            if payload:
                output.put(key, payload)
            return

        if len(values) > 1:
            # The additions and updates are merged with last week's run, which
            # is where the existing records come from; a record may show up in
            # all three places, hence the explicit priority order.
            pivoted = parallel.pivot_values(values)
            candidates = [
                _unwrap(pivoted.get(source, []))
                for source in ('change', 'add', 'init')
            ]

            # Default to the 'init' candidate unless an earlier one is truthy.
            winner = candidates[-1]
            for candidate in candidates[:-1]:
                if candidate:
                    winner = candidate
                    break
            output.put(key, winner)

コード例 #5

0

ファイルを表示

ファイル: pipeline.py プロジェクト: FDA/openfda

  def _join(self, key, values):
    """Combine one report's pivoted MDR rows into a single nested record.

    The ``mdrfoi`` rows supply the top-level fields; rows from every source
    listed in ``self.join_map`` are appended under that source's target key
    after their ``mdr_report_key`` has been removed.

    Returns the record, or ``None`` when no ``mdrfoi`` row exists or ``int()``
    raises ``TypeError`` on the record's ``mdr_report_key`` value.
    """
    pivoted = parallel.pivot_values(values)

    reports = pivoted.get('mdrfoi', [])
    if not reports:
      logging.info('MDR REPORT %s: Missing mdrfoi record, Skipping join', key)
      return

    merged = {
      'device': [],
      'mdr_text': [],
      'patient': []
    }
    # When several mdrfoi rows exist, the last one wins field by field.
    for report in reports:
      merged.update(report)

    try:
      int(merged.get('mdr_report_key', None))
    except TypeError:
      logging.info('%s', '*' * 2400)
      return

    for source, target in self.join_map.items():
      for nested in pivoted.get(source, []):
        nested.pop('mdr_report_key', 0)  # nested rows do not need the join key
        merged[target].append(nested)

    return merged

コード例 #6

0

ファイルを表示

 def _join(self, values):
     """Attach the establishment type fields to every listing_estabtypes row.

     Args:
         values: Reducer values handed to ``parallel.pivot_values``.

     Returns:
         A list with one merged dict per ``listing_estabtypes`` row; on a key
         clash the establishment type's value wins.

     Raises:
         IndexError: If the pivoted input has no ``estabtypes`` rows.
     """
     intermediate = parallel.pivot_values(values)
     result = []
     # There should be only one establishment type streamed
     est_type = intermediate.get('estabtypes', [])[0]
     for data in intermediate.get('listing_estabtypes', []):
         # Copy-then-update rather than ``items() + items()``: on Python 3
         # ``items()`` returns views, which cannot be concatenated with ``+``.
         final = dict(data)
         final.update(est_type)
         result.append(final)
     return result

コード例 #7

0

ファイルを表示

ファイル: pipeline.py プロジェクト: dataminer-x/openfda

 def _join(self, values):
     """Merge the establishment type into each listing_estabtypes row.

     Args:
         values: Reducer values handed to ``parallel.pivot_values``.

     Returns:
         A list of dicts, one per ``listing_estabtypes`` row, each holding the
         row's fields overlaid with the establishment type's fields.

     Raises:
         IndexError: If the pivoted input has no ``estabtypes`` rows.
     """
     intermediate = parallel.pivot_values(values)
     result = []
     # There should be only one establishment type streamed
     est_type = intermediate.get("estabtypes", [])[0]
     for data in intermediate.get("listing_estabtypes", []):
         # ``dict.items()`` is a view on Python 3 and does not support ``+``,
         # so build the merged dict by copying and updating instead.
         final = dict(data)
         final.update(est_type)
         result.append(final)
     return result

コード例 #8

0

ファイルを表示

ファイル: pipeline.py プロジェクト: dataminer-x/openfda

    def _join(self, values):
        """Decorate each remapped registration listing with its products and names.

        Every ``remapped_registration_listing`` row is copied and given a
        ``products`` entry (the ``listing_pcd`` rows) and a
        ``proprietary_name`` entry (the ``listing_proprietary_name`` rows);
        each defaults to an empty list when the source is absent.

        Returns:
            The list of decorated copies, in input order.
        """
        pivoted = parallel.pivot_values(values)
        # The lookups stay inside the comprehension so that an absent source
        # yields a fresh empty list for every row.
        return [
            dict(
                listing,
                products=pivoted.get("listing_pcd", []),
                proprietary_name=pivoted.get("listing_proprietary_name", []),
            )
            for listing in pivoted.get("remapped_registration_listing", [])
        ]

コード例 #9

0

ファイルを表示

ファイル: pipeline.py プロジェクト: tralfamadoriangray/openfda

  def _join(self, values):
    """Return a copy of every remapped registration listing row, extended
    with the pivoted ``listing_pcd`` rows under ``products`` and the pivoted
    ``listing_proprietary_name`` rows under ``proprietary_name`` (an empty
    list for either one when that source is absent).
    """
    pivoted = parallel.pivot_values(values)

    joined = []
    for listing in pivoted.get('remapped_registration_listing', []):
      record = dict(listing)
      record.update({
        'products': pivoted.get('listing_pcd', []),
        'proprietary_name': pivoted.get('listing_proprietary_name', []),
      })
      joined.append(record)

    return joined

コード例 #10

0

ファイルを表示

ファイル: pipeline.py プロジェクト: FDA/openfda

 def _join(self, values):
     """Overlay the establishment type onto every listing_estabtypes row.

     Returns:
         A list with one merged dict per ``listing_estabtypes`` row; fields
         of the establishment type win on a key clash. When the pivoted
         input has no ``estabtypes`` entry, ``{'description': ''}`` is used.
     """
     pivoted = parallel.pivot_values(values)
     # Only one establishment type should be streamed. However, the type with
     # ID 4 no longer exists in estabtypes.txt while at least two
     # listing_estabtypes records still refer to it -- likely a data issue on
     # the FDA side -- so fall back to an empty description in that case.
     fallback = [{'description': ''}]
     est_type = pivoted.get('estabtypes', fallback)[0]

     merged = []
     for listing in pivoted.get('listing_estabtypes', []):
         combined = dict(listing.items())
         combined.update(est_type.items())
         merged.append(combined)
     return merged

コード例 #11

0

ファイルを表示

ファイル: pipeline.py プロジェクト: ColMac/openfda

  def _join(self, values):
    """Join facility registrations with their US agent and owner operator.

    Args:
      values: Reducer values handed to ``parallel.pivot_values``.

    Returns:
      A list with a copy of every ``registration`` row whose
      ``address_type_id`` is ``'F'``, each extended with ``us_agent`` (the
      first ``us_agent`` row overlaid with the address fields of the last
      ``'U'`` registration row, or ``{}``) and ``owner_operator`` (the first
      ``intermediate_owner_operator`` row, or ``{}``).
    """
    address_keys = [
      'address_line_1',
      'address_line_2',
      'city',
      'state_id',
      'zip_code',
      'postal_code',
      'iso_country_code'
    ]

    intermediate = parallel.pivot_values(values)

    # The US Agent Address is in the registration dataset, we need to pluck it
    # out and merge it with each us agent record.
    us_agent_address = {}
    for row in intermediate.get('registration', []):
      _type = row.get('address_type_id', None)
      if _type == 'U':
        us_agent_address = {k: v for k, v in row.items() if k in address_keys}

    # There are 0 or 1 US Agents assigned to a facility
    us_agent = {}
    agent_data = intermediate.get('us_agent', [])
    if agent_data:
      # Copy-then-update rather than ``items() + items()``: on Python 3
      # ``items()`` returns views, which cannot be concatenated with ``+``.
      us_agent = dict(agent_data[0])
      us_agent.update(us_agent_address)

    # There is 0 or 1 owner operators
    owner_operator = {}
    owner_operator_data = intermediate.get('intermediate_owner_operator', [])
    if owner_operator_data:
      owner_operator = owner_operator_data[0]

    result = []
    for row in intermediate.get('registration', []):
      _type = row.get('address_type_id', None)
      # We only want `Facility` records, i.e. skip all the us agent addresses
      if _type == 'F':
        final = dict(row)
        final['us_agent'] = us_agent
        final['owner_operator'] = owner_operator
        result.append(final)

    return result

コード例 #12

0

ファイルを表示

ファイル: pipeline.py プロジェクト: tralfamadoriangray/openfda

  def _join(self, values):
    """Build the facility records for one registration.

    Every ``registration`` row with ``address_type_id == 'F'`` is copied and
    extended with ``us_agent`` and ``owner_operator``. The US agent is the
    first ``us_agent`` row overlaid with the address fields of the last
    ``'U'`` registration row; the owner operator is the first
    ``intermediate_owner_operator`` row. Either is ``{}`` when absent.
    """
    address_keys = [
      'address_line_1',
      'address_line_2',
      'city',
      'state_id',
      'zip_code',
      'postal_code',
      'iso_country_code'
    ]

    pivoted = parallel.pivot_values(values)

    # The US agent's address lives in the registration dataset as a row of
    # type 'U'; keep the address fields of the last such row.
    us_agent_address = {}
    for reg in pivoted.get('registration', []):
      if reg.get('address_type_id', None) == 'U':
        us_agent_address = {
          field: value for field, value in reg.items() if field in address_keys
        }

    # A facility has zero or one US agent.
    agents = pivoted.get('us_agent', [])
    us_agent = {}
    if agents:
      us_agent = dict(agents[0].items())
      us_agent.update(us_agent_address.items())

    # There is zero or one owner operator.
    owners = pivoted.get('intermediate_owner_operator', [])
    owner_operator = owners[0] if owners else {}

    # Only `Facility` rows are emitted; the US agent address rows are skipped.
    return [
      dict(reg, us_agent=us_agent, owner_operator=owner_operator)
      for reg in pivoted.get('registration', [])
      if reg.get('address_type_id', None) == 'F'
    ]

コード例 #13

0

ファイルを表示

ファイル: pipeline.py プロジェクト: dataminer-x/openfda

    def _join(self, values):
        """Join facility registrations with their US agent and owner operator.

        Args:
            values: Reducer values handed to ``parallel.pivot_values``.

        Returns:
            A list with a copy of every ``registration`` row whose
            ``address_type_id`` is ``"F"``, each extended with ``us_agent``
            (the first ``us_agent`` row overlaid with the address fields of
            the last ``"U"`` registration row, or ``{}``) and
            ``owner_operator`` (the first ``intermediate_owner_operator``
            row, or ``{}``).
        """
        address_keys = [
            "address_line_1",
            "address_line_2",
            "city",
            "state_id",
            "zip_code",
            "postal_code",
            "iso_country_code",
        ]

        intermediate = parallel.pivot_values(values)

        # The US Agent Address is in the registration dataset, we need to pluck it
        # out and merge it with each us agent record.
        us_agent_address = {}
        for row in intermediate.get("registration", []):
            _type = row.get("address_type_id", None)
            if _type == "U":
                us_agent_address = {k: v for k, v in row.items() if k in address_keys}

        # There are 0 or 1 US Agents assigned to a facility
        us_agent = {}
        agent_data = intermediate.get("us_agent", [])
        if agent_data:
            # ``dict.items()`` is a view on Python 3 and does not support
            # ``+``, so merge by copying the agent and updating the copy.
            us_agent = dict(agent_data[0])
            us_agent.update(us_agent_address)

        # There is 0 or 1 owner operators
        owner_operator = {}
        owner_operator_data = intermediate.get("intermediate_owner_operator", [])
        if owner_operator_data:
            owner_operator = owner_operator_data[0]

        result = []
        for row in intermediate.get("registration", []):
            _type = row.get("address_type_id", None)
            # We only want `Facility` records, i.e. skip all the us agent addresses
            if _type == "F":
                final = dict(row)
                final["us_agent"] = us_agent
                final["owner_operator"] = owner_operator
                result.append(final)

        return result

コード例 #14

0

ファイルを表示

    def _join(self, values):
        """Build one joined record per ``intermediate_registration_listing`` row.

        Each record is a copy of the listing row in which:
          * ``proprietary_name`` holds the distinct truthy names found in the
            row's own ``proprietary_name`` entries, in first-seen order;
          * ``establishment_type`` holds the ``description`` of every
            ``intermediate_establishment_listing`` row whose
            ``registration_listing_id`` join key equals the listing's;
          * ``registration`` is the first ``intermediate_registration`` row,
            or ``{}`` when there is none.

        Returns:
            The list of joined records, in input order.
        """
        pivoted = parallel.pivot_values(values)
        joined = []

        for listing in pivoted.get('intermediate_registration_listing', []):
            record = dict(listing)
            record['establishment_type'] = []
            record['registration'] = []
            record['proprietary_name'] = []

            # De-duplicate the proprietary names, dropping empty ones.
            unique_names = record['proprietary_name']
            for entry in listing.get('proprietary_name', []):
                name = entry.get('proprietary_name', None)
                if not name or name in unique_names:
                    continue
                unique_names.append(name)

            est_join = ['registration_listing_id']
            reg_join = ['reg_key']

            est_left_key = construct_join_key(listing, est_join)
            # NOTE(review): this key is computed but never read afterwards.
            reg_left_key = construct_join_key(record, reg_join)

            # Keep just the descriptions of the matching establishment types.
            record['establishment_type'].extend(
                candidate['description']
                for candidate in pivoted.get(
                    'intermediate_establishment_listing', [])
                if est_left_key == construct_join_key(candidate, est_join))

            # There is only one registered facility.
            facilities = pivoted.get('intermediate_registration', [])
            record['registration'] = facilities[0] if facilities else {}

            joined.append(record)

        return joined

コード例 #15

0

ファイルを表示

    def _join(self, values):
        """Join every owner operator with its contact address and correspondent.

        Each ``owner_operator`` row is copied and extended with
        ``contact_address`` (the first ``contact_addresses`` row, or ``{}``)
        and ``official_correspondent`` (the last ``official_correspondent``
        row with the same ``reg_key``, or ``{}``).

        Raises:
            KeyError: If the pivoted input has no ``owner_operator`` entry, or
                a row that is read lacks ``reg_key``.
        """
        pivoted = parallel.pivot_values(values)
        joined = []

        for owner in pivoted['owner_operator']:
            record = dict(owner)
            record['official_correspondent'] = {}

            # There should be a single address, but the pivoted interface is
            # a list, so take its first item.
            addresses = pivoted.get('contact_addresses', None)
            record['contact_address'] = addresses[0] if addresses else {}

            owner_reg_key = record['reg_key']
            for correspondent in pivoted.get('official_correspondent', []):
                if correspondent['reg_key'] == owner_reg_key:
                    record['official_correspondent'] = correspondent

            joined.append(record)

        return joined

コード例 #16

0

ファイルを表示

ファイル: pipeline.py プロジェクト: dataminer-x/openfda

    def _join(self, values):
        """Return the owner operator rows enriched with address and correspondent.

        For each ``owner_operator`` row a copy is produced whose
        ``contact_address`` is the first ``contact_addresses`` row and whose
        ``official_correspondent`` is the last ``official_correspondent`` row
        sharing the owner's ``reg_key``; both default to ``{}``.

        Raises:
            KeyError: If the pivoted input has no ``owner_operator`` entry, or
                a row that is read lacks ``reg_key``.
        """
        pivoted = parallel.pivot_values(values)

        def _enrich(owner):
            enriched = dict(owner)
            enriched["official_correspondent"] = {}
            enriched["contact_address"] = {}

            # Only one address is expected, but the pivoted interface hands
            # back a list, so the first item is used.
            addresses = pivoted.get("contact_addresses", None)
            if addresses:
                enriched["contact_address"] = addresses[0]

            wanted = enriched["reg_key"]
            for candidate in pivoted.get("official_correspondent", []):
                if candidate["reg_key"] == wanted:
                    enriched["official_correspondent"] = candidate
            return enriched

        return [_enrich(owner) for owner in pivoted["owner_operator"]]

コード例 #17

0

ファイルを表示

ファイル: pipeline.py プロジェクト: dataminer-x/openfda

    def _join(self, values):
        """Assemble the final registration listing records.

        One record is produced per ``intermediate_registration_listing`` row:
        a copy of the row whose ``proprietary_name`` is reduced to its
        distinct truthy names, whose ``establishment_type`` lists the
        ``description`` of each ``intermediate_establishment_listing`` row
        with an equal ``registration_listing_id`` join key, and whose
        ``registration`` is the first ``intermediate_registration`` row
        (``{}`` when there is none).
        """
        pivoted = parallel.pivot_values(values)
        records = []

        for listing in pivoted.get("intermediate_registration_listing", []):
            record = dict(listing)
            record["establishment_type"] = []
            record["registration"] = []
            record["proprietary_name"] = []

            # Collect each proprietary name once, skipping missing/empty ones.
            for entry in listing.get("proprietary_name", []):
                candidate = entry.get("proprietary_name", None)
                if candidate and candidate not in record["proprietary_name"]:
                    record["proprietary_name"].append(candidate)

            est_join = ["registration_listing_id"]
            reg_join = ["reg_key"]

            listing_est_key = construct_join_key(listing, est_join)
            # NOTE(review): the result below is not used later in this method.
            reg_left_key = construct_join_key(record, reg_join)

            # Only the description of a matching establishment type is kept.
            for est_row in pivoted.get("intermediate_establishment_listing", []):
                if listing_est_key == construct_join_key(est_row, est_join):
                    record["establishment_type"].append(est_row["description"])

            # There is only one registered facility.
            facilities = pivoted.get("intermediate_registration", [])
            record["registration"] = facilities[0] if facilities else {}

            records.append(record)

        return records