Beispiel #1
0
def main(db_name, user_name, study_name):
    """Build and print the comparison report for one study.

    Opens a psycopg2 connection to ``db_name`` as ``user_name`` with
    autocommit enabled, looks the study up by name, collects the study's
    person ids and its StudyToOhdsiMapping rows, and passes them to
    ``build_comparison`` / ``print_comparison``.

    Args:
        db_name: name of the database to connect to.
        user_name: database user to connect as.
        study_name: name handed to ``get_study_details`` to resolve the study.

    The connection is closed on every exit path, including when one of the
    steps raises.
    """
    conn = psycopg2.connect(database=db_name, user=user_name)
    try:
        conn.autocommit = True

        # get_study_details returns a 5-tuple; only the study id is needed
        # here, but the full unpack is kept so a shape change still fails loudly.
        (study_id, _, _, _, _) = get_study_details(conn, study_name)

        personObj = BasePerson.factory(study_id)
        person_ids = personObj.get_study_person_ids(conn)
        print("number of person ids:", len(person_ids))
        mappings = StudyToOhdsiMapping.objects.filter(study_id=study_id)
        comparison_data = build_comparison(conn, person_ids, study_id, personObj, mappings)
        print_comparison(comparison_data, conn)
    finally:
        # A psycopg2 connection used as a context manager only scopes a
        # transaction; it has to be closed explicitly.
        conn.close()
Beispiel #2
0
def migrate(con, study_id, observation_number_start):
    """Run the full migration for one study and commit it.

    Steps, in order: load the Study row, obtain its person object from
    ``BasePerson.factory``, populate the person data, fetch the study's
    person ids, populate the event tables via ``events_mapping.populate``,
    then apply every StudyToOhdsiMapping row of the study through
    ``migrate_by_mappings``.

    Args:
        con: database connection; passed to every step and committed at the end.
        study_id: id of the study to migrate.
        observation_number_start: forwarded unchanged to ``migrate_by_mappings``.

    Returns:
        Whatever ``migrate_by_mappings`` returns.
    """
    logger.info("migrate.migrate() %d, %d", study_id, observation_number_start)

    study = Study.objects.get(study_id=study_id)
    logger.info("migrate.migrate() got study: %s", study)

    person = BasePerson.factory(study)
    logger.info("migrate.migrate() got person %s", person)

    # Person data first: the event and mapping steps work from the person ids.
    logger.info("POPULATING  PERSON study:%d personObj:%s", study_id, person)
    person.populate_person(con)
    study_person_ids = person.get_study_person_ids(con)

    logger.info("MIGRATING EVENTS study:%d personObj:%s", study_id, person)
    events_mapping.populate(con, study_person_ids, study)
    logger.info("done, MIGRATING EVENTS, getting global_mappings")

    mappings = StudyToOhdsiMapping.objects.filter(study_id=study_id)
    mapping_count = len(mappings)
    logger.info("MIGRATING study %d with %d mappings ", study_id, mapping_count)

    highest_observation = migrate_by_mappings(
        con,
        mappings,
        observation_number_start,
        person,
        study_person_ids,
    )

    con.commit()
    return highest_observation
Beispiel #3
0
def _build_event_query(row, id_col, person_id):
    """Build, log and return the SELECT that reads one person's rows for a mapping."""
    has_value = row['from_column'] != NULL_PLACEHOLDER
    has_where = row['where_clause'] != NULL_PLACEHOLDER

    # The date column always comes first; a value column (as used for the
    # Death table) is selected only when the mapping names one.
    columns = row['from_date_column']
    if has_value:
        columns = "{0}, {1}".format(row['from_date_column'], row['from_column'])

    # NOTE: table, column and where-clause text come straight from the mapping
    # row and are interpolated into the SQL, so mapping rows must be trusted
    # configuration. Only the person id is passed as a bound parameter.
    query = "SELECT {0} from {1} where {2} = %s".format(columns, row['from_table'], id_col)
    if has_where:
        query += " and ( {0} )".format(row['where_clause'])

    if has_value and has_where:
        logger.info("QUERY1:%s  %s", query, person_id)
    elif has_value:
        logger.info("QUERY2: %s, %s", query, row)
    elif has_where:
        logger.info("QUERY3: %s   %s", query, row)
    else:
        logger.info("QUERY4:  %s  %s", query, row)
    return query


def _format_event_date(value):
    """Turn a 'YYYY-MM-DD[ time]' value into 'MM/DD/YYYY'; return str(value) if it does not fit."""
    # Sources deliver dates sometimes as date objects and sometimes as
    # strings; going through str() handles both.
    date_time_string = str(value)
    try:
        (year, month, day) = date_time_string.split(' ')[0].split('-')
    except ValueError:
        # Not exactly three dash-separated parts: pass the raw text through.
        logger.error("populate raised on %s", date_time_string)
        return date_time_string
    return "{0}/{1}/{2}".format(month, day, year)


def populate(con, person_id_list, study):
    """Populate the OHDSI event tables (death, visit_occurrence, procedure_occurrence).

    For every event mapping of the study and every person, the mapped source
    table is queried for that person's rows, and each row with a non-None
    date is inserted into the mapping's target table.

    Be wary of the fact that the list of person ids is a list of OHDSI ids:
    they are converted with ``convert_person_id_to_study`` before the study
    tables are queried, while the inserts use the OHDSI id as given.

    Args:
        con: database connection; a cursor is opened on it and it is
            committed once all mappings have been processed.
        person_id_list: OHDSI person ids to process.
        study: study object; ``study.study_id`` selects the event mappings
            and the object itself is passed to ``BasePerson.factory``.

    Exceptions from the queries, the inserts or the Concept lookup propagate
    to the caller. The cursor is closed in that case and ``con.commit()`` is
    not called.
    """
    death_insert = "INSERT into death (person_id, death_date, death_datetime, death_type_concept_id, cause_concept_id)" \
        + " values ( %s,  %s, %s,  %s, %s)"
    visit_insert = ("INSERT into visit_occurrence "
                    "(visit_occurrence_id, person_id, visit_concept_id, visit_start_date, "
                    " visit_start_datetime, visit_end_date,  visit_type_concept_id)"
                    " values ( %s,  %s,  %s,  %s, %s, %s, %s)")
    procedure_insert = ("INSERT into procedure_occurrence"
                        " (procedure_occurrence_id, person_id, procedure_concept_id, "
                        "  procedure_date, procedure_datetime, procedure_type_concept_id)"
                        " values ( %s,  %s,  %s,  %s, %s, %s)")

    personObj = BasePerson.factory(study)
    id_col = personObj.get_id_field_name()
    cur = con.cursor()
    try:
        event_mappings = _read_event_mappings(con, study.study_id)

        # NOTE(review): both id counters restart at 0 on every call, which
        # presumably assumes empty target tables or a single call — confirm.
        procedure_id = 0
        visit_id = 0

        # The concept depends only on the mapping row, not on the person, so
        # each (vocabulary, code) pair is looked up once and then reused.
        concept_ids = {}

        for row in event_mappings:
            logger.info("XX events_mapping.populate() %s", row)
            to_table_name = row['to_table']

            for person_id in person_id_list:
                # QUERY FOR THE VALUES
                query = _build_event_query(row, id_col, person_id)
                cur.execute(query, (personObj.convert_person_id_to_study(person_id),))
                value_rows = cur.fetchall()
                logger.debug("events.populate() from:%s to:%s rows:%d", row['from_table'], to_table_name, len(value_rows))

                # LOOKUP the id (vocab, concept) from the mappings row
                concept_key = (row['value_vocabulary_id'], row['value_concept_code'])
                if concept_key not in concept_ids:
                    concept_ids[concept_key] = Concept.objects.get(
                        vocabulary_id=row['value_vocabulary_id'],
                        concept_code=row['value_concept_code']).concept_id
                concept_id = concept_ids[concept_key]

                if not value_rows:
                    logger.warning("no rows back from %s person:%s, with %s", query, person_id, row)
                    continue
                if concept_id is None:
                    logger.error("No concept  %s, %s", row['value_vocabulary_id'], row['value_concept_code'])
                    continue

                # INSERT
                for value_row in value_rows:
                    if value_row[0] is None:
                        logger.warning("None value in  events_mapping.populate() with %s", value_row)
                        continue

                    logger.debug("VALUE ROWS pid:%s  query:%s  value:%s  num-rows:%d", person_id, query, value_row, len(value_rows))
                    the_date_value = _format_event_date(value_row[0])

                    # INSERT DEATH
                    if to_table_name == 'Death':
                        logger.debug("death: %s, %s, %s, %s, %s %s %s %s); ",
                                     death_insert, person_id, the_date_value, row['addl_value'], concept_id,
                                     row['value_vocabulary_id'], row['value_concept_code'], value_row[0])
                        cur.execute(death_insert, (person_id, the_date_value, the_date_value, row['addl_value'], concept_id))

                    # INSERT VISIT OCCURRENCE
                    elif to_table_name == 'visit_occurrence':
                        logger.debug("visit %s %s %s %s %s %s %s %s", visit_insert, visit_id, person_id, concept_id, the_date_value,
                                     row['addl_value'], row['value_vocabulary_id'], row['value_concept_code'])
                        cur.execute(visit_insert, (visit_id, person_id, concept_id, the_date_value, the_date_value, the_date_value, row['addl_value']))
                        visit_id += 1

                    # INSERT PROCEDURE OCCURRENCE
                    elif to_table_name == 'procedure_occurrence':
                        logger.debug("proc: %s %s %s %s *%s* %s %s %s %s", procedure_insert, procedure_id, person_id, concept_id,
                                     the_date_value, row['addl_value'], row['value_vocabulary_id'], row['value_concept_code'], value_row[0])
                        cur.execute(procedure_insert, (procedure_id, person_id, concept_id, the_date_value, the_date_value, row['addl_value']))
                        procedure_id += 1

                    else:
                        logger.error("unknown table name %s in events.populate() %s", to_table_name, row)
    finally:
        # Close the cursor on failure as well as on success.
        cur.close()

    con.commit()