def get_ohdsi_value_by_vocabulary_concept(request, study_id, vocabulary_id,
                                          concept_code, table_name):

    # TODO: revisit when we go beyond just picking the first value for each person
    (cursor, con) = get_cursor()
    personObj = BasePerson.factory_on_id(int(study_id))
    # date_column_name = personObj.get_date_column_for_table(mapping['from_table'])
    id_column_name = personObj.get_id_field_name()
    person_ids = personObj.get_study_person_ids(con)
    json_list = list()
    for person_id in person_ids:
        value_row = {}
        study_person_id = personObj.convert_person_id_to_study(person_id)
        value_row[id_column_name] = study_person_id
        tuples = fetch(con, table_name, person_id, vocabulary_id, concept_code)
        if tuples:
            (value_as_number, value_as_string, value_as_concept_id,
             date) = tuples[0]
            value_row['value_as_number'] = value_as_number
            value_row['value_as_string'] = value_as_string
            value_row['value_as_concept_id'] = value_as_concept_id
            value_row['date'] = date
            json_list.append(value_row)

        # persons with no value for this concept are omitted from the response
    cursor.close()
    con.close()
    return JsonResponse(json_list, safe=False, status=200)  # application/json

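
For context, a view like this is normally wired up in the project's urls.py. The route below is a hypothetical sketch, not taken from the source; the module path and URL pattern are assumptions.

# Hypothetical routing sketch; the real urls.py is not shown in the source.
from django.urls import path
from . import views

urlpatterns = [
    path('study/<int:study_id>/ohdsi-value/<str:vocabulary_id>/'
         '<str:concept_code>/<str:table_name>/',
         views.get_ohdsi_value_by_vocabulary_concept),
]
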
def get_study_values(request, study_id, table_name, column_name):
    # FETCH the hard way from a dynamically created query

    (value_cursor, con) = get_cursor()

    personObj = BasePerson.factory_on_id(int(study_id))
    print("DEBUG", study_id)
    print("DEBUG", personObj)
    # NOTE: limited to the first ten persons; this looks like a debugging cap
    person_ids = personObj.get_study_person_ids(con)[:10]
    # Build the mapping that drives select_values_from_dict(). A mapping has
    # keys from_table and from_column, and optionally from_where_clause,
    # from_where_column, and has_date; the returned value rows carry the id
    # column, the from_column value, and optionally a date value.
    mapping = {'from_table': table_name, 'from_column': column_name}
    rows = select_values_from_dict(mapping, personObj, person_ids,
                                   value_cursor)
    value_cursor.close()
    con.close()

    # SERIALIZE
    json_list = list()
    for row in rows:
        print("DEBUG: get_study_values() ROW", row)
        serializer = StudyValueSerializer(row)
        serialized = serializer.data
        json_list.append(serialized)
    return JsonResponse(json_list, safe=False, status=200)  # application/json
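
select_values_from_dict() itself is not shown in this section. Given the mapping keys documented above, a minimal sketch of what it plausibly does might look like the following; the function name suffix, row shape, and parameter handling are assumptions, not the project's actual implementation.

# Assumed sketch of select_values_from_dict(); the real code is not shown.
def select_values_from_dict_sketch(mapping, person_obj, person_ids, cursor):
    id_col = person_obj.get_id_field_name()
    query = "SELECT {0}, {1} FROM {2} WHERE {0} = %s".format(
        id_col, mapping['from_column'], mapping['from_table'])
    if mapping.get('from_where_clause'):
        query += " AND ( {0} )".format(mapping['from_where_clause'])
    rows = []
    for person_id in person_ids:
        # study tables are keyed by study ids, so convert before querying
        cursor.execute(query, (person_obj.convert_person_id_to_study(person_id),))
        rows.extend(cursor.fetchall())
    return rows
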
Example #3
def main(db_name, user_name, study_name):
    conn = psycopg2.connect(database=db_name, user=user_name)
    conn.autocommit = True

    (study_id, observation_range_start, observation_range_end, _, _) = get_study_details(conn, study_name)

    personObj = BasePerson.factory(study_id)  
    person_ids = personObj.get_study_person_ids(conn)
    print("number of person ids:", len(person_ids))
    mappings = StudyToOhdsiMapping.objects.filter(study_id=study_id) 
    comparison_data = build_comparison(conn, person_ids, study_id, personObj, mappings)
    print_comparison(comparison_data, conn)
    conn.close()
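
get_study_details() is referenced but not defined here. Judging from the 5-tuple it returns, it presumably reads a study metadata table; the sketch below is an assumption, and the names of the last two (ignored) columns are guesses.

# Hypothetical sketch of get_study_details(); table and column names are assumptions.
def get_study_details_sketch(conn, study_name):
    cur = conn.cursor()
    cur.execute("SELECT study_id, observation_range_start, observation_range_end,"
                " person_id_range_start, person_id_range_end"
                " FROM study WHERE study_name = %s", (study_name,))
    row = cur.fetchone()
    cur.close()
    return row
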
def get_study_value_by_table_column(request, study_id, table_name,
                                    column_name):
    (cursor, con) = get_cursor()
    personObj = BasePerson.factory_on_id(int(study_id))
    person_ids = personObj.get_study_person_ids(con)
    mapping_row = {'from_table': table_name, 'from_column': column_name}
    values = select_values_from_dict(mapping_row, personObj, person_ids,
                                     cursor)
    json_list = list()
    summary = _summarize_study_values(values, column_name)
    json_list.append(summary)
    cursor.close()
    con.close()
    return JsonResponse(json_list, safe=False, status=200)  # application/json
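
_summarize_study_values() is also not shown. Assuming the value rows are dicts keyed by column name, a minimal summary over the numeric values might look like this sketch; the row shape and returned fields are assumptions.

# Assumed sketch of _summarize_study_values(); the row shape is a guess.
def _summarize_study_values_sketch(values, column_name):
    numbers = [row[column_name] for row in values
               if row.get(column_name) is not None]
    if not numbers:
        return {'column': column_name, 'n': 0}
    return {'column': column_name, 'n': len(numbers),
            'min': min(numbers), 'max': max(numbers),
            'avg': sum(numbers) / len(numbers)}
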
Example #5
def migrate(con, study_id, observation_number_start):
    logger.info("migrate.migrate() %d, %d", study_id, observation_number_start)
    study = Study.objects.get(study_id=study_id)
    logger.info("migrate.migrate() got study: %s", study)
    personObj = BasePerson.factory(study)
    logger.info("migrate.migrate() got person %s", personObj)
    logger.info("POPULATING  PERSON study:%d personObj:%s", study_id,
                personObj)
    personObj.populate_person(con)
    person_ids = personObj.get_study_person_ids(con)

    logger.info("MIGRATING EVENTS study:%d personObj:%s", study_id, personObj)
    events_mapping.populate(con, person_ids, study)

    logger.info("done, MIGRATING EVENTS, getting global_mappings")
    global_mappings = StudyToOhdsiMapping.objects.filter(study_id=study_id)
    logger.info("MIGRATING study %d with %d mappings ", study_id,
                len(global_mappings))
    max_observation = migrate_by_mappings(con, global_mappings,
                                          observation_number_start, personObj,
                                          person_ids)
    con.commit()
    return max_observation
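
The source does not show how migrate() is invoked. A minimal driver, assuming a psycopg2 connection and an already-configured Django environment (the starting observation number here is a placeholder):

# Hypothetical driver for migrate(); parameters are placeholders.
import psycopg2

def run_migration(db_name, user_name, study_id):
    con = psycopg2.connect(database=db_name, user=user_name)
    try:
        max_observation = migrate(con, study_id, 0)
        print("migrated up to observation", max_observation)
    finally:
        con.close()
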
def main(db_name, user_name, study_name, extraction_id):
    conn = psycopg2.connect(database=db_name, user=user_name)
    try:
        (study_id, observation_range_start, observation_range_end, _,
         _) = get_study_details(conn, study_name)
        extraction = Extract(conn)

        person_obj = BasePerson.factory_on_id(study_id)
        person_ids = person_obj.get_study_person_ids(conn)

        logger.info("extracting %d persons...", len(list(person_ids)))
        (melted_rows, column_names) = extraction.rule_driven_melted_extraction(
            person_ids, extraction_id)
        wide_rows = extraction.rule_driven_wide_extraction(
            person_ids, extraction_id)
        logger.info("...extracted %d persons.", len(list(person_ids)))

        #
        #    # VERIFY (TODO - resurrect ?)
        #    logg
        #    extraction._verify_extraction_matrix(melted_rows, extraction_id)
        #
        #
        #    # STATS
        #    # stat_type = [min, max, avg, n, sum, n_rules]
        #    # long_name -> stat_type -> value
        #    stats = extraction._get_extraction_matrix_stats(melted_rows, extraction_id)
        #    for to_column in stats:
        #        stats[to_column]['avg'] = float(stats[to_column]['sum']) / float(stats[to_column]['n'])
        #
        #    # if the min value is still maxint, something's fishy:
        #    ### ? logger.warn("min with issues (min == maxint) ...probably a phenotype that doesn't have stats because it has few enough distinct values to fall under the \"instances\" group:")
        #    for (to_column, col_stats) in stats.items():
        #        if col_stats['min'] == sys.maxsize:
        #            logger.info("bad minimums: %s n:%s sum:%s min/avg/max:%s ", to_column, col_stats['n'],
        #                    col_stats['sum'], (col_stats['min'], col_stats['avg'], col_stats['max']))
        #
        #
        #    for (to_column, col_stats) in stats.items():
        #        if col_stats['min'] != sys.maxsize:
        #            logger.info("ok minimum:%s n:%s sum:%s min/avg/max:%s ", to_column, col_stats['n'],
        #                    col_stats['sum'], (col_stats['min'], col_stats['avg'], col_stats['max']))
        #
        #    for to_column in stats:
        #        logger.info("STATSs: to_col:%s", to_column)
        #        for (concept, counts) in stats[to_column]['concepts'].items():
        #            logger.info("STATSs: col:%s vocab:%s term:%s concept:%s counts:%s", to_column, stats[to_column]['vocab'],
        #                stats[to_column]['term'], concept, counts)

        # PRINT
        csv_path = OUTPUT_BASE + '/' + study_name.lower() + '.csv'
        csv_file = open(csv_path, 'w+')
        logger.info("starting to write file %s", csv_path)
        extraction.print_extraction_header(melted_rows, wide_rows,
                                           column_names, csv_file)
        logger.info("...header in %s", csv_path)
        na_columns = extraction.print_extraction_data(melted_rows, wide_rows,
                                                      csv_file, study_name)
        csv_file.close()

        # NA SUMMARY (TODO: don't lose this functionality)
        logger.info("summary:num_columns:%s", len(na_columns))
        for (term, count) in na_columns.items():
            logger.info("summary %s, %s", term, count)
        logger.warning("EXTRACT complete")

    except Exception as e:
        logger.error("extract main():%s", e)
        traceback.print_tb(e.__traceback__)
        raise
    else:
        conn.commit()
    finally:
        conn.close()
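
The source does not show how this main() is called. A conventional argparse entry point might look like the following; the positional argument names simply mirror the parameters and are not from the source.

# Hypothetical entry point; argument names are assumptions.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Run a rule-driven extraction.")
    parser.add_argument("db_name")
    parser.add_argument("user_name")
    parser.add_argument("study_name")
    parser.add_argument("extraction_id", type=int)
    args = parser.parse_args()
    main(args.db_name, args.user_name, args.study_name, args.extraction_id)
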
Example #7
def populate(con, person_id_list, study):
    """ Populate the OHDSI event tables (death, visit_occurrence,
        procedure_occurrence). Note that person_id_list is a list of
        OHDSI ids; when querying study tables, those ids need to be
        converted back to study ids.
    """
    personObj = BasePerson.factory(study)
    id_col = personObj.get_id_field_name()
    cur = con.cursor()
    event_mappings = _read_event_mappings(con, study.study_id)
    procedure_id = 0
    visit_id = 0
    for row in event_mappings:
        logger.info("XX events_mapping.populate() %s", row)
        from_table_name=row['from_table']
        prefix = from_table_name.split('_')[0]
        for person_id in person_id_list:
            query=""

            # QUERY FOR THE VALUES,  BEST SPECIFIC? TODO
            if (row['from_column'] != NULL_PLACEHOLDER):
                # a value and a date, like the Death table
                if (row['where_clause'] != NULL_PLACEHOLDER) :
                    query = ("SELECT {0}, {1} from {2} where " + id_col + " = %s and ( {3} )").format(row['from_date_column'], row['from_column'], row['from_table'], row['where_clause'])
                    #logger.debug("QUERY1:%s  %s", query, person_id)
                    logger.info("QUERY1:%s  %s", query, person_id)
                    cur.execute(query, (personObj.convert_person_id_to_study(person_id),))
                else:
                    query = ("SELECT {0}, {1} from {2} where " + id_col + " = %s").format(row['from_date_column'], row['from_column'], row['from_table'])
                    #logger.debug("QUERY2: %s, %s", query, row)
                    logger.info("QUERY2: %s, %s", query, row)
                    cur.execute(query, (personObj.convert_person_id_to_study(person_id),))
            else:
                # just a date, like the Occurrence tables:
                if (row['where_clause'] != NULL_PLACEHOLDER) :
                    query = ("SELECT {0} from {1} where " + id_col + " = %s and ( {2} )").format(row['from_date_column'], row['from_table'], row['where_clause'])
                    #logger.debug("QUERY3: %s   %s", query, row)
                    logger.info("QUERY3: %s   %s", query, row)
                    cur.execute(query, (personObj.convert_person_id_to_study(person_id),))
                else:
                    query = ("SELECT {0} from {1} where " + id_col + " = %s").format(row['from_date_column'], row['from_table'])
                    #logger.debug("QUERY4:  %s  %s", query, row)
                    logger.info("QUERY4:  %s  %s", query, row)
                    cur.execute(query, (personObj.convert_person_id_to_study(person_id),))
            value_rows = cur.fetchall()
            logger.debug("events.populate() from:%s to:%s rows:%d", from_table_name, row['to_table'], len(value_rows))

            # LOOKUP the id (vocab, concept) from the mappings row.
            # filter().first() returns None when no concept matches, so the
            # missing-concept check below can actually fire (objects.get()
            # would raise DoesNotExist instead).
            concept = Concept.objects.filter(
                vocabulary_id=row['value_vocabulary_id'],
                concept_code=row['value_concept_code']).first()
            concept_id = concept.concept_id if concept else None

            # INSERT
            if len(value_rows) == 0:
                logger.warning("no rows back from %s person:%s, with %s", query, person_id, row)
            elif concept_id is None:
                logger.error("No concept %s, %s", row['value_vocabulary_id'], row['value_concept_code'])
            else:
                for value_row in value_rows:
                    if value_row[0] is not None:
                        logger.debug("VALUE ROWS pid:%s  query:%s  value:%s  num-rows:%d", person_id, query, value_row, len(value_rows))
                        to_table_name = row['to_table']
                        # sometimes this is a date, sometimes a string; format
                        # through a string, the lowest common denominator,
                        # which works for all sources
                        date_time_string = str(value_row[0])
                        try:
                            (year, month, day) = date_time_string.split(' ')[0].split('-')
                            the_date_value = "{0}/{1}/{2}".format(month, day, year)
                        except ValueError:
                            logger.error("populate raised on %s", date_time_string)
                            the_date_value = date_time_string

                        # INSERT DEATH
                        if to_table_name == 'Death':
                            statement = "INSERT into death (person_id, death_date, death_datetime, death_type_concept_id, cause_concept_id)" \
                                + " values ( %s,  %s, %s,  %s, %s)"
                            logger.debug("death: %s, %s, %s, %s, %s %s %s %s); ",
                                statement, person_id, the_date_value, row['addl_value'], concept_id,
                                row['value_vocabulary_id'], row['value_concept_code'], value_row[0] )

                            cur.execute(statement, (person_id, the_date_value, the_date_value, row['addl_value'], concept_id))

                        # INSERT VISIT OCCURRENCE
                        elif to_table_name == 'visit_occurrence':
                            statement = ("INSERT into visit_occurrence "
                                        "(visit_occurrence_id, person_id, visit_concept_id, visit_start_date, "
                                        " visit_start_datetime, visit_end_date,  visit_type_concept_id)"
                                        " values ( %s,  %s,  %s,  %s, %s, %s, %s)")
                            logger.debug("visit %s %s %s %s %s %s %s %s", statement, visit_id, person_id, concept_id, the_date_value,
                                row['addl_value'], row['value_vocabulary_id'], row['value_concept_code'])
                            cur.execute(statement, (visit_id, person_id, concept_id,  the_date_value, the_date_value, the_date_value, row['addl_value']))
                            visit_id += 1

                        # INSERT PROCEDURE  OCCURRENCE
                        elif to_table_name == 'procedure_occurrence':
                            statement = ("INSERT into procedure_occurrence"
                                        " (procedure_occurrence_id, person_id, procedure_concept_id, "
                                        "  procedure_date, procedure_datetime, procedure_type_concept_id)"\
                                        " values ( %s,  %s,  %s,  %s, %s, %s)")
                            logger.debug("proc: %s %s %s %s *%s* %s %s %s %s", statement, procedure_id, person_id, concept_id,
                                the_date_value, row['addl_value'], row['value_vocabulary_id'], row['value_concept_code'], value_row[0] )
                            cur.execute(statement, (procedure_id, person_id, concept_id, the_date_value, the_date_value, row['addl_value']))
                            procedure_id += 1
                        else:
                            logger.error("unknown table name %s in events.populate() %s", to_table_name, row)
                    else:
                        logger.warn("None value in  events_mapping.populate() with %s", value_row)
        value_rows=None

    cur.close()
    con.commit()
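
The date normalization inside populate() (split an ISO-style timestamp string, reassemble as MM/DD/YYYY, fall back to the raw string) could live in a small helper. This sketch repeats that logic under the assumption of inputs like '2015-03-02 00:00:00'; the helper name is hypothetical.

# Sketch of the date normalization used in populate(); the name to_mdy is hypothetical.
def to_mdy(value):
    text = str(value)
    try:
        (year, month, day) = text.split(' ')[0].split('-')
        return "{0}/{1}/{2}".format(month, day, year)
    except ValueError:
        # not a parseable date; fall back to the raw string, as populate() does
        return text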