def get_ohdsi_value_by_vocabulary_concept(request, study_id, vocabulary_id, concept_code, table_name):
    """Return, as a JSON array, one row per study person holding the FIRST
    OHDSI value found in table_name for (vocabulary_id, concept_code).

    Persons with no matching value are skipped entirely.
    TODO: extend beyond just picking the first value for each person.
    """
    (cursor, con) = get_cursor()
    try:
        personObj = BasePerson.factory_on_id(int(study_id))
        # date_column_name = personObj.get_date_column_for_table(mapping['from_table'])
        id_column_name = personObj.get_id_field_name()
        person_ids = personObj.get_study_person_ids(con)
        json_list = []
        for person_id in person_ids:
            value_row = {}
            # Report ids in the study's own namespace, not the OHDSI one.
            study_person_id = personObj.convert_person_id_to_study(person_id)
            value_row[id_column_name] = study_person_id
            tuples = fetch(con, table_name, person_id, vocabulary_id, concept_code)
            if tuples:
                # Only the first matching tuple is used (see TODO above).
                (value_as_number, value_as_string, value_as_concept_id, date) = tuples[0]
                value_row['value_as_number'] = value_as_number
                value_row['value_as_string'] = value_as_string
                value_row['value_as_concept_id'] = value_as_concept_id
                value_row['date'] = date
                json_list.append(value_row)
    finally:
        # Always release DB resources — the original leaked cursor/connection
        # whenever fetch() or a query raised.
        cursor.close()
        con.close()
    return JsonResponse(json_list, safe=False, status=200)  # application/json
def get_study_values(request, study_id, table_name, column_name):
    """Return serialized study values for (table_name, column_name) as a
    JSON array, one element per selected row.

    Values are fetched via a dynamically created query built from a minimal
    mapping dict (keys: from_table, from_column).
    """
    (value_cursor, con) = get_cursor()
    try:
        personObj = BasePerson.factory_on_id(int(study_id))
        # NOTE(review): the [:10] truncation looks like a debugging leftover —
        # only the first 10 persons are ever reported. Confirm intent before
        # removing it; kept here to preserve observed behavior.
        person_ids = personObj.get_study_person_ids(con)[:10]
        mapping = {'from_table': table_name, 'from_column': column_name}
        rows = select_values_from_dict(mapping, personObj, person_ids, value_cursor)
    finally:
        # Release DB resources even if the select raises (original leaked here,
        # and also left debug print() calls in this view).
        value_cursor.close()
        con.close()
    # SERIALIZE
    json_list = [StudyValueSerializer(row).data for row in rows]
    return JsonResponse(json_list, safe=False, status=200)  # application/json
def main(db_name, user_name, study_name):
    """Build and print a study-vs-OHDSI comparison for study_name.

    Connects to db_name as user_name in autocommit mode; the connection is
    closed even if comparison building fails.
    """
    conn = psycopg2.connect(database=db_name, user=user_name)
    conn.autocommit = True
    try:
        (study_id, observation_range_start, observation_range_end, _, _) = \
            get_study_details(conn, study_name)
        personObj = BasePerson.factory(study_id)
        person_ids = personObj.get_study_person_ids(conn)
        print("number of person ids:", len(person_ids))
        mappings = StudyToOhdsiMapping.objects.filter(study_id=study_id)
        comparison_data = build_comparison(conn, person_ids, study_id, personObj, mappings)
        print_comparison(comparison_data, conn)
    finally:
        # Original never closed the connection when any step above raised.
        conn.close()
def get_study_value_by_table_column(request, study_id, table_name, column_name):
    """Return a one-element JSON array containing a summary of the study
    values found in table_name.column_name for all persons in the study.
    """
    (cursor, con) = get_cursor()
    try:
        personObj = BasePerson.factory_on_id(int(study_id))
        person_ids = personObj.get_study_person_ids(con)
        mapping_row = {'from_table': table_name, 'from_column': column_name}
        values = select_values_from_dict(mapping_row, personObj, person_ids, cursor)
        summary = _summarize_study_values(values, column_name)
    finally:
        # Always release DB resources — the original leaked cursor/connection
        # whenever the select or summarization raised.
        cursor.close()
        con.close()
    return JsonResponse([summary], safe=False, status=200)  # application/json
def migrate(con, study_id, observation_number_start):
    """Run the full study-to-OHDSI migration for one study.

    Populates the person table, migrates events, then applies every
    StudyToOhdsiMapping for the study starting at observation_number_start.
    Commits on success and returns the highest observation number used.
    """
    logger.info("migrate.migrate() %d, %d", study_id, observation_number_start)

    study = Study.objects.get(study_id=study_id)
    logger.info("migrate.migrate() got study: %s", study)

    person = BasePerson.factory(study)
    logger.info("migrate.migrate() got person %s", person)

    # Phase 1: persons.
    logger.info("POPULATING PERSON study:%d personObj:%s", study_id, person)
    person.populate_person(con)
    ids = person.get_study_person_ids(con)

    # Phase 2: events.
    logger.info("MIGRATING EVENTS study:%d personObj:%s", study_id, person)
    events_mapping.populate(con, ids, study)
    logger.info("done, MIGRATING EVENTS, getting global_mappings")

    # Phase 3: mapped observations.
    mappings = StudyToOhdsiMapping.objects.filter(study_id=study_id)
    logger.info("MIGRATING study %d with %d mappings ", study_id, len(mappings))
    highest_observation = migrate_by_mappings(
        con, mappings, observation_number_start, person, ids)

    con.commit()
    return highest_observation
def main(db_name, user_name, study_name, extraction_id):
    """Extract study data and write it to OUTPUT_BASE/<study_name>.csv.

    Runs both the melted and wide rule-driven extractions for every person
    in the study, writes header + data rows, then logs an NA-count summary.
    Commits on success; on any failure logs, prints the traceback, and
    re-raises. The connection is closed on both paths.
    """
    conn = None
    try:
        conn = psycopg2.connect(database=db_name, user=user_name)
        (study_id, observation_range_start, observation_range_end, _, _) = \
            get_study_details(conn, study_name)
        extraction = Extract(conn)
        person_obj = BasePerson.factory_on_id(study_id)
        # Materialize once: the original called len(list(person_ids)) twice,
        # which would silently exhaust person_ids if it were a generator.
        person_ids = list(person_obj.get_study_person_ids(conn))
        logger.info("extracting %d persons...", len(person_ids))
        (melted_rows, column_names) = extraction.rule_driven_melted_extraction(
            person_ids, extraction_id)
        wide_rows = extraction.rule_driven_wide_extraction(
            person_ids, extraction_id)
        logger.info("...extracted %d persons.", len(person_ids))

        # TODO(resurrect?): a large commented-out VERIFY/STATS section lived
        # here (extraction matrix verification, per-column n/sum/min/avg/max,
        # bad-minimum detection, per-concept counts). Removed as dead code;
        # recover from version control if that functionality is revived.

        # PRINT — `with` guarantees the CSV is flushed and closed; the
        # original never closed it (its commented-out os.close(csv_file)
        # was the wrong API for a file object anyway).
        csv_path = OUTPUT_BASE + '/' + study_name.lower() + '.csv'
        with open(csv_path, 'w+') as csv_file:
            logger.info("starting to write file %s", csv_file)
            extraction.print_extraction_header(melted_rows, wide_rows,
                                               column_names, csv_file)
            logger.info("...header in %s", csv_file)
            na_columns = extraction.print_extraction_data(melted_rows, wide_rows,
                                                          csv_file, study_name)

        # NA SUMMARY (TODO, don't lose (forget about) this functionality)
        logger.info("summary:num_columns:%s", len(na_columns))
        for (term, count) in na_columns.items():
            logger.info("summary %s, %s", term, count)
        logger.warning("EXTRACT complete")
        conn.commit()
    except Exception as e:
        logger.error("extract main():%s", e)
        traceback.print_tb(e.__traceback__)
        raise  # bare raise preserves the original traceback
    finally:
        # Original leaked the connection whenever an exception was raised.
        if conn is not None:
            conn.close()
def populate(con, person_id_list, study):
    """Populate the OHDSI event tables (death, visit_occurrence,
    procedure_occurrence) from the study's source tables.

    Be wary of the fact that person_id_list contains OHDSI ids; queries
    against study tables must first convert them with
    personObj.convert_person_id_to_study().

    For each event mapping row and each person, one of four SELECT shapes is
    built (with/without a value column, with/without a where clause), the
    returned date is reformatted to MM/DD/YYYY, and a row is inserted into
    the mapping's target table. Commits once at the end.
    """
    personObj = BasePerson.factory(study)
    id_col = personObj.get_id_field_name()
    cur = con.cursor()
    event_mappings = _read_event_mappings(con, study.study_id)
    # Synthetic primary keys for the two occurrence tables, assigned
    # sequentially across the whole run.
    procedure_id=0
    visit_id=0
    for row in event_mappings:
        logger.info("XX events_mapping.populate() %s", row)
        from_table_name=row['from_table']
        prefix = from_table_name.split('_')[0]
        for person_id in person_id_list:
            query=""
            # QUERY FOR THE VALUES, BEST SPECIFIC? TODO
            # NOTE(review): table/column/where-clause fragments come from the
            # event-mappings table and are spliced in with str.format — safe
            # only as long as that table is trusted; person_id itself is
            # passed as a bound parameter.
            if (row['from_column'] != NULL_PLACEHOLDER):
                # a value and a date, like the Death table
                if (row['where_clause'] != NULL_PLACEHOLDER) :
                    query = ("SELECT {0}, {1} from {2} where " + id_col + " = %s and ( {3} )").format(row['from_date_column'],
                        row['from_column'], row['from_table'], row['where_clause'])
                    #logger.debug("QUERY1:%s %s", query, person_id)
                    logger.info("QUERY1:%s %s", query, person_id)
                    cur.execute(query, (personObj.convert_person_id_to_study(person_id),))
                else:
                    query = ("SELECT {0}, {1} from {2} where " + id_col + " = %s").format(row['from_date_column'],
                        row['from_column'], row['from_table'])
                    #logger.debug("QUERY2: %s, %s", query, row)
                    logger.info("QUERY2: %s, %s", query, row)
                    cur.execute(query, (personObj.convert_person_id_to_study(person_id),))
            else:
                # just a date, like the Occurrence tables:
                if (row['where_clause'] != NULL_PLACEHOLDER) :
                    query = ("SELECT {0} from {1} where " + id_col + " = %s and ( {2} )").format(row['from_date_column'],
                        row['from_table'], row['where_clause'])
                    #logger.debug("QUERY3: %s %s", query, row)
                    logger.info("QUERY3: %s %s", query, row)
                    cur.execute(query, (personObj.convert_person_id_to_study(person_id),))
                else:
                    query = ("SELECT {0} from {1} where " + id_col + " = %s").format(row['from_date_column'],
                        row['from_table'])
                    #logger.debug("QUERY4: %s %s", query, row)
                    logger.info("QUERY4: %s %s", query, row)
                    cur.execute(query, (personObj.convert_person_id_to_study(person_id),))
            value_rows = cur.fetchall()
            logger.debug("events.populate() from:%s to:%s rows:%d",
                from_table_name, row['to_table'], len(value_rows))
            # LOOKUP the id (vocab, concept) from the mappings row
            # NOTE(review): Concept.objects.get() raises DoesNotExist rather
            # than returning None when the concept is missing, so the
            # concept_id == None branch below looks unreachable — confirm.
            concept_id = Concept.objects.get(vocabulary_id=row['value_vocabulary_id'],
                concept_code=row['value_concept_code']).concept_id
            # INSERT
            if (len(value_rows) == 0):
                logger.warn("no rows back from %s person:%s, with %s", query, person_id, row)
            elif (concept_id == None) :
                logger.error("No concept %s, %s", row['value_vocabulary_id'], row['value_concept_code'])
            else:
                for value_row in value_rows:
                    # value_row[0] is the event date; skip rows without one.
                    if value_row[0] != None :
                        logger.debug("VALUE ROWS pid:%s query:%s value:%s num-rows:%d",
                            person_id, query, value_row, len(value_rows))
                        to_table_name=row['to_table']
                        # sometimes this is a date, sometimes a string. Use string,
                        # the lowest-common denominator, works for all sources
                        the_date_value=''
                        # Reformat "YYYY-MM-DD[ HH:MM:SS]" to "MM/DD/YYYY";
                        # NOTE(review): bare except falls back to the raw
                        # string for any unparseable shape — intentional
                        # best-effort, but it hides real errors.
                        try:
                            date_time_string = str(value_row[0])
                            (year, month, day) = date_time_string.split(' ')[0].split('-')
                            the_date_value = "{0}/{1}/{2}".format(month, day, year)
                        except:
                            logger.error("populate raised on {}".format(date_time_string))
                            the_date_value = date_time_string
                        # INSERT DEATH
                        if to_table_name == 'Death':
                            statement = "INSERT into death (person_id, death_date, death_datetime, death_type_concept_id, cause_concept_id)" \
                                + " values ( %s, %s, %s, %s, %s)"
                            logger.debug("death: %s, %s, %s, %s, %s %s %s %s); ",
                                statement, person_id, the_date_value, row['addl_value'], concept_id,
                                row['value_vocabulary_id'], row['value_concept_code'], value_row[0] )
                            cur.execute(statement,
                                (person_id, the_date_value, the_date_value, row['addl_value'], concept_id))
                        # INSERT VISIT OCCURRENCE
                        elif to_table_name == 'visit_occurrence':
                            statement = ("INSERT into visit_occurrence "
                                "(visit_occurrence_id, person_id, visit_concept_id, visit_start_date, "
                                " visit_start_datetime, visit_end_date, visit_type_concept_id)"
                                " values ( %s, %s, %s, %s, %s, %s, %s)")
                            logger.debug("visit %s %s %s %s %s %s %s %s",
                                statement, visit_id, person_id, concept_id, the_date_value,
                                row['addl_value'], row['value_vocabulary_id'], row['value_concept_code'])
                            # Same date reused for start/datetime/end: the
                            # source provides only one date per event.
                            cur.execute(statement,
                                (visit_id, person_id, concept_id, the_date_value, the_date_value,
                                the_date_value, row['addl_value']))
                            visit_id += 1
                        # INSERT PROCEDURE OCCURRENCE
                        elif to_table_name == 'procedure_occurrence':
                            statement = ("INSERT into procedure_occurrence"
                                " (procedure_occurrence_id, person_id, procedure_concept_id, "
                                " procedure_date, procedure_datetime, procedure_type_concept_id)"
                                " values ( %s, %s, %s, %s, %s, %s)")
                            logger.debug("proc: %s %s %s %s *%s* %s %s %s %s",
                                statement, procedure_id, person_id, concept_id, the_date_value,
                                row['addl_value'], row['value_vocabulary_id'], row['value_concept_code'],
                                value_row[0] )
                            cur.execute(statement,
                                (procedure_id, person_id, concept_id, the_date_value, the_date_value,
                                row['addl_value']))
                            procedure_id += 1
                        else:
                            logger.error("unknown table name %s in events.populate() %s", to_table_name, row)
                    else:
                        logger.warn("None value in events_mapping.populate() with %s", value_row)
            value_rows=None
    cur.close()
    con.commit()