def test_site_in_project_key_number_two(MergeToUpload, site_handle2,
                                        file_handle2, meta_handle2,
                                        project_handle2, taxa_handle2,
                                        time_handle2, count_handle2,
                                        covar_handle2):
    facade = face.Facade()

    facade.input_register(meta_handle2)
    facade.meta_verify()

    facade.input_register(file_handle2)
    facade.load_data()

    facade.input_register(site_handle2)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf

    siteid = site_handle2.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    print('test2 sitelevels: ', sitelevels)
    facade._valueregister['siteid'] = siteid

    print('study_site_table (test): ', study_site_table)
    facade.create_log_record('study_site_table')

    lter = meta_handle2.lnedentry['lter']
    ltercol = produce_null_df(
        1, ['lter_table_fkey'], len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)

    study_site_table_og_col = study_site_table.columns.values.tolist()
    study_site_table_single = study_site_table.iloc[0, :]
    study_site_table_single_df = DataFrame([study_site_table_single])
    study_site_table_single_df.columns = study_site_table_og_col
    print('study site single: ', study_site_table_single)
    study_site_table_single_df.loc[0, 'study_site_key'] = 'NULL'
    print('study_site_table: ', study_site_table_single_df)
    facade.push_tables['study_site_table'] = study_site_table_single_df

    facade.input_register(project_handle2)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')

    facade.input_register(taxa_handle2)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    print('taxa columns after make taxa table: ', taxa_table.columns)
    facade.create_log_record('taxa_table')
    print('taxa columns before time_table: ', taxa_table.columns)

    facade.input_register(time_handle2)
    timetable = tparse.TimeParse(
        facade._data, time_handle2.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')
    print('taxa columns before count_table: ', taxa_table.columns)

    facade.input_register(count_handle2)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[count_handle2.tablename] = rawtable
    facade.create_log_record(count_handle2.tablename)
    print('taxa columns before covar_table: ', taxa_table.columns)

    facade.input_register(covar_handle2)
    covartable = ddf.DictionaryDataframe(
        facade._data, covar_handle2.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')

    facade._valueregister['globalid'] = meta_handle2.lnedentry['globalid']
    facade._valueregister['lter'] = meta_handle2.lnedentry['lter']
    facade._valueregister['siteid'] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)

    study_site_table.to_sql(
        'study_site_table', orm.conn, if_exists='append', index=False)
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql(
        'project_table', orm.conn, if_exists='append', index=False)

    print('taxa columns before site_in_proj method: ', taxa_table.columns)
    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)

    print('taxa columns before user taxa merge method: ', taxa_table.columns)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)

    taxa_column_in_data = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())]
    taxa_column_in_push_table = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())]
    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)
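# --- Illustrative sketch (not a project fixture or test) ----------------
# The tests here rename TimeParse output with a '_derived' suffix before
# concatenating it to the raw data. A minimal pandas-only example of why:
# concat(axis=1) aligns rows on the index, and the suffix keeps the parsed
# date columns from colliding with identically named raw columns. The
# frames and values below are made up.
def _sketch_derived_suffix_concat():
    from pandas import DataFrame, concat

    raw = DataFrame({'year': [1999, 2000], 'count': [3, 5]})
    parsed = DataFrame({'year': [1999, 2000], 'month': [6, 7], 'day': [1, 2]})
    # Mirror the tests' list comprehension for the suffix
    parsed.columns = [x + '_derived' for x in parsed.columns.tolist()]
    obs_time = concat([parsed, raw], axis=1)
    # Both the derived and the original 'year' columns survive the concat
    assert 'year_derived' in obs_time.columns and 'year' in obs_time.columns
    return obs_time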
def test_site_in_project_key(
    MergeToUpload,
    site_handle_4_percent_cover,
    file_handle_4_percent_cover,
    meta_handle_4_percent_cover,
    project_handle_4_percent_cover,
    taxa_handle_4_percent_cover,
    time_handle_4_percent_cover,
    biomass_handle_4_percent_cover,
    covar_handle_4_percent_cover,
):
    facade = face.Facade()

    facade.input_register(meta_handle_4_percent_cover)
    facade.meta_verify()

    facade.input_register(file_handle_4_percent_cover)
    facade.load_data()

    facade.input_register(site_handle_4_percent_cover)
    sitedirector = facade.make_table("siteinfo")
    study_site_table = sitedirector._availdf
    print("study_site_table (test): ", study_site_table)
    facade.create_log_record("study_site_table")

    lter = meta_handle_4_percent_cover.lnedentry["lter"]
    ltercol = produce_null_df(1, ["lter_table_fkey"], len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print("study_site_table: ", study_site_table)
    facade.push_tables["study_site_table"] = study_site_table

    siteid = site_handle_4_percent_cover.lnedentry["study_site_key"]
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister["siteid"] = siteid

    facade.input_register(project_handle_4_percent_cover)
    maindirector = facade.make_table("maininfo")
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables["project_table"] = project_table
    facade.create_log_record("project_table")

    facade.input_register(taxa_handle_4_percent_cover)
    taxadirector = facade.make_table("taxainfo")
    taxa_table = taxadirector._availdf
    facade.push_tables["taxa_table"] = taxa_table
    facade.create_log_record("taxa_table")

    facade.input_register(time_handle_4_percent_cover)
    timetable = tparse.TimeParse(
        facade._data, time_handle_4_percent_cover.lnedentry
    ).formater()
    facade.push_tables["timetable"] = timetable
    facade.create_log_record("timetable")

    facade.input_register(biomass_handle_4_percent_cover)
    rawdirector = facade.make_table("rawinfo")
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[biomass_handle_4_percent_cover.tablename] = rawtable
    facade.create_log_record(biomass_handle_4_percent_cover.tablename)

    facade.input_register(covar_handle_4_percent_cover)
    covartable = ddf.DictionaryDataframe(
        facade._data, covar_handle_4_percent_cover.lnedentry["columns"]
    ).convert_records()
    facade.push_tables["covariates"] = covartable
    facade.create_log_record("covartable")

    facade._valueregister["globalid"] = meta_handle_4_percent_cover.lnedentry["globalid"]
    facade._valueregister["lter"] = meta_handle_4_percent_cover.lnedentry["lter"]
    facade._valueregister["siteid"] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + "_derived" for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print("merge class obs_time columns: ", observation_time_df.columns)
    print("merge class project table: ", project_table)

    try:
        study_site_table.to_sql(
            "study_site_table", orm.conn, if_exists="append", index=False)
    except Exception as e:
        print("Sites in db: ", str(e))
    project_table["lter_project_fkey"] = facade._valueregister["lter"]
    project_table.to_sql("project_table", orm.conn, if_exists="append", index=False)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister["lter"],
        studysitelabel=siteid,
        studysitelevels=sitelevels,
    )
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid,
    )
    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs["taxainfo"].lnedentry.items())]
    taxa_column_in_data = [
        x[1] for x in list(facade._inputs["taxainfo"].lnedentry.items())]
    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name="{}".format(
            re.sub("_table", "", facade._inputs["rawinfo"].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table,
    )
    obs_columns_in_data = [
        x[1] for x in list(facade._inputs["rawinfo"].lnedentry.items())]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs["rawinfo"].lnedentry.items())]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table,
    )
def test_site_in_project_key(
        MergeToUpload, site_handle_3_biomass, file_handle_3_biomass,
        meta_handle_3_biomass, project_handle_3_biomass,
        taxa_handle_3_biomass, time_handle_3_biomass,
        count_handle_3_biomass, covar_handle_3_biomass):
    facade = face.Facade()

    facade.input_register(meta_handle_3_biomass)
    facade.meta_verify()

    facade.input_register(file_handle_3_biomass)
    facade.load_data()

    facade.input_register(site_handle_3_biomass)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf
    print('study_site_table (test): ', study_site_table)
    facade.create_log_record('study_site_table')

    lter = meta_handle_3_biomass.lnedentry['lter']
    ltercol = produce_null_df(
        1, ['lter_table_fkey'], len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print('study_site_table: ', study_site_table)
    facade.push_tables['study_site_table'] = study_site_table

    siteid = site_handle_3_biomass.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister['siteid'] = siteid

    facade.input_register(project_handle_3_biomass)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')

    facade.input_register(taxa_handle_3_biomass)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    facade.create_log_record('taxa_table')

    facade.input_register(time_handle_3_biomass)
    timetable = tparse.TimeParse(
        facade._data, time_handle_3_biomass.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')

    facade.input_register(count_handle_3_biomass)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print('rawtable facade tmaker: ', rawtable)
    facade.push_tables[count_handle_3_biomass.tablename] = rawtable
    facade.create_log_record(count_handle_3_biomass.tablename)

    facade.input_register(covar_handle_3_biomass)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_3_biomass.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')

    facade._valueregister['globalid'] = meta_handle_3_biomass.lnedentry['globalid']
    facade._valueregister['lter'] = meta_handle_3_biomass.lnedentry['lter']
    facade._valueregister['siteid'] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print('merge class obs_time df: ', observation_time_df)
    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)

    try:
        study_site_table.to_sql(
            'study_site_table',
            orm.conn, if_exists='append', index=False)
    except Exception as e:
        print('Sites in db: ', str(e))
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql(
        'project_table', orm.conn, if_exists='append', index=False)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)
    taxa_column_in_data = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())]
    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())]
    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)
    obs_columns_in_data = [
        x[1] for x in list(facade._inputs['rawinfo'].lnedentry.items())]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs['rawinfo'].lnedentry.items())]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table)
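# --- Illustrative sketch (assumption about lnedentry semantics) ---------
# The list comprehensions above split lnedentry.items() into push-table
# and raw-data column lists. Judging from the variable names in most of
# these tests, keys appear to be the formatted (push-table) names and
# values the raw-data names; the dict below is invented to show the
# x[0]/x[1] split, not taken from a real handle.
def _sketch_lnedentry_column_split():
    lnedentry = {'sppcode': 'SPECIES_CODE', 'genus': 'GENUS_RAW'}
    columns_in_push_table = [x[0] for x in list(lnedentry.items())]
    columns_in_data = [x[1] for x in list(lnedentry.items())]
    return columns_in_push_table, columns_in_data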
def test(Facade_push, site_handle_1_count, file_handle_1_count,
         meta_handle_1_count, project_handle_1_count, taxa_handle_1_count,
         time_handle_1_count, count_handle_1_count, covar_handle_1_count):
    facade = Facade_push()

    facade.input_register(meta_handle_1_count)
    facade.meta_verify()

    facade.input_register(file_handle_1_count)
    facade.load_data()

    facade.input_register(site_handle_1_count)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf
    study_site_table['lter_table_fkey'] = facade._valueregister[
        'globalid'] = meta_handle_1_count.lnedentry['lter']
    print(study_site_table)
    facade.push_tables['study_site_table'] = study_site_table

    siteid = site_handle_1_count.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)

    facade.input_register(project_handle_1_count)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables['project_table'] = project_table
    print('project table: ', project_table)

    facade.input_register(taxa_handle_1_count)
    taxadirector = facade.make_table('taxainfo')
    facade.push_tables['taxa_table'] = taxadirector._availdf

    facade.input_register(time_handle_1_count)
    timetable = tparse.TimeParse(
        facade._data, time_handle_1_count.lnedentry).formater()
    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    facade.push_tables['timetable'] = observation_time_df

    facade.input_register(count_handle_1_count)
    rawdirector = facade.make_table('rawinfo')
    facade.push_tables[
        facade._inputs['rawinfo'].tablename] = rawdirector._availdf

    facade.input_register(covar_handle_1_count)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_1_count.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable

    facade._valueregister['globalid'] = meta_handle_1_count.lnedentry[
        'globalid']
    facade._valueregister['lter'] = meta_handle_1_count.lnedentry['lter']
    facade._valueregister['siteid'] = facade._inputs['siteinfo'].lnedentry[
        'study_site_key']

    facade.push_merged_data()
def test_site_in_project_key(MergeToUpload, site_handle_corner_case,
                             file_handle_corner_case,
                             meta_handle_corner_case,
                             project_handle_corner_case,
                             taxa_handle_corner_case,
                             time_handle_corner_case,
                             percent_cover_handle_corner_case,
                             covar_handle_corner_case):
    facade = face.Facade()

    facade.input_register(meta_handle_corner_case)
    facade.meta_verify()

    facade.input_register(file_handle_corner_case)
    facade.load_data()

    siteid = site_handle_corner_case.lnedentry['study_site_key']
    facade._data[siteid].replace(
        {
            'C': 'site_jrn_zone_creosotebush',
            'G': 'site_jrn_zone_grassland'
        }, inplace=True)

    facade.input_register(site_handle_corner_case)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf
    print('study_site_table (test): ', study_site_table)
    facade.create_log_record('study_site_table')

    lter = meta_handle_corner_case.lnedentry['lter']
    ltercol = produce_null_df(
        1, ['lter_table_fkey'], len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print('study_site_table: ', study_site_table)
    facade.push_tables['study_site_table'] = study_site_table

    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister['siteid'] = siteid

    facade.input_register(project_handle_corner_case)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')

    facade.input_register(taxa_handle_corner_case)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    facade.create_log_record('taxa_table')

    facade.input_register(time_handle_corner_case)
    timetable = tparse.TimeParse(
        facade._data, time_handle_corner_case.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')

    facade.input_register(percent_cover_handle_corner_case)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[percent_cover_handle_corner_case.tablename] = rawtable
    facade.create_log_record(percent_cover_handle_corner_case.tablename)

    facade.input_register(covar_handle_corner_case)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_corner_case.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')

    facade._valueregister['globalid'] = meta_handle_corner_case.lnedentry[
        'globalid']
    facade._valueregister['lter'] = meta_handle_corner_case.lnedentry['lter']
    facade._valueregister['siteid'] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)

    # try:
    #     study_site_table.to_sql(
    #         'study_site_table',
    #         orm.conn, if_exists='append', index=False)
    # except Exception as e:
    #     print('Sites in db: ', str(e))
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql(
        'project_table', orm.conn, if_exists='append', index=False)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)
    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())]
    taxa_column_in_data = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())]
    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)
    obs_columns_in_data = [
        x[1] for x in list(facade._inputs['rawinfo'].lnedentry.items())]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs['rawinfo'].lnedentry.items())]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table)
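# --- Illustrative sketch (not a project fixture or test) ----------------
# The corner-case test above recodes raw site codes to canonical study
# site keys before building the site table. The same recoding on a
# made-up frame, written in assignment form (robust under pandas
# copy-on-write) rather than inplace=True:
def _sketch_site_code_recoding():
    from pandas import DataFrame

    df = DataFrame({'site': ['C', 'G', 'C']})
    # Series.replace with a dict maps each old value to its new value
    df['site'] = df['site'].replace(
        {'C': 'site_jrn_zone_creosotebush',
         'G': 'site_jrn_zone_grassland'})
    return df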
def submit_change(self):
    '''
    Method to get data from the user form and build the
    project table to upload.
    '''
    sender = self.sender()
    self.form_entries = OrderedDict((
        ('samplingunits', self.dtype(
            self.lnedDatatypeunits.text() != '',
            self.lnedDatatypeunits.text(), None)),
        ('datatype', self.dtype(
            self.cboxDatatype.currentText() != '',
            self.cboxDatatype.currentText(), None)),
        ('structured_type_1', self.structure(
            self.ckStructure1.isChecked(),
            self.lnedStructure1.text(),
            self.lnedStructureunits1.text())),
        ('structured_type_2', self.structure(
            self.ckStructure2.isChecked(),
            self.lnedStructure2.text(),
            self.lnedStructureunits2.text())),
        ('structured_type_3', self.structure(
            self.ckStructure3.isChecked(),
            self.lnedStructure3.text(),
            self.lnedStructureunits3.text())),
        ('structured_type_4', self.structure(
            self.ckStructure4.isChecked(),
            self.lnedStructure4.text(),
            self.lnedStructureunits4.text())),
        ('samplefreq', self.sampfreq(
            self.cboxSamplingfrequency.currentText() != 'NULL',
            self.cboxSamplingfrequency.currentText(), None)),
        ('studytype', self.studytype(
            self.cboxStudytype.currentText() != 'NULL',
            self.cboxStudytype.currentText(), None)),
        ('community', self.community(
            True,
            'yes' if self.rbtnCommunityyes.isChecked() else 'no',
            None)),
        ('spatial_replication_level_1_extent', self.ext(
            self.ckSpatialextent1.isChecked(),
            self.lnedSpatialextent1.text(),
            self.lnedSpatialextentunits1.text())),
        ('spatial_replication_level_2_extent', self.ext(
            self.ckSpatialextent2.isChecked(),
            self.lnedSpatialextent2.text(),
            self.lnedSpatialextentunits2.text())),
        ('spatial_replication_level_3_extent', self.ext(
            self.ckSpatialextent3.isChecked(),
            self.lnedSpatialextent3.text(),
            self.lnedSpatialextentunits3.text())),
        ('spatial_replication_level_4_extent', self.ext(
            self.ckSpatialextent4.isChecked(),
            self.lnedSpatialextent4.text(),
            self.lnedSpatialextentunits4.text())),
        ('spatial_replication_level_5_extent', self.ext(
            self.ckSpatialextent5.isChecked(),
            self.lnedSpatialextent5.text(),
            self.lnedSpatialextentunits5.text())),
        ('treatment_type_1', self.treatments(
            self.cboxTreatment1.currentText() != 'NULL',
            self.cboxTreatment1.currentText(), None)),
        ('treatment_type_2', self.treatments(
            self.cboxTreatment2.currentText() != 'NULL',
            self.cboxTreatment2.currentText(), None)),
        ('treatment_type_3', self.treatments(
            self.cboxTreatment3.currentText() != 'NULL',
            self.cboxTreatment3.currentText(), None)),
        ('control_group', self.treatments(
            self.ckControlgroup.isChecked(),
            self.lnedControlgroup.text(), None)),
        ('derived', self.derived(
            self.cboxDerived.currentText() != 'NULL',
            self.cboxDerived.currentText(), None)),
        ('authors', self.contacts(
            self.lnedAuthor.text() != '',
            self.lnedAuthor.text(), None)),
        ('authors_contact', self.contacts(
            self.lnedContact.text() != '',
            self.lnedContact.text(), None))
    ))

    self.mainini = ini.InputHandler(
        name='maininfo', tablename='project_table',
        lnedentry=self.form_entries)
    self.facade.input_register(self.mainini)
    try:
        self.maindirector = self.facade.make_table('maininfo')
    except Exception as e:
        print(str(e))
        self.error.showMessage(str(e))

    self.facade.create_log_record('project_table')
    self._log = self.facade._tablelog['project_table']
    self.project_table = self.maindirector._availdf.copy()

    try:
        check_list = [
            'authors', 'authors_contact', 'studytype', 'derived',
            'community', 'samplefreq', 'datatype'
        ]
        record = None
        for i, item in enumerate(check_list):
            print(item, ': ', self.form_entries[item].entry)
            record = item
            assert self.form_entries[item].entry != 'NULL'
            assert self.form_entries[item].entry != ''
        if sender is self.btnPreview:
            self.mainmodel = self.viewEdit(self.project_table)
            self.preview.tabviewPreview.setModel(self.mainmodel)
            self.preview.show()
            return
        self.facade.push_tables['project_table'] = self.project_table
        self._log.debug(
            'project_table mod: ' +
            ' '.join(self.project_table.columns.values.tolist()))
        orm.convert_types(self.project_table, orm.project_types)
        hlp.write_column_to_log(self.form_entries, self._log, 'project_table')
        self.close()
    except Exception as e:
        print(str(e))
        self.error.showMessage('Invalid entry: ' + record)
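# --- Illustrative sketch (assumption, not the project's definitions) ----
# submit_change wraps every widget in a three-field value via helpers
# like self.dtype and self.structure. The namedtuple below is a guess at
# that shape -- (<has a usable value?>, <the value>, <optional units>) --
# chosen so the .entry checks in the check_list loop above make sense.
def _sketch_form_entry_checks():
    from collections import OrderedDict, namedtuple

    Entry = namedtuple('Entry', ['use', 'entry', 'unit'])  # hypothetical
    text = 'count'
    form_entries = OrderedDict([
        ('datatype', Entry(text != '', text, None)),
        ('samplingunits', Entry(False, '', None)),
    ])
    # The validation loop rejects empty or 'NULL' required entries
    assert form_entries['datatype'].entry != 'NULL'
    assert form_entries['datatype'].entry != ''
    return form_entries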
def merge_for_datatype_table_upload(
        self, raw_dataframe,
        formated_dataframe,
        formated_dataframe_name,
        covariate_dataframe,
        siteinprojkeydf,
        raw_data_taxa_columns,
        uploaded_taxa_columns):
    print('start dtype upload')
    orm.replace_numeric_null_with_string(raw_dataframe)
    orm.replace_numeric_null_with_string(formated_dataframe)
    print('replacing nulls is a pain')

    # Step 2) Query taxa_table to get the auto-generated
    # primary keys returned, and turn the query data into
    # a dataframe.
    session = self.session
    taxa_key_statement = session.execute(
        select([orm.taxa_table]).
        where(
            orm.taxa_table.__table__.c.site_in_project_taxa_key.in_(
                siteinprojkeydf['site_in_project_key'].values.tolist())
        )
    )
    session.close()
    taxa_key_df = DataFrame(taxa_key_statement.fetchall())
    taxa_key_df.columns = taxa_key_statement.keys()
    taxa_key_df.replace({None: 'NA'}, inplace=True)

    # Step 3) Subset the queried taxa table to the records that
    # pertain to this datatype's data (because we will be subsetting
    # from this queried taxa table later)
    dtype_subset_taxa_key_df = taxa_key_df[
        taxa_key_df['site_in_project_taxa_key'].isin(
            siteinprojkeydf['site_in_project_key'])]

    # Step 4) Merge the taxa_table query results with
    # the site_in_project table query that was performed
    # to upload the taxa_table (see above). This gives
    # you a table with site names and taxonomic information,
    # allowing for a merge with the original dtype data
    tbl_dtype_merged_taxakey_siteinprojectkey = merge(
        dtype_subset_taxa_key_df, siteinprojkeydf,
        left_on='site_in_project_taxa_key',
        right_on='site_in_project_key', how='inner')

    raw_dataframe_siteinproj = merge(
        raw_dataframe, siteinprojkeydf,
        left_on=self.sitelabel, right_on='study_site_table_fkey',
        sort=False, how='left')

    raw_data_taxa_columns.append('site_in_project_key')
    uploaded_taxa_columns.append('site_in_project_taxa_key')
    print('updated raw data col list: ', raw_data_taxa_columns)
    print('updated taxa col list: ', uploaded_taxa_columns)

    # Step 5) Merge the original dtype data with the
    # merged taxa_table query so all foreign keys
    # (taxa and site_in_project) are matched up with
    # the original observations.
    dtype_merged_with_taxa_and_siteinproj_key = merge(
        raw_dataframe_siteinproj,
        tbl_dtype_merged_taxakey_siteinprojectkey,
        left_on=list(raw_data_taxa_columns),
        right_on=list(uploaded_taxa_columns),
        how='left')

    # Step 6) Take the merged original data with all foreign keys,
    # and merge that with the formatted dtype table on index
    # values (the order of records should not change from the
    # original data to the formatted data)
    tbl_dtype_merged_with_all_keys = merge(
        formated_dataframe,
        dtype_merged_with_taxa_and_siteinproj_key,
        left_index=True, right_index=True, how='inner',
        suffixes=('', '_y'))

    # Step 7) List the columns that will be needed to push the
    # dtype table to the database (including foreign keys)
    tbl_dtype_columns_to_upload = [
        'taxa_table_key', 'site_in_project_taxa_key', 'year_derived',
        'month_derived', 'day_derived', 'spatial_replication_level_1',
        'spatial_replication_level_2', 'spatial_replication_level_3',
        'spatial_replication_level_4', 'spatial_replication_level_5',
        'structure_type_1', 'structure_type_2', 'structure_type_3',
        'structure_type_4', 'treatment_type_1', 'treatment_type_2',
        'treatment_type_3', 'covariates'
    ]
    time_cols_rename = {
        'year_derived': 'year',
        'month_derived': 'month',
        'day_derived': 'day'
    }
    tbl_dtype_columns_to_upload.append(
        '{}_observation'.format(str(formated_dataframe_name)))
    tbl_dtype_merged_with_all_keys = concat(
        [tbl_dtype_merged_with_all_keys, covariate_dataframe], axis=1)

    # Step 8) Subset the fully merged dtype table data
    tbl_dtype_to_upload = tbl_dtype_merged_with_all_keys[
        tbl_dtype_columns_to_upload]
    tbl_dtype_to_upload.rename(columns=time_cols_rename, inplace=True)

    # Step 9) Rename columns to match the database table,
    # and convert data types
    tbl_dtype_to_upload.rename(columns={
        'taxa_table_key': 'taxa_{}_fkey'.format(
            str(formated_dataframe_name))}, inplace=True)
    datatype_key = 'site_in_project_{}_fkey'.format(
        str(formated_dataframe_name))
    tbl_dtype_to_upload.rename(columns={
        'site_in_project_taxa_key': datatype_key}, inplace=True)
    tbl_dtype_to_upload.fillna('NA', inplace=True)
    self.formateddata = tbl_dtype_to_upload

    # Step 10) Upload to the database
    datatype_table = '{}_table'.format(str(formated_dataframe_name))
    datatype_obs = '{}_observation'.format(str(formated_dataframe_name))
    print('push raw_before', tbl_dtype_to_upload.columns)
    tbl_dtype_to_upload[datatype_obs] = to_numeric(
        tbl_dtype_to_upload[datatype_obs], errors='coerce')

    text_cols = [
        'spatial_replication_level_1', 'spatial_replication_level_2',
        'spatial_replication_level_3', 'spatial_replication_level_4',
        'spatial_replication_level_5', 'treatment_type_1',
        'treatment_type_2', 'treatment_type_3', 'structure_type_1',
        'structure_type_2', 'structure_type_3', 'structure_type_4'
    ]
    tbl_dtype_to_upload[text_cols] = tbl_dtype_to_upload[
        text_cols].applymap(str)
    tbl_dtype_to_upload[text_cols] = tbl_dtype_to_upload[
        text_cols].applymap(lambda x: x.strip())
    print(tbl_dtype_to_upload.dtypes)
    print(self.table_types[datatype_table])

    try:
        orm.convert_types(
            tbl_dtype_to_upload, self.table_types[datatype_table])
    except Exception as e:
        print('converting issues: ', str(e))
    print('push raw_after', tbl_dtype_to_upload.columns)
    print(tbl_dtype_to_upload.dtypes)

    other_numerics = [
        'year', 'month', 'day', datatype_key,
        'taxa_{}_fkey'.format(str(formated_dataframe_name))
    ]
    # Replace textual null markers with a numeric sentinel. Note that
    # DataFrame.replace takes a dict of {old: new} pairs, and the result
    # must be assigned back: replacing on a column subset returns a copy.
    tbl_dtype_to_upload[other_numerics] = tbl_dtype_to_upload[
        other_numerics].replace({'NA': -99999, 'NaN': -99999, None: -99999})
    tbl_dtype_to_upload[other_numerics] = tbl_dtype_to_upload[
        other_numerics].fillna(-99999)
    tbl_dtype_to_upload.loc[:, other_numerics] = tbl_dtype_to_upload.loc[
        :, other_numerics].apply(to_numeric, errors='coerce')

    metadata_key_column_name = 'metadata_{}_key'.format(
        formated_dataframe_name)
    tbl_dtype_to_upload[metadata_key_column_name] = int(self.metadata_key)

    # Attempt direct copy_from/copy_to commands with a StringIO
    # text buffer (should be faster than pandas to_sql)
    sql_datatype_columns = tbl_dtype_to_upload.columns.values.tolist()
    s_buf = io.StringIO()
    tbl_dtype_to_upload.to_csv(s_buf, index=False, sep="\t")
    s_buf.seek(0)
    session = orm.Session()
    cur = session.connection().connection.cursor()
    copy_sql_statement = (
        "COPY {}({}) FROM STDIN WITH CSV HEADER DELIMITER AS '\t'".format(
            datatype_table, ", ".join(sql_datatype_columns)))
    cur.copy_expert(copy_sql_statement, s_buf)
    session.commit()
    session.close()
    # tbl_dtype_to_upload.to_sql(
    #     datatype_table,
    #     orm.conn, if_exists='append', index=False, chunksize=1000)
    print('past datatype upload')
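# --- Illustrative sketch (standalone restatement; names are placeholders)
# The bulk load at the end of merge_for_datatype_table_upload streams the
# dataframe through an in-memory CSV buffer into Postgres COPY, which is
# typically much faster than row-wise inserts. 'session' is assumed to be
# a SQLAlchemy session backed by a psycopg2 connection, as orm.Session
# appears to be above.
def _sketch_copy_from_buffer(df, table_name, session):
    import io

    s_buf = io.StringIO()
    df.to_csv(s_buf, index=False, sep='\t')
    s_buf.seek(0)
    # Drop down to the raw psycopg2 cursor for copy_expert
    cur = session.connection().connection.cursor()
    copy_sql = (
        "COPY {}({}) FROM STDIN WITH CSV HEADER DELIMITER AS '\t'".format(
            table_name, ', '.join(df.columns.values.tolist())))
    cur.copy_expert(copy_sql, s_buf)
    session.commit()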
def merge_for_taxa_table_upload(
        self, formated_taxa_table, siteinprojkeydf, sitelabel):
    '''
    Method to take the data stored in the facade class and upload
    it to the database.

    REQUIRES: the formatted taxa table (with database column names),
    a dataframe with site levels merged to site_in_project primary
    keys (siteinprojkeydf, created in the method above), and the name
    of the column with the site label (sitelabel).

    To upload the taxa table a series of steps must be performed:

    1) Merge the formatted taxa table with the site_in_project data
    that contains the site_in_project primary keys and site levels
    (merge on site levels).

    2) Drop the columns that are not necessary for the taxa_table
    push (metadata key: project_table_fkey; site label:
    study_site_table_fkey).

    3) Rename the merged site_in_project primary key to match the
    taxa table column name.

    4) Push the taxa_table to the database.
    '''
    print('starting taxa table upload')
    orm.replace_numeric_null_with_string(formated_taxa_table)
    print('past orm replace numeric')
    print('siteinproj key df: ', siteinprojkeydf)
    print('siteinproj key df: ', siteinprojkeydf.columns)
    print('formatted taxa df: ', formated_taxa_table)
    print('formatted taxa df: ', formated_taxa_table.columns)

    tbl_taxa_with_site_in_proj_key = merge(
        formated_taxa_table, siteinprojkeydf,
        left_on=sitelabel, right_on='study_site_table_fkey',
        how='inner')
    print('past tbl_taxa site in proj key')

    tbl_taxa_merged = tbl_taxa_with_site_in_proj_key.copy()
    tbl_taxa_merged.drop(
        ['study_site_table_fkey', sitelabel, 'project_table_fkey'],
        inplace=True, axis=1)
    print('past tbl_taxa drop: ', tbl_taxa_merged)

    tbl_taxa_merged.rename(
        columns={'site_in_project_key': 'site_in_project_taxa_key'},
        inplace=True)
    print('merge class taxa merged: ', tbl_taxa_merged)
    tbl_taxa_merged.fillna('NA', inplace=True)

    try:
        orm.convert_types(tbl_taxa_merged, orm.taxa_types)
    except Exception as e:
        print('converting issues: ', str(e))

    session = self.session
    site_in_proj_key_query = session.execute(
        select(
            [orm.taxa_table.__table__.c.site_in_project_taxa_key]
        ).distinct()
    )
    session.close()
    check_site_in_proj_keys = DataFrame(site_in_proj_key_query.fetchall())
    if check_site_in_proj_keys.empty:
        check_site_in_proj_keys = []
    else:
        check_site_in_proj_keys = check_site_in_proj_keys[
            0].values.tolist()

    # Only push when these site_in_project keys are not already
    # present in the taxa_table
    if not all(
            x in check_site_in_proj_keys
            for x in siteinprojkeydf['site_in_project_key'].values.tolist()):
        taxacolumns = [
            'site_in_project_taxa_key', 'sppcode', 'kingdom',
            'subkingdom', 'infrakingdom', 'superdivision', 'division',
            'subdivision', 'superphylum', 'phylum', 'subphylum', 'clss',
            'subclass', 'ordr', 'family', 'genus', 'species',
            'common_name', 'authority', 'metadata_taxa_key'
        ]
        tbl_taxa_merged['metadata_taxa_key'] = int(self.metadata_key)
        tbl_taxa_merged[taxacolumns].to_sql(
            'taxa_table', orm.conn, if_exists='append', index=False)
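# --- Illustrative sketch (not a project fixture or test) ----------------
# The merge/drop/rename sequence in merge_for_taxa_table_upload, replayed
# on made-up frames: attach site_in_project keys by site label, drop the
# merge helper columns, and rename the key to the taxa table's name.
def _sketch_taxa_key_merge():
    from pandas import DataFrame, merge

    taxa = DataFrame({'site': ['a', 'b'], 'genus': ['Larrea', 'Bouteloua']})
    keys = DataFrame({
        'study_site_table_fkey': ['a', 'b'],
        'project_table_fkey': [1, 1],
        'site_in_project_key': [10, 11]})
    merged = merge(
        taxa, keys, left_on='site', right_on='study_site_table_fkey',
        how='inner')
    merged.drop(
        ['study_site_table_fkey', 'site', 'project_table_fkey'],
        inplace=True, axis=1)
    merged.rename(
        columns={'site_in_project_key': 'site_in_project_taxa_key'},
        inplace=True)
    return merged  # columns: genus, site_in_project_taxa_key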
        'study_site_key': study_site_key,
        'lter_table_fkey': site_dict['lterid'][1],
        'lat_study_site': site_lat,
        'lng_study_site': site_lng
    })

    if 'descript' not in site_dict.keys():
        sitetable['descript'] = ['NA'] * len(sitetable)
    else:
        try:
            sitetable['descript'] = site_dict['descript'][1]
        except Exception as e:
            sitetable['descript'] = ['NA'] * len(sitetable)
            print('descript not recorded')

    try:
        orm.convert_types(sitetable, orm.study_site_types)
    except Exception as e:
        print('converting issues: ', str(e))

    study_site_table_numeric_columns = ['lat_study_site', 'lng_study_site']
    # Convert every non-numeric column's datatype to string/object
    sitetable[sitetable.columns.difference(
        study_site_table_numeric_columns)] = sitetable[
            sitetable.columns.difference(
                study_site_table_numeric_columns)].applymap(str)
    # Strip strings of leading and trailing whitespace
    sitetable[sitetable.columns.difference(
        study_site_table_numeric_columns)] = sitetable[
            sitetable.columns.difference(
                study_site_table_numeric_columns)].applymap(
                    lambda x: x.strip())
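# --- Illustrative sketch (not part of the module) ------------------------
# The block above stringifies and strips every column except the numeric
# latitude/longitude pair. The same columns.difference pattern on a
# made-up frame:
def _sketch_stringify_non_numeric():
    from pandas import DataFrame

    tbl = DataFrame({'descript': ['  mesa  '], 'lat_study_site': [32.5]})
    numeric_cols = ['lat_study_site']
    other_cols = tbl.columns.difference(numeric_cols)
    tbl[other_cols] = tbl[other_cols].applymap(str)
    tbl[other_cols] = tbl[other_cols].applymap(lambda x: x.strip())
    return tbl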