# Shared imports assumed by every snippet in this listing (the concrete
# module paths are inferred from how the names are used and may differ
# from the original project's layout):
import io
import re
from collections import OrderedDict

from pandas import DataFrame, concat, merge, to_numeric
from sqlalchemy import select

# Project-local helpers, referenced below as: face (the Facade class),
# ini (input handlers), tparse (time parsing), ddf (dictionary-dataframe
# conversion), orm (database mappings and connection), hlp (log helpers),
# and produce_null_df (constant-value column builder).


def test_site_in_project_key_number_two(MergeToUpload, site_handle2,
                                        file_handle2, meta_handle2,
                                        project_handle2, taxa_handle2,
                                        time_handle2, count_handle2,
                                        covar_handle2):
    facade = face.Facade()

    facade.input_register(meta_handle2)
    facade.meta_verify()

    facade.input_register(file_handle2)
    facade.load_data()

    facade.input_register(site_handle2)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf

    siteid = site_handle2.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    print('test2 sitelevels: ', sitelevels)
    facade._valueregister['siteid'] = siteid

    print('study_site_table (test): ', study_site_table)

    facade.create_log_record('study_site_table')
    lter = meta_handle2.lnedentry['lter']
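    # produce_null_df(ncols, colnames, nrows, value) appears to build an
    # nrows-long dataframe whose named columns repeat `value`; here it adds
    # a constant 'lter_table_fkey' column to concatenate onto the site table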
    ltercol = produce_null_df(1, ['lter_table_fkey'], len(study_site_table),
                              lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    study_site_table_og_col = study_site_table.columns.values.tolist()

    study_site_table_single = study_site_table.iloc[0, :]

    study_site_table_single_df = DataFrame([study_site_table_single])
    study_site_table_single_df.columns = study_site_table_og_col

    print('study site single: ', study_site_table_single)

    study_site_table_single_df.loc[0, 'study_site_key'] = 'NULL'

    print('study_site_table: ', study_site_table_single_df)

    facade.push_tables['study_site_table'] = study_site_table_single_df

    facade.input_register(project_handle2)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')

    facade.input_register(taxa_handle2)
    taxadirector = facade.make_table('taxainfo')

    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    print('taxa columns after make taxa table: ', taxa_table.columns)

    facade.create_log_record('taxa_table')

    print('taxa columns before time_table: ', taxa_table.columns)

    facade.input_register(time_handle2)
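    # TimeParse(...).formater() appears to return a dataframe of the parsed
    # date columns (year/month/day) extracted from the raw data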
    timetable = tparse.TimeParse(facade._data,
                                 time_handle2.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')

    print('taxa columns before count_table: ', taxa_table.columns)
    facade.input_register(count_handle2)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[count_handle2.tablename] = rawtable
    facade.create_log_record(count_handle2.tablename)

    print('taxa columns before covar_table: ', taxa_table.columns)
    facade.input_register(covar_handle2)
    covartable = ddf.DictionaryDataframe(
        facade._data, covar_handle2.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')

    facade._valueregister['globalid'] = meta_handle2.lnedentry['globalid']
    facade._valueregister['lter'] = meta_handle2.lnedentry['lter']
    facade._valueregister['siteid'] = siteid

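    # suffix the parsed time columns with '_derived' so they cannot collide
    # with same-named columns in the raw data when the two are concatenated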
    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)

    study_site_table.to_sql('study_site_table',
                            orm.conn,
                            if_exists='append',
                            index=False)
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql('project_table',
                         orm.conn,
                         if_exists='append',
                         index=False)

    print('taxa columns before site_in_proj method: ', taxa_table.columns)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)

    print('taxa columns before user taxa merge method: ', taxa_table.columns)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)

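    # NOTE: the x[0]/x[1] order below is inverted relative to the other
    # tests in this listing, which take x[0] as the push-table column and
    # x[1] as the raw-data column; this may be a fixture-specific choice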
    taxa_column_in_data = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    taxa_column_in_push_table = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)
def test_site_in_project_key(
    MergeToUpload,
    site_handle_4_percent_cover,
    file_handle_4_percent_cover,
    meta_handle_4_percent_cover,
    project_handle_4_percent_cover,
    taxa_handle_4_percent_cover,
    time_handle_4_percent_cover,
    biomass_handle_4_percent_cover,
    covar_handle_4_percent_cover,
):
    facade = face.Facade()

    facade.input_register(meta_handle_4_percent_cover)
    facade.meta_verify()

    facade.input_register(file_handle_4_percent_cover)
    facade.load_data()

    facade.input_register(site_handle_4_percent_cover)
    sitedirector = facade.make_table("siteinfo")
    study_site_table = sitedirector._availdf

    print("study_site_table (test): ", study_site_table)

    facade.create_log_record("study_site_table")
    lter = meta_handle_4_percent_cover.lnedentry["lter"]
    ltercol = produce_null_df(1, ["lter_table_fkey"], len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print("study_site_table: ", study_site_table)
    facade.push_tables["study_site_table"] = study_site_table

    siteid = site_handle_4_percent_cover.lnedentry["study_site_key"]
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister["siteid"] = siteid

    facade.input_register(project_handle_4_percent_cover)
    maindirector = facade.make_table("maininfo")
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables["project_table"] = project_table
    facade.create_log_record("project_table")

    facade.input_register(taxa_handle_4_percent_cover)
    taxadirector = facade.make_table("taxainfo")
    taxa_table = taxadirector._availdf
    facade.push_tables["taxa_table"] = taxa_table
    facade.create_log_record("taxa_table")

    facade.input_register(time_handle_4_percent_cover)
    timetable = tparse.TimeParse(facade._data, time_handle_4_percent_cover.lnedentry).formater()
    facade.push_tables["timetable"] = timetable
    facade.create_log_record("timetable")

    facade.input_register(biomass_handle_4_percent_cover)
    rawdirector = facade.make_table("rawinfo")
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[biomass_handle_4_percent_cover.tablename] = rawtable
    facade.create_log_record(biomass_handle_4_percent_cover.tablename)

    facade.input_register(covar_handle_4_percent_cover)
    covartable = ddf.DictionaryDataframe(
        facade._data, covar_handle_4_percent_cover.lnedentry["columns"]
    ).convert_records()
    facade.push_tables["covariates"] = covartable
    facade.create_log_record("covartable")

    facade._valueregister["globalid"] = meta_handle_4_percent_cover.lnedentry["globalid"]
    facade._valueregister["lter"] = meta_handle_4_percent_cover.lnedentry["lter"]
    facade._valueregister["siteid"] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + "_derived" for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    print("merge class obs_time columns: ", observation_time_df.columns)
    print("merge class project table: ", project_table)

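    # the append can fail if these sites were pushed by an earlier run;
    # the exception is printed rather than raised so the test can continue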
    try:
        study_site_table.to_sql("study_site_table", orm.conn, if_exists="append", index=False)
    except Exception as e:
        print("Sites in db: ", str(e))
    project_table["lter_project_fkey"] = facade._valueregister["lter"]
    project_table.to_sql("project_table", orm.conn, if_exists="append", index=False)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister["lter"],
        studysitelabel=siteid,
        studysitelevels=sitelevels,
    )

    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table, siteinprojkeydf=site_in_project_key_df, sitelabel=siteid
    )

    taxa_column_in_push_table = [x[0] for x in list(facade._inputs["taxainfo"].lnedentry.items())]

    taxa_column_in_data = [x[1] for x in list(facade._inputs["taxainfo"].lnedentry.items())]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name="{}".format(re.sub("_table", "", facade._inputs["rawinfo"].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table,
    )

    obs_columns_in_data = [x[1] for x in list(facade._inputs["rawinfo"].lnedentry.items())]
    obs_columns_in_push_table = [x[0] for x in list(facade._inputs["rawinfo"].lnedentry.items())]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table,
    )
def test_site_in_project_key(
        MergeToUpload, site_handle_3_biomass, file_handle_3_biomass,
        meta_handle_3_biomass, project_handle_3_biomass, taxa_handle_3_biomass,
        time_handle_3_biomass, count_handle_3_biomass, covar_handle_3_biomass):
    facade = face.Facade()

    facade.input_register(meta_handle_3_biomass)
    facade.meta_verify()

    facade.input_register(file_handle_3_biomass)
    facade.load_data()

    facade.input_register(site_handle_3_biomass)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf

    print('study_site_table (test): ', study_site_table)

    facade.create_log_record('study_site_table')
    lter = meta_handle_3_biomass.lnedentry['lter']
    ltercol = produce_null_df(1, ['lter_table_fkey'], len(study_site_table),
                              lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print('study_site_table: ', study_site_table)
    facade.push_tables['study_site_table'] = study_site_table

    siteid = site_handle_3_biomass.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister['siteid'] = siteid

    facade.input_register(project_handle_3_biomass)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')

    facade.input_register(taxa_handle_3_biomass)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    facade.create_log_record('taxa_table')

    facade.input_register(time_handle_3_biomass)
    timetable = tparse.TimeParse(
        facade._data, time_handle_3_biomass.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')

    facade.input_register(count_handle_3_biomass)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print('rawtable facade tmaker: ', rawtable)
    facade.push_tables[count_handle_3_biomass.tablename] = rawtable
    facade.create_log_record(count_handle_3_biomass.tablename)

    facade.input_register(covar_handle_3_biomass)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_3_biomass.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')

    facade._valueregister['globalid'] = meta_handle_3_biomass.lnedentry['globalid']
    facade._valueregister['lter'] = meta_handle_3_biomass.lnedentry['lter']
    facade._valueregister['siteid'] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print('merge class obs_time df: ', observation_time_df)
    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)

    try:
        study_site_table.to_sql(
            'study_site_table',
            orm.conn, if_exists='append', index=False)
    except Exception as e:
        print('Sites in db: ', str(e))
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql(
        'project_table', orm.conn,
        if_exists='append', index=False
    )

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels
    )

    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid
    )

    taxa_column_in_data = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name=re.sub(
            '_table', '', facade._inputs['rawinfo'].tablename),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table
    )

    obs_columns_in_data = [
        x[1] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table)
def test(Facade_push, site_handle_1_count, file_handle_1_count,
         meta_handle_1_count, project_handle_1_count, taxa_handle_1_count,
         time_handle_1_count, count_handle_1_count, covar_handle_1_count):
    facade = Facade_push()
    facade.input_register(meta_handle_1_count)
    facade.meta_verify()

    facade.input_register(file_handle_1_count)
    facade.load_data()

    facade.input_register(site_handle_1_count)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf

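    # chained assignment: the LTER id goes into the fkey column and is also
    # stashed under _valueregister['globalid'], which is reassigned to the
    # real global id near the end of the test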
    study_site_table['lter_table_fkey'] = facade._valueregister[
        'globalid'] = meta_handle_1_count.lnedentry['lter']
    print(study_site_table)

    facade.push_tables['study_site_table'] = study_site_table

    siteid = site_handle_1_count.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)

    facade.input_register(project_handle_1_count)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables['project_table'] = project_table
    print('project table: ', project_table)

    facade.input_register(taxa_handle_1_count)
    taxadirector = facade.make_table('taxainfo')
    facade.push_tables['taxa_table'] = taxadirector._availdf

    facade.input_register(time_handle_1_count)
    timetable = tparse.TimeParse(facade._data,
                                 time_handle_1_count.lnedentry).formater()
    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    facade.push_tables['timetable'] = observation_time_df

    facade.input_register(count_handle_1_count)
    rawdirector = facade.make_table('rawinfo')
    facade.push_tables[
        facade._inputs['rawinfo'].tablename] = rawdirector._availdf

    facade.input_register(covar_handle_1_count)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_1_count.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable

    facade._valueregister['globalid'] = meta_handle_1_count.lnedentry[
        'globalid']
    facade._valueregister['lter'] = meta_handle_1_count.lnedentry['lter']
    facade._valueregister['siteid'] = facade._inputs['siteinfo'].lnedentry[
        'study_site_key']

    facade.push_merged_data()
Example No. 5
def test_site_in_project_key(MergeToUpload, site_handle_corner_case,
                             file_handle_corner_case, meta_handle_corner_case,
                             project_handle_corner_case,
                             taxa_handle_corner_case, time_handle_corner_case,
                             percent_cover_handle_corner_case,
                             covar_handle_corner_case):
    facade = face.Facade()

    facade.input_register(meta_handle_corner_case)
    facade.meta_verify()

    facade.input_register(file_handle_corner_case)
    facade.load_data()

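    # corner case under test: single-letter zone codes in the raw site
    # column are expanded to full study-site names before any tables are built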
    siteid = site_handle_corner_case.lnedentry['study_site_key']

    facade._data[siteid].replace(
        {
            'C': 'site_jrn_zone_creosotebush',
            'G': 'site_jrn_zone_grassland'
        },
        inplace=True)

    facade.input_register(site_handle_corner_case)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf

    print('study_site_table (test): ', study_site_table)

    facade.create_log_record('study_site_table')
    lter = meta_handle_corner_case.lnedentry['lter']
    ltercol = produce_null_df(1, ['lter_table_fkey'], len(study_site_table),
                              lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print('study_site_table: ', study_site_table)
    facade.push_tables['study_site_table'] = study_site_table

    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister['siteid'] = siteid

    facade.input_register(project_handle_corner_case)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')

    facade.input_register(taxa_handle_corner_case)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    facade.create_log_record('taxa_table')

    facade.input_register(time_handle_corner_case)
    timetable = tparse.TimeParse(facade._data,
                                 time_handle_corner_case.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')

    facade.input_register(percent_cover_handle_corner_case)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[percent_cover_handle_corner_case.tablename] = rawtable
    facade.create_log_record(percent_cover_handle_corner_case.tablename)

    facade.input_register(covar_handle_corner_case)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_corner_case.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')

    facade._valueregister['globalid'] = meta_handle_corner_case.lnedentry[
        'globalid']
    facade._valueregister['lter'] = meta_handle_corner_case.lnedentry['lter']
    facade._valueregister['siteid'] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)

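    # the study_site_table push is commented out below; these sites are
    # presumably already present in the database from an earlier test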
    #    try:
    #        study_site_table.to_sql(
    #            'study_site_table',
    #            orm.conn, if_exists='append', index=False)
    #    except Exception as e:
    #        print('Sites in db: ', str(e))
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql('project_table',
                         orm.conn,
                         if_exists='append',
                         index=False)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)

    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)

    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    taxa_column_in_data = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)

    obs_columns_in_data = [
        x[1] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table)
def test_site_in_project_key_number_two(
    MergeToUpload,
    site_handle2,
    file_handle2,
    meta_handle2,
    project_handle2,
    taxa_handle2,
    time_handle2,
    count_handle2,
    covar_handle2,
):
    facade = face.Facade()

    facade.input_register(meta_handle2)
    facade.meta_verify()

    facade.input_register(file_handle2)
    facade.load_data()

    facade.input_register(site_handle2)
    sitedirector = facade.make_table("siteinfo")
    study_site_table = sitedirector._availdf

    siteid = site_handle2.lnedentry["study_site_key"]
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    print("test2 sitelevels: ", sitelevels)
    facade._valueregister["siteid"] = siteid

    print("study_site_table (test): ", study_site_table)

    facade.create_log_record("study_site_table")
    lter = meta_handle2.lnedentry["lter"]
    ltercol = produce_null_df(1, ["lter_table_fkey"], len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    study_site_table_og_col = study_site_table.columns.values.tolist()

    study_site_table_single = study_site_table.iloc[0, :]

    study_site_table_single_df = DataFrame([study_site_table_single])
    study_site_table_single_df.columns = study_site_table_og_col

    print("study site single: ", study_site_table_single)

    study_site_table_single_df.loc[0, "study_site_key"] = "NULL"

    print("study_site_table: ", study_site_table_single_df)

    facade.push_tables["study_site_table"] = study_site_table_single_df

    facade.input_register(project_handle2)
    maindirector = facade.make_table("maininfo")
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables["project_table"] = project_table
    facade.create_log_record("project_table")

    facade.input_register(taxa_handle2)
    taxadirector = facade.make_table("taxainfo")

    taxa_table = taxadirector._availdf
    facade.push_tables["taxa_table"] = taxa_table
    print("taxa columns after make taxa table: ", taxa_table.columns)

    facade.create_log_record("taxa_table")

    print("taxa columns before time_table: ", taxa_table.columns)

    facade.input_register(time_handle2)
    timetable = tparse.TimeParse(facade._data, time_handle2.lnedentry).formater()
    facade.push_tables["timetable"] = timetable
    facade.create_log_record("timetable")

    print("taxa columns before count_table: ", taxa_table.columns)
    facade.input_register(count_handle2)
    rawdirector = facade.make_table("rawinfo")
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[count_handle2.tablename] = rawtable
    facade.create_log_record(count_handle2.tablename)

    print("taxa columns before covar_table: ", taxa_table.columns)
    facade.input_register(covar_handle2)
    covartable = ddf.DictionaryDataframe(facade._data, covar_handle2.lnedentry["columns"]).convert_records()
    facade.push_tables["covariates"] = covartable
    facade.create_log_record("covartable")

    facade._valueregister["globalid"] = meta_handle2.lnedentry["globalid"]
    facade._valueregister["lter"] = meta_handle2.lnedentry["lter"]
    facade._valueregister["siteid"] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + "_derived" for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    print("merge class obs_time columns: ", observation_time_df.columns)
    print("merge class project table: ", project_table)

    study_site_table.to_sql("study_site_table", orm.conn, if_exists="append", index=False)
    project_table["lter_project_fkey"] = facade._valueregister["lter"]
    project_table.to_sql("project_table", orm.conn, if_exists="append", index=False)

    print("taxa columns before site_in_proj method: ", taxa_table.columns)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister["lter"],
        studysitelabel=siteid,
        studysitelevels=sitelevels,
    )

    print("taxa columns before user taxa merge method: ", taxa_table.columns)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table, siteinprojkeydf=site_in_project_key_df, sitelabel=siteid
    )

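    # as in the earlier copy of this test, the x[0]/x[1] order below is
    # inverted relative to the other tests in this listing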
    taxa_column_in_data = [x[0] for x in list(facade._inputs["taxainfo"].lnedentry.items())]

    taxa_column_in_push_table = [x[1] for x in list(facade._inputs["taxainfo"].lnedentry.items())]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name="{}".format(re.sub("_table", "", facade._inputs["rawinfo"].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table,
    )
Example No. 7
    def submit_change(self):
        '''
        Method to get data from the user form and build the
        project table to upload
        '''
        sender = self.sender()
        self.form_entries = OrderedDict((
            ('samplingunits', self.dtype(
                self.lnedDatatypeunits.text() != '',
                self.lnedDatatypeunits.text(), None)),
            ('datatype', self.dtype(
                self.cboxDatatype.currentText() != '',
                self.cboxDatatype.currentText(), None)),
            ('structured_type_1', self.structure(
                self.ckStructure1.isChecked(),
                self.lnedStructure1.text(),
                self.lnedStructureunits1.text())),
            ('structured_type_2', self.structure(
                self.ckStructure2.isChecked(),
                self.lnedStructure2.text(),
                self.lnedStructureunits2.text())),
            ('structured_type_3', self.structure(
                self.ckStructure3.isChecked(),
                self.lnedStructure3.text(),
                self.lnedStructureunits3.text())),
            ('structured_type_4', self.structure(
                self.ckStructure4.isChecked(),
                self.lnedStructure4.text(),
                self.lnedStructureunits4.text())),
            ('samplefreq', self.sampfreq(
                self.cboxSamplingfrequency.currentText() != 'NULL',
                self.cboxSamplingfrequency.currentText(), None)),
            ('studytype', self.studytype(
                self.cboxStudytype.currentText() != 'NULL',
                self.cboxStudytype.currentText(), None)),
            ('community', self.community(
                True,
                'yes' if self.rbtnCommunityyes.isChecked() else 'no',
                None)),
            ('spatial_replication_level_1_extent', self.ext(
                self.ckSpatialextent1.isChecked(),
                self.lnedSpatialextent1.text(),
                self.lnedSpatialextentunits1.text())),
            ('spatial_replication_level_2_extent', self.ext(
                self.ckSpatialextent2.isChecked(),
                self.lnedSpatialextent2.text(),
                self.lnedSpatialextentunits2.text())),
            ('spatial_replication_level_3_extent', self.ext(
                self.ckSpatialextent3.isChecked(),
                self.lnedSpatialextent3.text(),
                self.lnedSpatialextentunits3.text())),
            ('spatial_replication_level_4_extent', self.ext(
                self.ckSpatialextent4.isChecked(),
                self.lnedSpatialextent4.text(),
                self.lnedSpatialextentunits4.text())),
            ('spatial_replication_level_5_extent', self.ext(
                self.ckSpatialextent5.isChecked(),
                self.lnedSpatialextent5.text(),
                self.lnedSpatialextentunits5.text())),
            ('treatment_type_1', self.treatments(
                self.cboxTreatment1.currentText() != 'NULL',
                self.cboxTreatment1.currentText(), None)),
            ('treatment_type_2', self.treatments(
                self.cboxTreatment2.currentText() != 'NULL',
                self.cboxTreatment2.currentText(), None)),
            ('treatment_type_3', self.treatments(
                self.cboxTreatment3.currentText() != 'NULL',
                self.cboxTreatment3.currentText(), None)),
            ('control_group', self.treatments(
                self.ckControlgroup.isChecked(),
                self.lnedControlgroup.text(), None)),
            ('derived', self.derived(
                self.cboxDerived.currentText() != 'NULL',
                self.cboxDerived.currentText(), None)),
            ('authors', self.contacts(
                self.lnedAuthor.text() != '',
                self.lnedAuthor.text(), None)),
            ('authors_contact', self.contacts(
                self.lnedContact.text() != '',
                self.lnedContact.text(), None))))

        self.mainini = ini.InputHandler(name='maininfo',
                                        tablename='project_table',
                                        lnedentry=self.form_entries)

        self.facade.input_register(self.mainini)
        try:
            self.maindirector = self.facade.make_table('maininfo')
        except Exception as e:
            print(str(e))
            self.error.showMessage(str(e))
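        # note that execution continues even if make_table raised, in which
        # case self.maindirector is left unset and the lines below will error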
        self.facade.create_log_record('project_table')
        self._log = self.facade._tablelog['project_table']
        self.project_table = self.maindirector._availdf.copy()

        try:
            check_list = [
                'authors', 'authors_contact', 'studytype', 'derived',
                'community', 'samplefreq', 'datatype'
            ]
            record = None
            for i, item in enumerate(check_list):
                print(item, ': ', self.form_entries[item].entry)
                record = item
                assert self.form_entries[item].entry != 'NULL'
                assert self.form_entries[item].entry != ''

            if sender is self.btnPreview:
                self.mainmodel = self.viewEdit(self.project_table)
                self.preview.tabviewPreview.setModel(self.mainmodel)
                self.preview.show()
                return
            self.facade.push_tables['project_table'] = self.project_table
            self._log.debug(
                'project_table mod: ' +
                ' '.join(self.project_table.columns.values.tolist()))

            orm.convert_types(self.project_table, orm.project_types)
            hlp.write_column_to_log(self.form_entries, self._log,
                                    'project_table')
            self.close()

        except Exception as e:
            print(str(e))
            self.error.showMessage('Invalid entry: ' + record)
Example No. 8
    def submit_change(self):
        '''
        Method to get data from the user form and build the
        project table to upload
        '''
        sender = self.sender()
        self.form_entries = OrderedDict((
            ('samplingunits', self.dtype(
                self.lnedDatatypeunits.text() != '',
                self.lnedDatatypeunits.text(),
                None
            )),
            ('datatype', self.dtype(
                self.cboxDatatype.currentText() != '',
                self.cboxDatatype.currentText(),
                None
            )),
            ('structured_type_1', self.structure(
                self.ckStructure1.isChecked(),
                self.lnedStructure1.text(),
                self.lnedStructureunits1.text()
            )),
            ('structured_type_2', self.structure(
                self.ckStructure2.isChecked(),
                self.lnedStructure2.text(),
                self.lnedStructureunits2.text()
            )),
            ('structured_type_3', self.structure(
                self.ckStructure3.isChecked(),
                self.lnedStructure3.text(),
                self.lnedStructureunits3.text()
            )),
            ('samplefreq', self.sampfreq(
                self.cboxSamplingfrequency.currentText() != 'NULL',
                self.cboxSamplingfrequency.currentText(),
                None
            )),
            ('studytype', self.studytype(
                self.cboxStudytype.currentText() != 'NULL',
                self.cboxStudytype.currentText(),
                None
            )),
            ('community', self.community(
                True,
                'yes' if self.rbtnCommunityyes.isChecked() else 'no',
                None
            )),
            ('spatial_replication_level_1_extent', self.ext(
                self.ckSpatialextent1.isChecked(),
                self.lnedSpatialextent1.text(),
                self.lnedSpatialextentunits1.text()
            )),
            ('spatial_replication_level_2_extent', self.ext(
                self.ckSpatialextent2.isChecked(),
                self.lnedSpatialextent2.text(),
                self.lnedSpatialextentunits2.text()
            )),
            ('spatial_replication_level_3_extent', self.ext(
                self.ckSpatialextent3.isChecked(),
                self.lnedSpatialextent3.text(),
                self.lnedSpatialextentunits3.text()
            )),
            ('spatial_replication_level_4_extent', self.ext(
                self.ckSpatialextent4.isChecked(),
                self.lnedSpatialextent4.text(),
                self.lnedSpatialextentunits4.text()
            )),
            ('spatial_replication_level_5_extent', self.ext(
                self.ckSpatialextent5.isChecked(),
                self.lnedSpatialextent5.text(),
                self.lnedSpatialextentunits5.text()
            )),
            ('treatment_type_1', self.treatments(
                self.cboxTreatment1.currentText() != 'NULL',
                self.cboxTreatment1.currentText(),
                None
            )),
            ('treatment_type_2', self.treatments(
                self.cboxTreatment2.currentText() != 'NULL',
                self.cboxTreatment2.currentText(),
                None
            )),
            ('treatment_type_3', self.treatments(
                self.cboxTreatment3.currentText() != 'NULL',
                self.cboxTreatment3.currentText(),
                None
            )),
            ('control_group', self.treatments(
                self.ckControlgroup.isChecked(),
                self.lnedControlgroup.text(),
                None
            )),
            ('derived', self.derived(
                self.cboxDerived.currentText() != 'NULL',
                self.cboxDerived.currentText(),
                None
            )),
            ('authors', self.contacts(
                self.lnedAuthor.text() != '',
                self.lnedAuthor.text(),
                None
            )),
            ('authors_contact', self.contacts(
                self.lnedContact.text() != '',
                self.lnedContact.text(),
                None
            ))
        ))

        self.mainini = ini.InputHandler(
            name='maininfo', tablename='project_table',
            lnedentry=self.form_entries
        )

        self.facade.input_register(self.mainini)
        try:
            self.maindirector = self.facade.make_table('maininfo')
        except Exception as e:
            print(str(e))
            self.error.showMessage(str(e))
        self.facade.create_log_record('project_table')
        self._log = self.facade._tablelog['project_table']
        self.project_table = self.maindirector._availdf.copy()

        try:
            check_list = [
                'authors', 'authors_contact', 'studytype',
                'derived', 'community', 'samplefreq', 'datatype'
            ]
            record = None
            for i, item in enumerate(check_list):
                print(item, ': ', self.form_entries[item].entry)
                record = item
                assert self.form_entries[item].entry != 'NULL'
                assert self.form_entries[item].entry != ''

            if sender is self.btnPreview:
                self.mainmodel = self.viewEdit(self.project_table)
                self.preview.tabviewPreview.setModel(self.mainmodel)
                self.preview.show()
                return
            self.facade.push_tables['project_table'] = self.project_table
            self._log.debug(
                'project_table mod: ' +
                ' '.join(self.project_table.columns.values.tolist()))

            orm.convert_types(self.project_table, orm.project_types)
            hlp.write_column_to_log(
                self.form_entries, self._log, 'project_table')
            self.close()

        except Exception as e:
            print(str(e))
            self.error.showMessage(
                'Invalid entry: ' + record
            )
def test(
        Facade_push, site_handle_1_count, file_handle_1_count,
        meta_handle_1_count, project_handle_1_count, taxa_handle_1_count,
        time_handle_1_count, count_handle_1_count, covar_handle_1_count):
    facade = Facade_push()
    facade.input_register(meta_handle_1_count)
    facade.meta_verify()

    facade.input_register(file_handle_1_count)
    facade.load_data()

    facade.input_register(site_handle_1_count)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf

    study_site_table['lter_table_fkey'] = facade._valueregister[
        'globalid'] = meta_handle_1_count.lnedentry['lter']
    print(study_site_table)

    facade.push_tables['study_site_table'] = study_site_table

    siteid = site_handle_1_count.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)

    facade.input_register(project_handle_1_count)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables['project_table'] = project_table
    print('project table: ', project_table)

    facade.input_register(taxa_handle_1_count)
    taxadirector = facade.make_table('taxainfo')
    facade.push_tables['taxa_table'] = taxadirector._availdf

    facade.input_register(time_handle_1_count)
    timetable = tparse.TimeParse(
        facade._data, time_handle_1_count.lnedentry).formater()
    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    facade.push_tables['timetable'] = observation_time_df

    facade.input_register(count_handle_1_count)
    rawdirector = facade.make_table('rawinfo')
    facade.push_tables[facade._inputs[
        'rawinfo'].tablename] = rawdirector._availdf

    facade.input_register(covar_handle_1_count)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_1_count.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable

    facade._valueregister['globalid'] = meta_handle_1_count.lnedentry['globalid']
    facade._valueregister['lter'] = meta_handle_1_count.lnedentry['lter']
    facade._valueregister['siteid'] = facade._inputs[
        'siteinfo'].lnedentry['study_site_key']

    facade.push_merged_data()
Example No. 10
    def merge_for_datatype_table_upload(
            self, raw_dataframe,
            formated_dataframe,
            formated_dataframe_name,
            covariate_dataframe,
            siteinprojkeydf,
            raw_data_taxa_columns,
            uploaded_taxa_columns):

        print('start dtype upload')
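        # Step 1) Normalize nulls: replace numeric null placeholders with
        # strings in both frames so the merges below compare equal values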
        orm.replace_numeric_null_with_string(raw_dataframe)
        orm.replace_numeric_null_with_string(formated_dataframe)
        print('replacing nulls is a pain')

        # Step 2) Query taxa_table to get the auto generated
        # primary keys returned. Turn query data into
        # dataframe.
        session = self.session
        taxa_key_statement = session.execute(
            select([orm.taxa_table]).where(
                orm.taxa_table.__table__.c.site_in_project_taxa_key.in_(
                    siteinprojkeydf['site_in_project_key'].values.tolist())
            )
        )
        session.close()
        taxa_key_df = DataFrame(taxa_key_statement.fetchall())
        taxa_key_df.columns = taxa_key_statement.keys()
        taxa_key_df.replace({None: 'NA'}, inplace=True)

        # Step 3) Subsetting the query tabled for record that only pertain
        # to the count data (because we will be subsetting from this
        # queried taxa table later)
        dtype_subset_taxa_key_df = taxa_key_df[
            taxa_key_df['site_in_project_taxa_key'].isin(
                siteinprojkeydf['site_in_project_key'])]

        # Step 4) Merge the taxa_table query results with
        # the site_in_project table query that was performed
        # to upload the taxa_table (see above). This gives
        # you a table with site names and taxonomic information
        # allowing for a merge with the original dtype data
        tbl_dtype_merged_taxakey_siteinprojectkey = merge(
            dtype_subset_taxa_key_df, siteinprojkeydf,
            left_on='site_in_project_taxa_key',
            right_on='site_in_project_key', how='inner')

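        # attach the site_in_project key to every raw observation by
        # left-joining on the site label column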
        raw_dataframe_siteinproj = merge(
            raw_dataframe, siteinprojkeydf,
            left_on=self.sitelabel, right_on='study_site_table_fkey',
            sort=False, how='left')

        # extend the merge keys so the site_in_project keys are matched too
        raw_data_taxa_columns.append('site_in_project_key')
        uploaded_taxa_columns.append('site_in_project_taxa_key')

        print('updated raw data col list: ', raw_data_taxa_columns)
        print('update taxa data col list: ', uploaded_taxa_columns)
        # Step 5) Merge the original dtype data with the
        # merged taxa_table query to have all foreign keys...
        # taxa and site_project
        # matched up with the original observations.
        dtype_merged_with_taxa_and_siteinproj_key = merge(
            raw_dataframe_siteinproj,
            tbl_dtype_merged_taxakey_siteinprojectkey,
            left_on=list(raw_data_taxa_columns),
            right_on=list(uploaded_taxa_columns),
            how='left')

        # Step 6) Take the merged original data with all foreign keys,
        # and merged that with the formatted dtype_table based on index
        # values (order or records should not changed from the original data
        # to the formatted data)
        tbl_dtype_merged_with_all_keys = merge(
            formated_dataframe,
            dtype_merged_with_taxa_and_siteinproj_key,
            left_index=True, right_index=True, how='inner',
            suffixes=('', '_y'))

        # Step 7) List the columns that will be needed to push the
        # dtype table to the database (including foreign keys)
        tbl_dtype_columns_to_upload = [
            'taxa_table_key', 'site_in_project_taxa_key', 'year_derived',
            'month_derived', 'day_derived', 'spatial_replication_level_1',
            'spatial_replication_level_2', 'spatial_replication_level_3',
            'spatial_replication_level_4', 'spatial_replication_level_5',
            'structure_type_1', 'structure_type_2',
            'structure_type_3', 'structure_type_4',
            'treatment_type_1', 'treatment_type_2',
            'treatment_type_3',
            'covariates'
        ]
        time_cols_rename = {
            'year_derived': 'year',
            'month_derived': 'month',
            'day_derived': 'day'
        }
        tbl_dtype_columns_to_upload.append(
            '{}_observation'.format(str(formated_dataframe_name)))
        tbl_dtype_merged_with_all_keys = concat(
            [tbl_dtype_merged_with_all_keys, covariate_dataframe],
            axis=1)

        # Step 8) Subsetting the fully merged dtype table data
        tbl_dtype_to_upload = tbl_dtype_merged_with_all_keys[
            tbl_dtype_columns_to_upload]
        tbl_dtype_to_upload.rename(
            columns=time_cols_rename, inplace=True
        )

        # Step 9) Renaming columns to reflect that in database table
        # And converting data types
        tbl_dtype_to_upload.rename(
            columns={'taxa_table_key': 'taxa_{}_fkey'.format(
                str(formated_dataframe_name))},
            inplace=True)
        datatype_key = 'site_in_project_{}_fkey'.format(str(formated_dataframe_name))
        tbl_dtype_to_upload.rename(columns={
            'site_in_project_taxa_key': datatype_key}, inplace=True)
        tbl_dtype_to_upload.fillna('NA', inplace=True)
        self.formateddata = tbl_dtype_to_upload
        # Step 10) Uploading to the database
        datatype_table = '{}_table'.format(str(formated_dataframe_name))
        datatype_obs = '{}_observation'.format(str(formated_dataframe_name))
        print('push raw_before', tbl_dtype_to_upload.columns)

        tbl_dtype_to_upload[datatype_obs] = to_numeric(
            tbl_dtype_to_upload[datatype_obs], errors='coerce'
        )
        
        text_cols = [
            'spatial_replication_level_1', 'spatial_replication_level_2',
            'spatial_replication_level_3', 'spatial_replication_level_4',
            'spatial_replication_level_5', 'treatment_type_1',
            'treatment_type_2', 'treatment_type_3',
            'structure_type_1', 'structure_type_2', 'structure_type_3',
            'structure_type_4'
        ]
        tbl_dtype_to_upload[text_cols] = tbl_dtype_to_upload[
            text_cols].applymap(str)
        tbl_dtype_to_upload[text_cols] = tbl_dtype_to_upload[
            text_cols].applymap(lambda x: x.strip())

        print(tbl_dtype_to_upload.dtypes)
        print(self.table_types[datatype_table])
        try:
            orm.convert_types(tbl_dtype_to_upload, self.table_types[datatype_table])
        except Exception as e:
            print('converting issues: ', str(e))


        print('push raw_after', tbl_dtype_to_upload.columns)
        print(tbl_dtype_to_upload.dtypes)
        print('this should have worked')


        other_numerics = [
            'year', 'month', 'day', datatype_key,
            'taxa_{}_fkey'.format(str(formated_dataframe_name))
        ]

        tbl_dtype_to_upload[datatype_obs] = to_numeric(
            tbl_dtype_to_upload[datatype_obs], errors='coerce'
        )

        # replace string/None null markers in the key and date columns with
        # the numeric sentinel, assigning the result back (replace/fillna on
        # a column subset returns a copy, so inplace=True there would be lost)
        tbl_dtype_to_upload[other_numerics] = tbl_dtype_to_upload[
            other_numerics].replace(['NA', 'NaN', None], -99999)
        tbl_dtype_to_upload[other_numerics] = tbl_dtype_to_upload[
            other_numerics].fillna(-99999)


        tbl_dtype_to_upload.loc[:, other_numerics] = tbl_dtype_to_upload.loc[
            :, other_numerics].apply(to_numeric, errors='coerce')

        metadata_key_column_name = 'metadata_{}_key'.format(
            formated_dataframe_name)
        tbl_dtype_to_upload[metadata_key_column_name] = int(self.metadata_key)

        # Attempting direct copy_from to copy_to commands
        # with stringIO (should be faster than pandas)
        # text buffer            
        sql_datatype_columns = tbl_dtype_to_upload.columns.values.tolist()
        s_buf = io.StringIO()
        tbl_dtype_to_upload.to_csv(s_buf, index=False, sep="\t")
        s_buf.seek(0)
        session = orm.Session()
        cur = session.connection().connection.cursor()
        copy_sql_statement = "COPY {}({}) FROM STDIN WITH CSV HEADER DELIMITER AS '\t'".format(
            datatype_table, ", ".join(sql_datatype_columns))
        cur.copy_expert(copy_sql_statement, s_buf)
        session.commit()
        session.close()

        #tbl_dtype_to_upload.to_sql(
        #    datatype_table,
        #    orm.conn, if_exists='append', index=False, chunksize=1000)

        print('past datatype upload')
Example No. 11
    def merge_for_taxa_table_upload(
            self, formated_taxa_table, siteinprojkeydf,
            sitelabel
    ):
        '''
        Method to take the data stored in the user facade class
        and upload it to the database.

        REQUIRES: the formatted taxa table (with database column
        names), a dataframe of site levels merged to the
        site_in_project primary keys (siteinprojkeydf, created in
        the method above), and the name of the column holding the
        site label (sitelabel).

        The merge and upload proceed in a series of steps:

        1) Merge the formatted taxa table with the site_in_project
        data that contains the site_in_project primary keys and the
        site levels (merging on site levels)

        2) Drop columns not necessary for the taxa_table push
        (metadata key: project_table_fkey,
        site label: study_site_table_fkey)

        3) Rename the merged site_in_project primary key column to
        match the taxa table column name

        4) Push the taxa_table to the database
        '''

        print('starting taxa table upload')
        orm.replace_numeric_null_with_string(formated_taxa_table)
        print('past orm replace numeric')
        print('siteingproj key df: ', siteinprojkeydf)
        print('siteingproj key df: ', siteinprojkeydf.columns)
        print('formatted taxa df: ', formated_taxa_table)
        print('formatted taxa df: ', formated_taxa_table.columns)

        tbl_taxa_with_site_in_proj_key = merge(
            formated_taxa_table, siteinprojkeydf,
            left_on=sitelabel,
            right_on='study_site_table_fkey',
            how='inner')
        print('past tbl_taxa site in proj key')
        tbl_taxa_merged = tbl_taxa_with_site_in_proj_key.copy()

        tbl_taxa_merged.drop([
            'study_site_table_fkey', sitelabel,
            'project_table_fkey'], inplace=True, axis=1)
        print('past tbl_taxa drop: ', tbl_taxa_merged)
        tbl_taxa_merged.rename(
            columns={'site_in_project_key': 'site_in_project_taxa_key'},
            inplace=True)
        print('merge class taxa merged: ', tbl_taxa_merged)

        tbl_taxa_merged.fillna('NA', inplace=True)
        try:
            orm.convert_types(tbl_taxa_merged, orm.taxa_types)
        except Exception as e:
            print('converting issues: ', str(e))

        session = self.session
        site_in_proj_key_query = session.execute(
            select(
                [orm.taxa_table.__table__.c.site_in_project_taxa_key]
            ).distinct()
        )
        session.close()
        
        check_site_in_proj_keys = DataFrame(site_in_proj_key_query.fetchall())
        if check_site_in_proj_keys.empty:
            check_site_in_proj_keys = []
        else:
            check_site_in_proj_keys = check_site_in_proj_keys[0].values.tolist()

        # push only when these site_in_project keys are not already present
        if not all(
                x in check_site_in_proj_keys
                for x in
                siteinprojkeydf['site_in_project_key'].values.tolist()):
            taxacolumns = [
                'site_in_project_taxa_key', 'sppcode', 'kingdom',
                'subkingdom', 'infrakingdom', 'superdivision', 'division',
                'subdivision', 'superphylum', 'phylum', 'subphylum',
                'clss', 'subclass', 'ordr', 'family', 'genus', 'species',
                'common_name', 'authority', 'metadata_taxa_key'
            ]
            tbl_taxa_merged['metadata_taxa_key'] = int(self.metadata_key)
            tbl_taxa_merged[taxacolumns].to_sql(
                'taxa_table', orm.conn, if_exists='append', index=False)
Example No. 12
        # assumed opening (the snippet begins mid-statement): building the
        # study-site dataframe from the parsed site metadata
        sitetable = DataFrame({
            'study_site_key': study_site_key,
            'lter_table_fkey': site_dict['lterid'][1],
            'lat_study_site': site_lat,
            'lng_study_site': site_lng
        })
        if 'descript' not in site_dict:
            sitetable['descript'] = ['NA'] * len(sitetable)
        else:
            try:
                sitetable['descript'] = site_dict['descript'][1]
            except Exception as e:
                sitetable['descript'] = ['NA'] * len(sitetable)
                print('descript not recorded')

        try:
            orm.convert_types(sitetable, orm.study_site_types)
        except Exception as e:
            print('converting issues: ', str(e))

        study_site_table_numeric_columns = ['lat_study_site', 'lng_study_site']
        # convert datatype to string/object
        sitetable[sitetable.columns.difference(
            study_site_table_numeric_columns)] = sitetable[
                sitetable.columns.difference(
                    study_site_table_numeric_columns)].applymap(str)
        # Strip strings of leading and trailing whitespace
        sitetable[sitetable.columns.difference(
            study_site_table_numeric_columns)] = sitetable[
                sitetable.columns.difference(study_site_table_numeric_columns
                                             )].applymap(lambda x: x.strip())