def get_dataframe(
        self, dataframe, acols, nullcols, dbcol,
        globalid, siteid, sitelevels):
    '''
    Build a placeholder dataframe of 'NA' values — one row per unique
    site level — for table columns that will be updated later in the
    program, and attach the site levels in a 'siteid' column.

    :param dataframe: raw data (unused here; kept for interface parity
        with the sibling get_dataframe implementations)
    :param acols: user-entered column names from the GUI
    :param nullcols: table columns that must be null-filled
    :param dbcol: all database columns of the target table
    :param globalid: global dataset id (unused here)
    :param siteid: name of the site column (unused here)
    :param sitelevels: unique site levels; one output row per level
    :return: DataFrame of 'NA' values with a 'siteid' column appended
    '''
    # Defensive copies so the list mutation below never touches the
    # caller's sequences.
    acols = list(acols) if acols is not None else acols
    # BUG FIX: the original guarded this copy on `acols is not None`,
    # so a non-None nullcols was skipped (or a None nullcols crashed)
    # whenever acols disagreed with it.
    nullcols = list(nullcols) if nullcols is not None else nullcols
    dbcol = list(dbcol) if dbcol is not None else dbcol
    sitelevels = list(sitelevels) if sitelevels is not None else sitelevels
    # Strip trailing whitespace from user-entered names.
    acols = [x.rstrip() for x in acols]
    nullcols = [x.rstrip() for x in nullcols]
    dbcol = [x.rstrip() for x in dbcol]
    # Columns that will be updated later in the program
    try:
        updatedf = hlp.produce_null_df(
            len(dbcol), dbcol, len(sitelevels), 'NA')
    except Exception as e:
        # Fallback: coerce entries to int (user may have supplied
        # numeric column indexes) and retry — presumably the helper
        # choked on non-string input; TODO confirm this path is live.
        acols = [int(x) for x in acols]
        updatedf = hlp.produce_null_df(
            len(dbcol), dbcol, len(sitelevels), 'NA')
        print(str(e))
    updatedf['siteid'] = sitelevels
    return updatedf
def get_dataframe(self, dataframe, acols, nullcols, dbcol, globalid,
                  siteid, sitelevels):
    '''
    Create the "update later" placeholder table: a dataframe of 'NA'
    values with one row per site level, plus a 'siteid' column holding
    the levels themselves.

    :param dataframe: raw data (unused; kept for interface parity)
    :param acols: user-entered column names
    :param nullcols: table columns that must be null-filled
    :param dbcol: all database columns of the target table
    :param globalid: global dataset id (unused here)
    :param siteid: name of the site column (unused here)
    :param sitelevels: unique site levels; one output row per level
    :return: DataFrame of 'NA' values with a 'siteid' column
    '''
    # Copy inputs so downstream list mutation cannot leak to callers.
    acols = list(acols) if acols is not None else acols
    # BUG FIX: the original tested `acols is not None` on this line,
    # which skipped/broke the nullcols copy whenever acols and
    # nullcols differed in None-ness.
    nullcols = list(nullcols) if nullcols is not None else nullcols
    dbcol = list(dbcol) if dbcol is not None else dbcol
    sitelevels = list(sitelevels) if sitelevels is not None else sitelevels
    acols = [x.rstrip() for x in acols]
    nullcols = [x.rstrip() for x in nullcols]
    dbcol = [x.rstrip() for x in dbcol]
    # Columns that will be updated later in the program
    try:
        updatedf = hlp.produce_null_df(
            len(dbcol), dbcol, len(sitelevels), 'NA')
    except Exception as e:
        # Fallback retry with integer-coerced acols (numeric column
        # indexes from the GUI) — TODO confirm this branch is needed.
        acols = [int(x) for x in acols]
        updatedf = hlp.produce_null_df(
            len(dbcol), dbcol, len(sitelevels), 'NA')
        print(str(e))
    updatedf['siteid'] = sitelevels
    return updatedf
def time_regex(data, col, form, nulls):
    '''
    Method to format the date columns in the raw
    data based on user input. Returns 3 formatted columns
    i.e. (year, month, day) including nulls
    '''
    fields = ['month', 'day', 'year']
    # Flatten one level of nesting in the column spec, if any entry
    # is itself a list.
    if any(isinstance(entry, list) for entry in col):
        col = list(chain.from_iterable(col))
    print(type(col))
    print(col)
    # Placeholder frame for the date parts the user marked as absent.
    if len(nulls) > 0:
        nulldf = hlp.produce_null_df(
            len(nulls), nulls, len(data), 'NaN')
    else:
        nulldf = DataFrame()
    try:
        if col[0] is not None:
            time_list_re = hlp.strip_time(data, col)
        else:
            time_list_re = []
    except Exception as e:
        print(str(e))
        raise AttributeError('Could not strip time format')
    notnull = [field for field in fields if field not in nulls]
    # Try each candidate format in turn; the first that parses every
    # value wins and is returned immediately.
    for fmt in form:
        try:
            parsed_columns = [
                [to_datetime(value, format=fmt) for value in series]
                for series in time_list_re
            ]
            if len(parsed_columns) > 1:
                timedf = DataFrame(
                    [list(row) for row in zip(*parsed_columns)])
            else:
                timedf = DataFrame(parsed_columns[0])
            if len(notnull) == 1:
                timedf.columns = notnull
            return {'formatted': timedf, 'null': nulldf}
        except Exception as e:
            print(str(e))
            print('Trying different format')
def time_regex(data, col, form, nulls):
    '''
    Method to format the date columns in the raw
    data based on user input. Returns 3 formatted columns
    i.e. (year, month, day) including nulls
    '''
    fields = ['month', 'day', 'year']
    # Column spec may arrive nested one level deep; flatten it.
    nested = any(isinstance(member, list) for member in col)
    if nested:
        col = list(chain.from_iterable(col))
    print(type(col))
    print(col)
    # 'NaN' placeholders for the user-declared missing date parts.
    nulldf = DataFrame()
    if len(nulls) > 0:
        nulldf = hlp.produce_null_df(len(nulls), nulls, len(data), 'NaN')
    try:
        time_list_re = [] if col[0] is None else hlp.strip_time(data, col)
    except Exception as e:
        print(str(e))
        raise AttributeError('Could not strip time format')
    notnull = [name for name in fields if name not in nulls]
    # Walk the candidate formats; return on the first full parse.
    for candidate in form:
        try:
            converted = []
            for series in time_list_re:
                converted.append(
                    [to_datetime(raw, format=candidate) for raw in series])
            if len(converted) > 1:
                timedf = DataFrame([list(tpl) for tpl in zip(*converted)])
            else:
                timedf = DataFrame(converted[0])
            if len(notnull) == 1:
                timedf.columns = notnull
            final = {'formatted': timedf, 'null': nulldf}
            return final
        except Exception as e:
            print(str(e))
            print('Trying different format')
def test_site_in_project_key(MergeToUpload, site_handle_corner_case,
                             file_handle_corner_case,
                             meta_handle_corner_case,
                             project_handle_corner_case,
                             taxa_handle_corner_case,
                             time_handle_corner_case,
                             percent_cover_handle_corner_case,
                             covar_handle_corner_case):
    '''
    Integration test (corner case): drives the full facade pipeline —
    metadata, raw file, site, project, taxa, time, percent-cover, and
    covariate registration — then exercises MergeToUpload's
    site-in-project / taxa / datatype merge-and-upload methods.
    Relies on fixture handles and a live orm connection; statement
    order is significant (each step feeds the next).
    '''
    facade = face.Facade()
    # Register metadata and raw data file, then load the data.
    facade.input_register(meta_handle_corner_case)
    facade.meta_verify()
    facade.input_register(file_handle_corner_case)
    facade.load_data()
    siteid = site_handle_corner_case.lnedentry['study_site_key']
    # Expand abbreviated site codes to full site names before the
    # site table is built (the corner case under test).
    facade._data[siteid].replace(
        {
            'C': 'site_jrn_zone_creosotebush',
            'G': 'site_jrn_zone_grassland'
        }, inplace=True)
    facade.input_register(site_handle_corner_case)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf
    print('study_site_table (test): ', study_site_table)
    facade.create_log_record('study_site_table')
    # Append a constant lter foreign-key column to the site table.
    lter = meta_handle_corner_case.lnedentry['lter']
    ltercol = produce_null_df(1, ['lter_table_fkey'],
                              len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print('study_site_table: ', study_site_table)
    facade.push_tables['study_site_table'] = study_site_table
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister['siteid'] = siteid
    # Build and register the project table.
    facade.input_register(project_handle_corner_case)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')
    # Build and register the taxa table.
    facade.input_register(taxa_handle_corner_case)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    facade.create_log_record('taxa_table')
    # Parse the date columns into a formatted time table.
    facade.input_register(time_handle_corner_case)
    timetable = tparse.TimeParse(
        facade._data, time_handle_corner_case.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')
    # Build and register the percent-cover (raw observation) table.
    facade.input_register(percent_cover_handle_corner_case)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[percent_cover_handle_corner_case.tablename] = rawtable
    facade.create_log_record(percent_cover_handle_corner_case.tablename)
    # Convert covariate columns into record form.
    facade.input_register(covar_handle_corner_case)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_corner_case.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')
    facade._valueregister['globalid'] = meta_handle_corner_case.lnedentry[
        'globalid']
    facade._valueregister['lter'] = meta_handle_corner_case.lnedentry['lter']
    facade._valueregister['siteid'] = siteid
    # Suffix the derived time columns so they cannot collide with raw
    # data columns after concatenation.
    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)
    # try:
    #     study_site_table.to_sql(
    #         'study_site_table',
    #         orm.conn, if_exists='append', index=False)
    # except Exception as e:
    #     print('Sites in db: ', str(e))
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql('project_table', orm.conn,
                         if_exists='append', index=False)
    # Exercise the merge/upload object under test.
    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)
    # lnedentry maps push-table column -> raw-data column.
    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]
    taxa_column_in_data = [
        x[1] for x in
        list(facade._inputs['taxainfo'].lnedentry.items())
    ]
    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)
    obs_columns_in_data = [
        x[1] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table)
def get_dataframe(self, dataframe, acols, nullcols, keycols, dbcol,
                  globalid, siteid, sitelevels):
    '''
    Build the observation-type table: subset the user-selected columns
    from the raw data, prepend the site column, and pad the remaining
    expected table columns with 'NA' placeholders.

    :param dataframe: raw data to subset
    :param acols: user-entered column names (GUI line edits)
    :param nullcols: table columns to null-fill (mutated locally)
    :param keycols: primary/foreign keys removed from the null list
    :param dbcol: all database columns of the target table (unused)
    :param globalid: global dataset id (unused here)
    :param siteid: name of the site column in the raw data
    :param sitelevels: unique site levels (unused here)
    :return: concatenated/renamed DataFrame ready for upload
    :raises AttributeError: if the final column rename fails
    '''
    # Defensive copies: the removes/inserts below must not mutate the
    # caller's sequences.
    acols = list(acols) if acols is not None else acols
    # BUG FIX: original tested `acols is not None` here, leaving
    # nullcols un-copied (so caller lists were mutated) or crashing
    # when acols was None.
    nullcols = list(nullcols) if nullcols is not None else nullcols
    keycols = list(keycols) if keycols is not None else keycols
    dbcol = list(dbcol) if dbcol is not None else dbcol
    sitelevels = list(sitelevels) if sitelevels is not None else sitelevels
    # NOTE: the original built two list comprehensions over
    # acols/nullcols mapping 'unitobs' to '<type>_observation' but
    # discarded the results (no-ops); they were removed here. The
    # actual 'unitobs' rename happens in the try block at the end.
    print('obs acols: ', acols)
    print('obs nullcols: ', nullcols)
    if self._inputs.tablename == 'individual_table':
        # Individual tables may carry an empty-string column entry.
        try:
            acols.remove('')
        except Exception as e:
            print('no individual column to remove: ', str(e))
    try:
        acols = [x.rstrip() for x in acols]
    except Exception as e:
        # Numeric column indexes cannot be rstripped; coerce to int.
        acols = [int(x) for x in acols]
        uniquesubset = dataframe[acols]
        print(str(e))
    # Insert siteid column and remove
    # spatial rep 1 from null columns (already have data
    # in siteid column of raw data)
    acols.insert(0, siteid)
    nullcols.remove('spatial_replication_level_1')
    nullcols.remove('{}_observation'.format(
        re.sub('_table', '', self._inputs.tablename)))
    # These columns are populated by later pipeline steps, so they
    # must not be null-filled here.
    columns_to_be_added_later = ['year', 'month', 'day', 'covariates']
    for later_column in columns_to_be_added_later:
        nullcols.remove(later_column)
    for key_column in keycols:
        nullcols.remove(key_column)
    if self._inputs.foreignmergeddata is not None:
        # Foreign-key columns already present from a prior query are
        # taken from the raw data instead of being null-filled.
        columns_where_data_is_from_query = [
            'taxa_{}_fkey'.format(
                re.sub('_table', '', self._inputs.tablename)),
            'site_in_project_{}_fkey'.format(
                re.sub('_table', '', self._inputs.tablename))
        ]
        for query_column in columns_where_data_is_from_query:
            acols.append(query_column)
    uniquesubset = dataframe[acols]
    print('uniquesub: ', uniquesubset)
    nullsubset = hlp.produce_null_df(ncols=len(nullcols),
                                     colnames=nullcols,
                                     dflength=len(uniquesubset),
                                     nullvalue='NA')
    print('null subset: ', nullsubset)
    _concat = concat([uniquesubset, nullsubset], axis=1).reset_index()
    # NOTE(review): double reset_index adds both 'level_0' and 'index'
    # columns — preserved as-is; confirm downstream relies on it.
    final = _concat.reset_index()
    if self._inputs.tablename == 'individual_table':
        final['individual_observation'] = 1
        print('should have added individual observation')
    print('final build class columns: ', final.columns)
    try:
        # Rename raw-data column names to their formatted table
        # equivalents ('unitobs' maps to '<type>_observation').
        fomated_column_to_change = list(self._inputs.lnedentry.keys())
        fomated_column_to_change.append('spatial_replication_level_1')
        for index, item in enumerate(fomated_column_to_change):
            if item == 'unitobs':
                fomated_column_to_change[index] = '{}_observation'.format(
                    re.sub('_table', '', self._inputs.tablename))
        original_column_names_to_change = list(
            self._inputs.lnedentry.values())
        original_column_names_to_change.append(siteid)
        for i, item in enumerate(fomated_column_to_change):
            final.rename(
                columns={original_column_names_to_change[i]: item},
                inplace=True)
        return final
    except Exception as e:
        print(str(e))
        raise AttributeError('Column renaming error')
def get_dataframe(self, dataframe, acols, nullcols, keycols, dbcol,
                  globalid, siteid, sitelevels):
    '''
    Build the taxa-style table: for each site level, subset and
    deduplicate the user-selected columns, rename them to database
    column names, and pad the remaining table columns with 'NA'.

    :param dataframe: raw data (mutated if taxacreate adds columns)
    :param acols: user-entered column names (GUI line edits)
    :param nullcols: table columns to null-fill (keys removed locally)
    :param keycols: primary/foreign keys excluded from output padding
    :param dbcol: all database columns of the target table
    :param globalid: global dataset id (unused here)
    :param siteid: name of the site column in the raw data
    :param sitelevels: unique site levels to iterate over
    :return: per-site concatenated DataFrame restricted to dbcol+siteid
    '''
    # Defensive copies so local removes never mutate caller lists.
    acols = list(acols) if acols is not None else acols
    # BUG FIX: original guarded this copy on `acols is not None`,
    # mutating the caller's nullcols (or raising) when acols was None.
    nullcols = list(nullcols) if nullcols is not None else nullcols
    keycols = list(keycols) if keycols is not None else keycols
    dbcol = list(dbcol) if dbcol is not None else dbcol
    sitelevels = list(sitelevels) if sitelevels is not None else sitelevels
    # (removed the original's no-op `dataframe = dataframe`)
    try:
        acols = [x.rstrip() for x in acols]
    except Exception as e:
        # Numeric column indexes cannot be rstripped; coerce to int.
        acols = [int(x) for x in acols]
        uniquesubset = dataframe[acols]
        print(str(e))
    # Keys are uploaded separately — drop them from both column lists.
    try:
        for key_column in keycols:
            dbcol.remove(key_column)
    except Exception as e:
        print(str(e))
    try:
        for key_column in keycols:
            nullcols.remove(key_column)
    except Exception as e:
        print(str(e))
    print('SELF INPUTS: ', self._inputs.checks)
    print('AVAILABLE COLUMNS: ', acols)
    print('DB COLUMNS: ', dbcol)
    print('NULL COLUMNS: ', nullcols)
    print('DF COLUMNS: ', dataframe.columns.values.tolist())
    if self._inputs.checks['taxacreate'] is True:
        # User asked to create taxa columns not present in the raw
        # data: add each as a constant column equal to its own name.
        dfcol = dataframe.columns.values.tolist()
        columns_create = [x for x in acols if x not in dfcol]
        print('CREATE :', columns_create)
        for new_column in columns_create:
            dataframe.loc[:, new_column] = new_column
        print('DF COLUMNS (added): ', dataframe.columns.values.tolist())
    # Database columns that will receive real data (not nulls).
    dbcolrevised = [x for x in dbcol if x not in nullcols]
    print('DB COLUMN REVISED: ', dbcolrevised)
    uniquesubset_site_list = []
    for level in sitelevels:
        # Subset the raw data for this site level only.
        uniquesubset = dataframe[dataframe[siteid] == level].copy()
        try:
            uniquesubset = uniquesubset[acols].copy()
        except Exception as e:
            print(str(e))
        # Positional rename: acols[j] -> dbcolrevised[j].
        for j, rename_item in enumerate(dbcolrevised):
            uniquesubset.rename(columns={acols[j]: rename_item},
                                inplace=True)
        unique = uniquesubset.drop_duplicates().copy()
        unique = unique.reset_index()
        sitelevel = hlp.produce_null_df(ncols=len(unique),
                                        colnames=[siteid],
                                        dflength=len(unique),
                                        nullvalue=level)
        nullsubset = hlp.produce_null_df(ncols=len(nullcols),
                                         colnames=nullcols,
                                         dflength=len(unique),
                                         nullvalue='NA')
        unique = concat([unique, nullsubset, sitelevel], axis=1)
        uniquesubset_site_list.append(unique)
    # Stack the per-site frames back together.
    final = uniquesubset_site_list[0]
    print('final before loop: ', final)
    for i, item in enumerate(uniquesubset_site_list):
        if i > 0:
            final = concat([final, item], ignore_index=True)
    print('past subsetting sites')
    print(final.columns)
    dbcol.append(siteid)
    return final[dbcol].copy()
def test_site_in_project_key_number_two(
    MergeToUpload,
    site_handle2,
    file_handle2,
    meta_handle2,
    project_handle2,
    taxa_handle2,
    time_handle2,
    count_handle2,
    covar_handle2,
):
    '''
    Integration test (dataset 2): runs the facade pipeline end to end
    — metadata, raw file, site, project, taxa, time, count, covariate
    registration — then exercises MergeToUpload's site-in-project,
    taxa, and datatype merge-and-upload methods. Uses a single-row
    'NULL' study-site table variant as the pushed site table. Relies
    on fixture handles and a live orm connection; order matters.
    '''
    facade = face.Facade()
    # Register metadata and the raw data file, then load the data.
    facade.input_register(meta_handle2)
    facade.meta_verify()
    facade.input_register(file_handle2)
    facade.load_data()
    facade.input_register(site_handle2)
    sitedirector = facade.make_table("siteinfo")
    study_site_table = sitedirector._availdf
    siteid = site_handle2.lnedentry["study_site_key"]
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    print("test2 sitelevels: ", sitelevels)
    facade._valueregister["siteid"] = siteid
    print("study_site_table (test): ", study_site_table)
    facade.create_log_record("study_site_table")
    # Append a constant lter foreign-key column to the site table.
    lter = meta_handle2.lnedentry["lter"]
    ltercol = produce_null_df(1, ["lter_table_fkey"],
                              len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    # Build a one-row copy of the site table with a 'NULL' site key —
    # the variant actually pushed in this scenario.
    study_site_table_og_col = study_site_table.columns.values.tolist()
    study_site_table_single = study_site_table.iloc[0, :]
    study_site_table_single_df = DataFrame([study_site_table_single])
    study_site_table_single_df.columns = study_site_table_og_col
    print("study site single: ", study_site_table_single)
    study_site_table_single_df.loc[0, "study_site_key"] = "NULL"
    print("study_site_table: ", study_site_table_single_df)
    facade.push_tables["study_site_table"] = study_site_table_single_df
    # Build and register the project table.
    facade.input_register(project_handle2)
    maindirector = facade.make_table("maininfo")
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables["project_table"] = project_table
    facade.create_log_record("project_table")
    # Build and register the taxa table.
    facade.input_register(taxa_handle2)
    taxadirector = facade.make_table("taxainfo")
    taxa_table = taxadirector._availdf
    facade.push_tables["taxa_table"] = taxa_table
    print("taxa columns after make taxa table: ", taxa_table.columns)
    facade.create_log_record("taxa_table")
    print("taxa columns before time_table: ", taxa_table.columns)
    # Parse the date columns into a formatted time table.
    facade.input_register(time_handle2)
    timetable = tparse.TimeParse(facade._data,
                                 time_handle2.lnedentry).formater()
    facade.push_tables["timetable"] = timetable
    facade.create_log_record("timetable")
    print("taxa columns before count_table: ", taxa_table.columns)
    # Build and register the count (raw observation) table.
    facade.input_register(count_handle2)
    rawdirector = facade.make_table("rawinfo")
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[count_handle2.tablename] = rawtable
    facade.create_log_record(count_handle2.tablename)
    print("taxa columns before covar_table: ", taxa_table.columns)
    # Convert covariate columns into record form.
    facade.input_register(covar_handle2)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle2.lnedentry["columns"]).convert_records()
    facade.push_tables["covariates"] = covartable
    facade.create_log_record("covartable")
    facade._valueregister["globalid"] = meta_handle2.lnedentry["globalid"]
    facade._valueregister["lter"] = meta_handle2.lnedentry["lter"]
    facade._valueregister["siteid"] = siteid
    # Suffix derived time columns to avoid collisions with raw data.
    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + "_derived" for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print("merge class obs_time columns: ", observation_time_df.columns)
    print("merge class project table: ", project_table)
    # Seed the database tables required by the merge step.
    study_site_table.to_sql("study_site_table", orm.conn,
                            if_exists="append", index=False)
    project_table["lter_project_fkey"] = facade._valueregister["lter"]
    project_table.to_sql("project_table", orm.conn,
                         if_exists="append", index=False)
    print("taxa columns before site_in_proj method: ", taxa_table.columns)
    # Exercise the merge/upload object under test.
    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister["lter"],
        studysitelabel=siteid,
        studysitelevels=sitelevels,
    )
    print("taxa columns before user taxa merge method: ", taxa_table.columns)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid
    )
    # NOTE(review): here x[0] (lnedentry keys) feeds *_in_data and
    # x[1] feeds *_in_push_table — the reverse of the other tests in
    # this file. Confirm which orientation is intended.
    taxa_column_in_data = [
        x[0] for x in list(facade._inputs["taxainfo"].lnedentry.items())]
    taxa_column_in_push_table = [
        x[1] for x in list(facade._inputs["taxainfo"].lnedentry.items())]
    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name="{}".format(
            re.sub("_table", "", facade._inputs["rawinfo"].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table,
    )
def test_site_in_project_key(
        MergeToUpload, site_handle_3_biomass, file_handle_3_biomass,
        meta_handle_3_biomass, project_handle_3_biomass,
        taxa_handle_3_biomass, time_handle_3_biomass,
        count_handle_3_biomass, covar_handle_3_biomass):
    '''
    Integration test (biomass dataset): drives the facade pipeline —
    metadata, raw file, site, project, taxa, time, count, covariate
    registration — then exercises MergeToUpload's site-in-project,
    taxa, datatype, and project-update methods. Relies on fixture
    handles and a live orm connection; statement order is significant.
    '''
    facade = face.Facade()
    # Register metadata and the raw data file, then load the data.
    facade.input_register(meta_handle_3_biomass)
    facade.meta_verify()
    facade.input_register(file_handle_3_biomass)
    facade.load_data()
    facade.input_register(site_handle_3_biomass)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf
    print('study_site_table (test): ', study_site_table)
    facade.create_log_record('study_site_table')
    # Append a constant lter foreign-key column to the site table.
    lter = meta_handle_3_biomass.lnedentry['lter']
    ltercol = produce_null_df(1, [
        'lter_table_fkey'], len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print('study_site_table: ', study_site_table)
    facade.push_tables['study_site_table'] = study_site_table
    siteid = site_handle_3_biomass.lnedentry['study_site_key']
    sitelevels = facade._data[
        siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister['siteid'] = siteid
    # Build and register the project table.
    facade.input_register(project_handle_3_biomass)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')
    # Build and register the taxa table.
    facade.input_register(taxa_handle_3_biomass)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    facade.create_log_record('taxa_table')
    # Parse the date columns into a formatted time table.
    facade.input_register(time_handle_3_biomass)
    timetable = tparse.TimeParse(
        facade._data, time_handle_3_biomass.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')
    # Build and register the count (raw observation) table.
    facade.input_register(count_handle_3_biomass)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print('rawtable facade tmaker: ', rawtable)
    facade.push_tables[count_handle_3_biomass.tablename] = rawtable
    facade.create_log_record(count_handle_3_biomass.tablename)
    # Convert covariate columns into record form.
    facade.input_register(covar_handle_3_biomass)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_3_biomass.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')
    facade._valueregister['globalid'] = meta_handle_3_biomass.lnedentry[
        'globalid']
    facade._valueregister['lter'] = meta_handle_3_biomass.lnedentry['lter']
    facade._valueregister['siteid'] = siteid
    # Suffix derived time columns to avoid collisions with raw data.
    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x+'_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print('merge class obs_time df: ', observation_time_df)
    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)
    # Seed the database tables needed by the merge step; site rows
    # may already exist, so that insert is best-effort.
    try:
        study_site_table.to_sql(
            'study_site_table',
            orm.conn, if_exists='append', index=False)
    except Exception as e:
        print('Sites in db: ', str(e))
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql(
        'project_table', orm.conn, if_exists='append', index=False)
    # Exercise the merge/upload object under test.
    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)
    # lnedentry maps push-table column (key) -> raw-data column (value).
    taxa_column_in_data = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]
    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]
    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)
    obs_columns_in_data = [
        x[1] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table)
def save_close(self):
    '''
    Qt slot: validate the edited site table against the database,
    register it for upload, log the changes, notify listeners, and
    close the dialog.

    Raises AttributeError (after showing an error dialog) when any
    edited site abbreviation already exists in the database —
    presumably under a different LTER; confirm against schema.
    '''
    self.update_data()
    # Pull all existing site ids from the database.
    session = orm.Session()
    sitecheck = session.query(
        orm.Sitetable.siteid).order_by(
            orm.Sitetable.siteid)
    session.close()
    sitecheckdf = read_sql(
        sitecheck.statement,
        sitecheck.session.bind)
    # Site ids as currently edited in the table model.
    changed_df = self.sitetablemodel.data(
        None, QtCore.Qt.UserRole)
    changed_site_list = changed_df['siteid'].values.tolist()
    # checker is True when no edited site id collides with one
    # already present in the database.
    if sitecheckdf is not None:
        if len(sitecheckdf) == 0:
            checker = True
        else:
            records_entered = sitecheckdf[
                'siteid'].values.tolist()
            check = [
                x for x in list(set(records_entered))
                if x in changed_site_list]
            checker = (len(check) == 0)
    else:
        checker = True
    if checker is True:
        pass
    else:
        # Collision: warn the user and abort the save.
        self._log.debug('SiteId present under different LTER')
        self.error.showMessage(
            'Site abbreviations already in database ' +
            'from an different LTER. Please modify ' +
            'site abbreviations.')
        raise AttributeError(
            'SiteID already present under different LTER')
    self.save_data = self.sitetablemodel.data(
        None, QtCore.Qt.UserRole)
    # Updating site levels
    self.facade.register_site_levels(
        self.facade._data[
            self.siteloc[
                'siteid']].drop_duplicates().values.tolist())
    # An empty edit still needs one placeholder row so downstream
    # concatenation/logging has something to work with.
    if len(self.save_data) == 0:
        self.save_data = self.save_data.append(
            DataFrame(
                {
                    'siteid': 'NULL',
                    'lat': 'nan',
                    'lng': 'nan',
                    'descript': 'NULL'
                }, index=[0])
        )
    else:
        pass
    # Attach a constant lterid column and queue the table for upload.
    lterid_df = hlp.produce_null_df(
        1, ['lterid'], len(self.save_data), self.lter)
    print(lterid_df)
    self.save_data = concat(
        [self.save_data, lterid_df],
        axis=1).reset_index(drop=True)
    print(self.save_data)
    self.facade.push_tables['sitetable'] = self.save_data
    # Log the edited columns and the old-vs-new value diff.
    hlp.write_column_to_log(
        self.sitelned, self._log, 'sitetable_c')
    oldsitetable = hlp.produce_null_df(
        len(self.save_data.columns),
        self.save_data.columns.values.tolist(),
        len(self.save_data),
        'nan')
    hlp.updated_df_values(
        oldsitetable, self.save_data, self._log, 'sitetable')
    # Unlock dependent climate/site widgets and finish.
    self.climatesite_unlocks.emit(self.facade._data)
    self._log.debug(
        'facade site levels' +
        ' '.join(self.facade._valueregister['sitelevels']))
    self.submit_change()
    self.close()
def get_dataframe(
        self, dataframe, acols, nullcols, keycols, dbcol,
        globalid, siteid, sitelevels):
    '''
    Build the observation-type table: subset the user-selected columns
    from the raw data, prepend the site column, and pad the remaining
    expected table columns with 'NA' placeholders.

    :param dataframe: raw data to subset
    :param acols: user-entered column names (GUI line edits)
    :param nullcols: table columns to null-fill (mutated locally)
    :param keycols: primary/foreign keys removed from the null list
    :param dbcol: all database columns of the target table (unused)
    :param globalid: global dataset id (unused here)
    :param siteid: name of the site column in the raw data
    :param sitelevels: unique site levels (unused here)
    :return: concatenated/renamed DataFrame ready for upload
    :raises AttributeError: if the final column rename fails
    '''
    # Defensive copies: removes/inserts below must not touch the
    # caller's sequences.
    acols = list(acols) if acols is not None else acols
    # BUG FIX: original tested `acols is not None` here, leaving
    # nullcols un-copied (caller mutation) or crashing when acols
    # was None.
    nullcols = list(nullcols) if nullcols is not None else nullcols
    keycols = list(keycols) if keycols is not None else keycols
    dbcol = list(dbcol) if dbcol is not None else dbcol
    sitelevels = list(sitelevels) if sitelevels is not None else sitelevels
    # NOTE: two discarded (no-op) list comprehensions mapping
    # 'unitobs' -> '<type>_observation' were removed; the real rename
    # happens in the try block at the end.
    print('obs acols: ', acols)
    print('obs nullcols: ', nullcols)
    if self._inputs.tablename == 'individual_table':
        # Individual tables may carry an empty-string column entry.
        try:
            acols.remove('')
        except Exception as e:
            print('no individual column to remove: ', str(e))
    try:
        acols = [x.rstrip() for x in acols]
    except Exception as e:
        # Numeric column indexes cannot be rstripped; coerce to int.
        acols = [int(x) for x in acols]
        uniquesubset = dataframe[acols]
        print(str(e))
    # Insert siteid column and remove
    # spatial rep 1 from null columns (already have data
    # in siteid column of raw data)
    acols.insert(0, siteid)
    nullcols.remove('spatial_replication_level_1')
    nullcols.remove(
        '{}_observation'.format(
            re.sub('_table', '', self._inputs.tablename)))
    # Populated by later pipeline steps — must not be null-filled now.
    columns_to_be_added_later = [
        'year', 'month', 'day', 'covariates']
    for later_column in columns_to_be_added_later:
        nullcols.remove(later_column)
    for key_column in keycols:
        nullcols.remove(key_column)
    if self._inputs.foreignmergeddata is not None:
        # Foreign-key columns from a prior query come from the raw
        # data instead of being null-filled.
        columns_where_data_is_from_query = [
            'taxa_{}_fkey'.format(
                re.sub('_table', '', self._inputs.tablename)),
            'site_in_project_{}_fkey'.format(
                re.sub('_table', '', self._inputs.tablename))
        ]
        for query_column in columns_where_data_is_from_query:
            acols.append(query_column)
    uniquesubset = dataframe[acols]
    print('uniquesub: ', uniquesubset)
    nullsubset = hlp.produce_null_df(
        ncols=len(nullcols),
        colnames=nullcols,
        dflength=len(uniquesubset),
        nullvalue='NA')
    print('null subset: ', nullsubset)
    _concat = concat(
        [uniquesubset, nullsubset], axis=1).reset_index()
    # NOTE(review): double reset_index adds both 'level_0' and
    # 'index' columns — preserved; confirm downstream relies on it.
    final = _concat.reset_index()
    if self._inputs.tablename == 'individual_table':
        final['individual_observation'] = 1
        print('should have added individual observation')
    print('final build class columns: ', final.columns)
    try:
        # Rename raw-data column names to their formatted table
        # equivalents ('unitobs' maps to '<type>_observation').
        fomated_column_to_change = list(self._inputs.lnedentry.keys())
        fomated_column_to_change.append('spatial_replication_level_1')
        for index, item in enumerate(fomated_column_to_change):
            if item == 'unitobs':
                fomated_column_to_change[index] = '{}_observation'.format(
                    re.sub(
                        '_table', '', self._inputs.tablename))
        original_column_names_to_change = list(
            self._inputs.lnedentry.values())
        original_column_names_to_change.append(siteid)
        for i, item in enumerate(fomated_column_to_change):
            final.rename(
                columns={
                    original_column_names_to_change[i]: item},
                inplace=True)
        return final
    except Exception as e:
        print(str(e))
        raise AttributeError('Column renaming error')
def get_dataframe(
        self, dataframe, acols, nullcols, keycols, dbcol,
        globalid, siteid, sitelevels):
    '''
    Build the taxa-style table: for each site level, subset and
    deduplicate the user-selected columns, rename them to database
    column names, and pad the remaining table columns with 'NA'.

    :param dataframe: raw data (mutated if taxacreate adds columns)
    :param acols: user-entered column names (GUI line edits)
    :param nullcols: table columns to null-fill (keys removed locally)
    :param keycols: primary/foreign keys excluded from output padding
    :param dbcol: all database columns of the target table
    :param globalid: global dataset id (unused here)
    :param siteid: name of the site column in the raw data
    :param sitelevels: unique site levels to iterate over
    :return: per-site concatenated DataFrame restricted to dbcol+siteid
    '''
    # Defensive copies so local removes never mutate caller lists.
    acols = list(acols) if acols is not None else acols
    # BUG FIX: original guarded this copy on `acols is not None`,
    # mutating the caller's nullcols (or raising) when acols was None.
    nullcols = list(nullcols) if nullcols is not None else nullcols
    keycols = list(keycols) if keycols is not None else keycols
    dbcol = list(dbcol) if dbcol is not None else dbcol
    sitelevels = list(sitelevels) if sitelevels is not None else sitelevels
    try:
        acols = [x.rstrip() for x in acols]
    except Exception as e:
        # Numeric column indexes cannot be rstripped; coerce to int.
        acols = [int(x) for x in acols]
        uniquesubset = dataframe[acols]
        print(str(e))
    # Keys are uploaded separately — drop them from both column lists.
    try:
        for key_column in keycols:
            dbcol.remove(key_column)
    except Exception as e:
        print(str(e))
    try:
        for key_column in keycols:
            nullcols.remove(key_column)
    except Exception as e:
        print(str(e))
    print('SELF INPUTS: ', self._inputs.checks)
    print('AVAILABLE COLUMNS: ', acols)
    print('DB COLUMNS: ', dbcol)
    print('NULL COLUMNS: ', nullcols)
    print('DF COLUMNS: ', dataframe.columns.values.tolist())
    if self._inputs.checks['taxacreate'] is True:
        # User asked to create taxa columns missing from the raw
        # data: add each as a constant column equal to its own name.
        dfcol = dataframe.columns.values.tolist()
        columns_create = [x for x in acols if x not in dfcol]
        print('CREATE :', columns_create)
        for new_column in columns_create:
            dataframe.loc[:, new_column] = new_column
        print('DF COLUMNS (added): ', dataframe.columns.values.tolist())
    # Database columns that will receive real data (not nulls).
    dbcolrevised = [x for x in dbcol if x not in nullcols]
    print('DB COLUMN REVISED: ', dbcolrevised)
    uniquesubset_site_list = []
    for level in sitelevels:
        # Subset the raw data for this site level only.
        uniquesubset = dataframe[dataframe[siteid] == level].copy()
        try:
            uniquesubset = uniquesubset[acols].copy()
        except Exception as e:
            print(str(e))
        # Positional rename: acols[j] -> dbcolrevised[j].
        for j, rename_item in enumerate(dbcolrevised):
            uniquesubset.rename(
                columns={acols[j]: rename_item}, inplace=True)
        unique = uniquesubset.drop_duplicates().copy()
        unique = unique.reset_index()
        sitelevel = hlp.produce_null_df(
            ncols=len(unique),
            colnames=[siteid],
            dflength=len(unique),
            nullvalue=level)
        nullsubset = hlp.produce_null_df(
            ncols=len(nullcols),
            colnames=nullcols,
            dflength=len(unique),
            nullvalue='NA')
        unique = concat(
            [unique, nullsubset, sitelevel], axis=1)
        uniquesubset_site_list.append(unique)
    # Stack the per-site frames back together.
    final = uniquesubset_site_list[0]
    print('final before loop: ', final)
    for i, item in enumerate(uniquesubset_site_list):
        if i > 0:
            final = concat([final, item], ignore_index=True)
    print('past subsetting sites')
    print(final.columns)
    dbcol.append(siteid)
    return final[dbcol].copy()
def get_dataframe(
        self, dataframe, acols, nullcols, keycols, dbcol,
        globalid, siteid, sitelevels):
    '''
    Method to concatenate a study_site_table based on information
    supplied by the user (acols), expected columns in table (dbcol),
    Columns to be filled with NA (nullcols), and the
    globalid, siteid, and unique site levels

    acols: columns returned from the GUI (i.e. line edit entries)
    dbcol: all columns within the table
    nullcols: all columns within the table that HAVE to have
    NA's generated by the table builder
    keycols: primary and foreign keys in the table (Typically
    what are removed from the nullcol list)
    '''
    acols = list(acols) if acols is not None else acols
    # BUGFIX: None-guard previously tested `acols`, not `nullcols`.
    nullcols = list(nullcols) if nullcols is not None else nullcols
    keycols = list(keycols) if keycols is not None else keycols
    dbcol = list(dbcol) if dbcol is not None else dbcol
    sitelevels = list(sitelevels) if sitelevels is not None else sitelevels
    print('acols before: ', acols)
    print('nullcols before: ', nullcols)
    print('dbcol before: ', dbcol)
    try:
        acols = [x.rstrip() for x in acols]
    except Exception as e:
        # Non-string entries (column indexes) — coerce to int.
        acols = [int(x) for x in acols]
        uniquesubset = dataframe[acols]
        print(str(e))
    # Hoisted out of the try blocks so it is always defined for the
    # numeric-null frame below.
    lat_lng_null_list = ['lat_study_site', 'lng_study_site']
    # The LTER fkey is appended later by the caller, and lat/lng get a
    # numeric placeholder — remove them from the generic NA list.
    # (Plain loops instead of side-effecting list comprehensions.)
    try:
        for x in ['lter_table_fkey']:
            nullcols.remove(x)
    except Exception as e:
        print(str(e))
    try:
        for x in ['lter_table_fkey']:
            dbcol.remove(x)
    except Exception as e:
        print(str(e))
    try:
        for x in lat_lng_null_list:
            nullcols.remove(x)
    except Exception as e:
        print(str(e))
    print('acols after: ', acols)
    print('nullcols after: ', nullcols)
    print('dbcol after: ', dbcol)
    # Single user column becomes the study_site_key.
    uniquesubset = dataframe[acols]
    uniquesubset.columns = ['study_site_key']
    nullcols_non_numeric = hlp.produce_null_df(
        ncols=len(nullcols), colnames=nullcols,
        dflength=len(uniquesubset), nullvalue='NA')
    # Numeric placeholders use a sentinel instead of 'NA'.
    nullcols_numeric = hlp.produce_null_df(
        ncols=len(lat_lng_null_list), colnames=lat_lng_null_list,
        dflength=len(uniquesubset), nullvalue='-99999')
    _concat = concat(
        [uniquesubset, nullcols_non_numeric, nullcols_numeric],
        axis=1).reset_index(drop=True)
    final = _concat.drop_duplicates().reset_index(drop=True)
    return final
def get_dataframe( self, dataframe, acols, nullcols, dbcol, globalid, siteid, sitelevels): acols = list(acols) nullcols = list(nullcols) keycols = list(keycols) dbcol = list(dbcol) sitelevels = list(sitelevels) acols = [x.rstrip() for x in acols] nullcols = [x.rstrip() for x in nullcols] dbcol = [x.rstrip() for x in dbcol] col_booleans = list(self._inputs.checks.values()) col_names = list(self._inputs.checks.keys()) acols = [ x.rstrip() for x,y in zip(acols, col_booleans) if y is False] acols_rename = [ x.rstrip() for x,y in zip(col_names, col_booleans) if y is False] nullcols = [ x.rstrip() for x,y in zip(col_names, col_booleans) if y is True] dbcol.remove('stationid') for i in dbcol: if i not in nullcols: nullcols.append(i) else: pass print('siteid: ', siteid) print('col bools: ', col_booleans) print('avaialable cols: ', acols) print('null cols: ', nullcols) print('db cols: ', dbcol) print('dataframe climate build: ', dataframe) acols.append(siteid) try: uniquesubset = dataframe[acols] except Exception as e: acols = [int(x) for x in acols] uniquesubset = dataframe[acols] print(str(e)) nullsubset = hlp.produce_null_df( ncols=len(nullcols), colnames=nullcols, dflength=len(uniquesubset), nullvalue='NA') print('uq subset build: ', uniquesubset) _concat = concat( [uniquesubset, nullsubset], axis=1).reset_index( ) final = _concat.reset_index() try: print('build siteid: ', siteid) acols_rename.append('stationid') for i,item in enumerate(acols_rename): final.rename( columns={acols[i]:item}, inplace=True) print('final build class: ', final.columns) return final except Exception as e: print(str(e)) raise AttributeError('Column renaming error')
def save_close(self): self.update_data() session = orm.Session() sitecheck = session.query(orm.Sitetable.siteid).order_by( orm.Sitetable.siteid) session.close() sitecheckdf = read_sql(sitecheck.statement, sitecheck.session.bind) changed_df = self.sitetablemodel.data(None, QtCore.Qt.UserRole) changed_site_list = changed_df['siteid'].values.tolist() if sitecheckdf is not None: if len(sitecheckdf) == 0: checker = True else: records_entered = sitecheckdf['siteid'].values.tolist() check = [ x for x in list(set(records_entered)) if x in changed_site_list ] checker = (len(check) == 0) else: checker = True if checker is True: pass else: self._log.debug('SiteId present under different LTER') self.error.showMessage('Site abbreviations already in database ' + 'from an different LTER. Please modify ' + 'site abbreviations.') raise AttributeError('SiteID already present under different LTER') self.save_data = self.sitetablemodel.data(None, QtCore.Qt.UserRole) # Updating site levels self.facade.register_site_levels(self.facade._data[ self.siteloc['siteid']].drop_duplicates().values.tolist()) if len(self.save_data) == 0: self.save_data = self.save_data.append( DataFrame( { 'siteid': 'NULL', 'lat': 'nan', 'lng': 'nan', 'descript': 'NULL' }, index=[0])) else: pass lterid_df = hlp.produce_null_df(1, ['lterid'], len(self.save_data), self.lter) print(lterid_df) self.save_data = concat([self.save_data, lterid_df], axis=1).reset_index(drop=True) print(self.save_data) self.facade.push_tables['sitetable'] = self.save_data hlp.write_column_to_log(self.sitelned, self._log, 'sitetable_c') oldsitetable = hlp.produce_null_df( len(self.save_data.columns), self.save_data.columns.values.tolist(), len(self.save_data), 'nan') hlp.updated_df_values(oldsitetable, self.save_data, self._log, 'sitetable') self.climatesite_unlocks.emit(self.facade._data) self._log.debug('facade site levels' + ' '.join(self.facade._valueregister['sitelevels'])) self.submit_change() self.close()
def test_site_in_project_key(
        MergeToUpload, site_handle_4_percent_cover,
        file_handle_4_percent_cover, meta_handle_4_percent_cover,
        project_handle_4_percent_cover, taxa_handle_4_percent_cover,
        time_handle_4_percent_cover, biomass_handle_4_percent_cover,
        covar_handle_4_percent_cover):
    '''
    End-to-end integration test: builds every table for the 4%-cover
    dataset through the Facade, pushes study_site/project tables to the
    database, then exercises the MergeToUpload merge/upload pipeline.
    Fixture handles supply the GUI line-edit inputs for each table.
    '''
    # --- load metadata and raw data ---
    facade = face.Facade()
    facade.input_register(meta_handle_4_percent_cover)
    facade.meta_verify()
    facade.input_register(file_handle_4_percent_cover)
    facade.load_data()
    # --- study_site_table ---
    facade.input_register(site_handle_4_percent_cover)
    sitedirector = facade.make_table("siteinfo")
    study_site_table = sitedirector._availdf
    print("study_site_table (test): ", study_site_table)
    facade.create_log_record("study_site_table")
    # Append the LTER foreign-key column to every row.
    lter = meta_handle_4_percent_cover.lnedentry["lter"]
    ltercol = produce_null_df(1, ["lter_table_fkey"],
                              len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print("study_site_table: ", study_site_table)
    facade.push_tables["study_site_table"] = study_site_table
    # Register site levels derived from the raw data.
    siteid = site_handle_4_percent_cover.lnedentry["study_site_key"]
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister["siteid"] = siteid
    # --- project_table ---
    facade.input_register(project_handle_4_percent_cover)
    maindirector = facade.make_table("maininfo")
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables["project_table"] = project_table
    facade.create_log_record("project_table")
    # --- taxa_table ---
    facade.input_register(taxa_handle_4_percent_cover)
    taxadirector = facade.make_table("taxainfo")
    taxa_table = taxadirector._availdf
    facade.push_tables["taxa_table"] = taxa_table
    facade.create_log_record("taxa_table")
    # --- time table parsed from date columns ---
    facade.input_register(time_handle_4_percent_cover)
    timetable = tparse.TimeParse(
        facade._data, time_handle_4_percent_cover.lnedentry).formater()
    facade.push_tables["timetable"] = timetable
    facade.create_log_record("timetable")
    # --- raw observation (biomass) table ---
    facade.input_register(biomass_handle_4_percent_cover)
    rawdirector = facade.make_table("rawinfo")
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[biomass_handle_4_percent_cover.tablename] = rawtable
    facade.create_log_record(biomass_handle_4_percent_cover.tablename)
    # --- covariates packed into record format ---
    facade.input_register(covar_handle_4_percent_cover)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_4_percent_cover.lnedentry["columns"]
    ).convert_records()
    facade.push_tables["covariates"] = covartable
    facade.create_log_record("covartable")
    facade._valueregister["globalid"] = (
        meta_handle_4_percent_cover.lnedentry["globalid"])
    facade._valueregister["lter"] = (
        meta_handle_4_percent_cover.lnedentry["lter"])
    facade._valueregister["siteid"] = siteid
    # Suffix time columns so they don't collide with raw-data columns
    # when concatenated side by side.
    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + "_derived" for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print("merge class obs_time columns: ", observation_time_df.columns)
    print("merge class project table: ", project_table)
    # Push parent tables; site push is best-effort since the sites may
    # already exist in the test database.
    try:
        study_site_table.to_sql(
            "study_site_table", orm.conn, if_exists="append", index=False)
    except Exception as e:
        print("Sites in db: ", str(e))
    project_table["lter_project_fkey"] = facade._valueregister["lter"]
    project_table.to_sql(
        "project_table", orm.conn, if_exists="append", index=False)
    # --- merge/upload pipeline under test ---
    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister["lter"],
        studysitelabel=siteid,
        studysitelevels=sitelevels,
    )
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid
    )
    # lnedentry maps push-table column -> raw-data column.
    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs["taxainfo"].lnedentry.items())]
    taxa_column_in_data = [
        x[1] for x in list(facade._inputs["taxainfo"].lnedentry.items())]
    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name="{}".format(
            re.sub("_table", "", facade._inputs["rawinfo"].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table,
    )
    obs_columns_in_data = [
        x[1] for x in list(facade._inputs["rawinfo"].lnedentry.items())]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs["rawinfo"].lnedentry.items())]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table,
    )
def get_dataframe(self, dataframe, acols, nullcols, dbcol, globalid, siteid, sitelevels): acols = list(acols) nullcols = list(nullcols) keycols = list(keycols) dbcol = list(dbcol) sitelevels = list(sitelevels) acols = [x.rstrip() for x in acols] nullcols = [x.rstrip() for x in nullcols] dbcol = [x.rstrip() for x in dbcol] col_booleans = list(self._inputs.checks.values()) col_names = list(self._inputs.checks.keys()) acols = [x.rstrip() for x, y in zip(acols, col_booleans) if y is False] acols_rename = [ x.rstrip() for x, y in zip(col_names, col_booleans) if y is False ] nullcols = [ x.rstrip() for x, y in zip(col_names, col_booleans) if y is True ] dbcol.remove('stationid') for i in dbcol: if i not in nullcols: nullcols.append(i) else: pass print('siteid: ', siteid) print('col bools: ', col_booleans) print('avaialable cols: ', acols) print('null cols: ', nullcols) print('db cols: ', dbcol) print('dataframe climate build: ', dataframe) acols.append(siteid) try: uniquesubset = dataframe[acols] except Exception as e: acols = [int(x) for x in acols] uniquesubset = dataframe[acols] print(str(e)) nullsubset = hlp.produce_null_df(ncols=len(nullcols), colnames=nullcols, dflength=len(uniquesubset), nullvalue='NA') print('uq subset build: ', uniquesubset) _concat = concat([uniquesubset, nullsubset], axis=1).reset_index() final = _concat.reset_index() try: print('build siteid: ', siteid) acols_rename.append('stationid') for i, item in enumerate(acols_rename): final.rename(columns={acols[i]: item}, inplace=True) print('final build class: ', final.columns) return final except Exception as e: print(str(e)) raise AttributeError('Column renaming error')
def save_close(self):
    '''
    Method to save the study_site_table as it is seen
    by the user (matching sites that were accepted by user
    are removed from the saved table because it will be pushed)
    '''
    # Reset the query-match flag; presumably consumed by the update
    # workflow elsewhere in this class — TODO confirm.
    self.updated_from_query_matches = False
    # Retrieve study_site_table data from user view
    self.save_data = self.sitetablemodel.data(None, QtCore.Qt.UserRole)
    # If there are no site because they are already
    # in the database then create an empty dataframe
    if len(self.save_data) == 0:
        self.save_data = self.save_data.append(
            DataFrame(
                {
                    'study_site_key': 'NULL',
                    'lat_study_site': 'nan',
                    'lng_study_site': 'nan',
                    'descript': 'NULL'
                },
                index=[0]))
    else:
        pass
    # Append dataframe with current LTER
    lterid_df = hlp.produce_null_df(1, ['lter_table_fkey'],
                                    len(self.save_data), self.lter)
    print(lterid_df)
    self.save_data = concat([self.save_data, lterid_df],
                            axis=1).reset_index(drop=True)
    # Convert types and strip stings: all non-coordinate columns become
    # stripped strings; lat/lng are coerced to numeric (NaN on failure).
    numeric_cols = ['lat_study_site', 'lng_study_site']
    self.save_data[self.save_data.columns.difference(
        numeric_cols)] = self.save_data[self.save_data.columns.difference(
            numeric_cols)].applymap(str)
    self.save_data[self.save_data.columns.difference(
        numeric_cols)] = self.save_data[self.save_data.columns.difference(
            numeric_cols)].applymap(lambda x: x.strip())
    self.save_data[numeric_cols] = to_numeric(self.save_data[numeric_cols],
                                              errors='coerce')
    print('Pushed dataset: ', self.save_data)
    self.facade.push_tables['study_site_table'] = self.save_data
    # Helpers to keep track of user changes to site names
    hlp.write_column_to_log(self.sitelned, self._log, 'sitetable_c')
    oldsitetable = hlp.produce_null_df(
        len(self.save_data.columns),
        self.save_data.columns.values.tolist(), len(self.save_data), 'nan')
    hlp.updated_df_values(oldsitetable, self.save_data, self._log,
                          'sitetable')
    # Signal to confim this form has been completed and
    # user can move on to other tables
    self.site_unlocks.emit(self.facade._data)
    self._log.debug('facade site levels' +
                    ' '.join(self.facade._valueregister['sitelevels']))
    self._log.debug(
        'sitelevels (Save Block): ' +
        ' '.join(self.save_data['study_site_key'].values.tolist()))
    self.close()
def get_dataframe(self, dataframe, acols, nullcols, keycols, dbcol,
                  globalid, siteid, sitelevels):
    '''
    Method to concatenate a study_site_table based on information
    supplied by the user (acols), expected columns in table (dbcol),
    Columns to be filled with NA (nullcols), and the
    globalid, siteid, and unique site levels

    acols: columns returned from the GUI (i.e. line edit entries)
    dbcol: all columns within the table
    nullcols: all columns within the table that HAVE to have
    NA's generated by the table builder
    keycols: primary and foreign keys in the table (Typically
    what are removed from the nullcol list)
    '''
    acols = list(acols) if acols is not None else acols
    # BUGFIX: None-guard previously tested `acols`, not `nullcols`.
    nullcols = list(nullcols) if nullcols is not None else nullcols
    keycols = list(keycols) if keycols is not None else keycols
    dbcol = list(dbcol) if dbcol is not None else dbcol
    sitelevels = list(sitelevels) if sitelevels is not None else sitelevels
    print('acols before: ', acols)
    print('nullcols before: ', nullcols)
    print('dbcol before: ', dbcol)
    try:
        acols = [x.rstrip() for x in acols]
    except Exception as e:
        # Non-string entries (column indexes) — coerce to int.
        acols = [int(x) for x in acols]
        uniquesubset = dataframe[acols]
        print(str(e))
    # Hoisted out of the try blocks so it is always defined for the
    # numeric-null frame below.
    lat_lng_null_list = ['lat_study_site', 'lng_study_site']
    # The LTER fkey is appended later by the caller, and lat/lng get a
    # numeric placeholder — remove them from the generic NA list.
    # (Plain loops instead of side-effecting list comprehensions.)
    try:
        for x in ['lter_table_fkey']:
            nullcols.remove(x)
    except Exception as e:
        print(str(e))
    try:
        for x in ['lter_table_fkey']:
            dbcol.remove(x)
    except Exception as e:
        print(str(e))
    try:
        for x in lat_lng_null_list:
            nullcols.remove(x)
    except Exception as e:
        print(str(e))
    print('acols after: ', acols)
    print('nullcols after: ', nullcols)
    print('dbcol after: ', dbcol)
    # Single user column becomes the study_site_key.
    uniquesubset = dataframe[acols]
    uniquesubset.columns = ['study_site_key']
    nullcols_non_numeric = hlp.produce_null_df(ncols=len(nullcols),
                                               colnames=nullcols,
                                               dflength=len(uniquesubset),
                                               nullvalue='NA')
    # Numeric placeholders use a sentinel instead of 'NA'.
    nullcols_numeric = hlp.produce_null_df(ncols=len(lat_lng_null_list),
                                           colnames=lat_lng_null_list,
                                           dflength=len(uniquesubset),
                                           nullvalue='-99999')
    _concat = concat(
        [uniquesubset, nullcols_non_numeric, nullcols_numeric],
        axis=1).reset_index(drop=True)
    final = _concat.drop_duplicates().reset_index(drop=True)
    return final
def test_site_in_project_key_number_two(MergeToUpload, site_handle2,
                                        file_handle2, meta_handle2,
                                        project_handle2, taxa_handle2,
                                        time_handle2, count_handle2,
                                        covar_handle2):
    '''
    Second end-to-end integration test of the MergeToUpload pipeline
    (count dataset). Differs from the first test by pushing only a
    single study-site row with key forced to 'NULL' — presumably to
    exercise the null-site path; confirm against site_in_proj_key_df.
    '''
    # --- load metadata and raw data ---
    facade = face.Facade()
    facade.input_register(meta_handle2)
    facade.meta_verify()
    facade.input_register(file_handle2)
    facade.load_data()
    # --- study_site_table ---
    facade.input_register(site_handle2)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf
    siteid = site_handle2.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    print('test2 sitelevels: ', sitelevels)
    facade._valueregister['siteid'] = siteid
    print('study_site_table (test): ', study_site_table)
    facade.create_log_record('study_site_table')
    # Append the LTER foreign-key column.
    lter = meta_handle2.lnedentry['lter']
    ltercol = produce_null_df(1, ['lter_table_fkey'],
                              len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    # Reduce the push to a single row and blank out its key.
    study_site_table_og_col = study_site_table.columns.values.tolist()
    study_site_table_single = study_site_table.iloc[0, :]
    study_site_table_single_df = DataFrame([study_site_table_single])
    study_site_table_single_df.columns = study_site_table_og_col
    print('study site single: ', study_site_table_single)
    study_site_table_single_df.loc[0, 'study_site_key'] = 'NULL'
    print('study_site_table: ', study_site_table_single_df)
    facade.push_tables['study_site_table'] = study_site_table_single_df
    # --- project_table ---
    facade.input_register(project_handle2)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)
    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')
    # --- taxa_table ---
    facade.input_register(taxa_handle2)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    print('taxa columns after make taxa table: ', taxa_table.columns)
    facade.create_log_record('taxa_table')
    # --- time table parsed from date columns ---
    print('taxa columns before time_table: ', taxa_table.columns)
    facade.input_register(time_handle2)
    timetable = tparse.TimeParse(facade._data,
                                 time_handle2.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')
    # --- raw observation (count) table ---
    print('taxa columns before count_table: ', taxa_table.columns)
    facade.input_register(count_handle2)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[count_handle2.tablename] = rawtable
    facade.create_log_record(count_handle2.tablename)
    # --- covariates packed into record format ---
    print('taxa columns before covar_table: ', taxa_table.columns)
    facade.input_register(covar_handle2)
    covartable = ddf.DictionaryDataframe(
        facade._data, covar_handle2.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')
    facade._valueregister['globalid'] = meta_handle2.lnedentry['globalid']
    facade._valueregister['lter'] = meta_handle2.lnedentry['lter']
    facade._valueregister['siteid'] = siteid
    # Suffix time columns so they don't collide with raw-data columns
    # when concatenated side by side.
    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)
    # Push parent tables to the test database.
    study_site_table.to_sql('study_site_table', orm.conn,
                            if_exists='append', index=False)
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql('project_table', orm.conn,
                         if_exists='append', index=False)
    # --- merge/upload pipeline under test ---
    print('taxa columns before site_in_proj method: ', taxa_table.columns)
    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)
    print('taxa columns before user taxa merge method: ',
          taxa_table.columns)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)
    # NOTE(review): item order here is push-table-col first, data-col
    # second — reversed relative to the first test; confirm intended.
    taxa_column_in_data = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]
    taxa_column_in_push_table = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]
    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)
def save_close(self):
    '''
    Method to save the study_site_table as it is seen
    by the user (matching sites that were accepted by user
    are removed from the saved table because it will be pushed)
    '''
    # Confirm the user actually reviewed/updated records before saving;
    # bail out without saving on 'No'.
    update_message = QtGui.QMessageBox.question(
        self, 'Message', 'Did you update records?',
        QtGui.QMessageBox.Yes, QtGui.QMessageBox.No)
    if update_message == QtGui.QMessageBox.No:
        return
    else:
        pass
    # Retrieve study_site_table data from user view
    save_data = self.sitetablemodel.data(None, QtCore.Qt.UserRole)
    self.save_data = save_data.drop_duplicates()
    print('saved data (initial): ', self.save_data)
    # Re-register site levels from the raw data.
    self.facade.register_site_levels(
        self.facade._data[
            self.siteloc[
                'study_site_key']].drop_duplicates().values.tolist())
    # If there are no site because they are already
    # in the database then create an empty dataframe
    if len(self.save_data) == 0:
        self.save_data = self.save_data.append(
            DataFrame(
                {
                    'study_site_key': 'NULL',
                    'lat_study_site': 'nan',
                    'lng_study_site': 'nan',
                    'descript': 'NULL'
                },
                index=[0])
        )
    else:
        pass
    # Append dataframe with current LTER
    lterid_df = hlp.produce_null_df(
        1, ['lter_table_fkey'], len(self.save_data), self.lter)
    print(lterid_df)
    self.save_data = concat(
        [self.save_data, lterid_df], axis=1).reset_index(drop=True)
    # Convert types and strip stings: all non-coordinate columns become
    # stripped strings; lat/lng are coerced to numeric (NaN on failure).
    numeric_cols = ['lat_study_site', 'lng_study_site']
    self.save_data[
        self.save_data.columns.difference(numeric_cols)] = self.save_data[
            self.save_data.columns.difference(numeric_cols)].applymap(str)
    self.save_data[
        self.save_data.columns.difference(numeric_cols)] = self.save_data[
            self.save_data.columns.difference(numeric_cols)].applymap(
                lambda x: x.strip())
    self.save_data[numeric_cols] = to_numeric(
        self.save_data[numeric_cols], errors='coerce')
    print('Pushed dataset: ', self.save_data)
    self.facade.push_tables['study_site_table'] = self.save_data
    # Helpers to keep track of user changes to site names
    hlp.write_column_to_log(
        self.sitelned, self._log, 'sitetable_c')
    oldsitetable = hlp.produce_null_df(
        len(self.save_data.columns),
        self.save_data.columns.values.tolist(),
        len(self.save_data),
        'nan'
    )
    hlp.updated_df_values(
        oldsitetable, self.save_data, self._log, 'sitetable'
    )
    # Signal to confim this form has been completed and
    # user can move on to other tables
    self.site_unlocks.emit('study_site_mod')
    # Cache the sorted site levels for later dialogs.
    site_unsorted = self.facade._data[
        self.siteloc[
            'study_site_key']].drop_duplicates().values.tolist()
    site_unsorted.sort()
    self.sitelevels = site_unsorted
    self._log.debug(
        'facade site levels' +
        ' '.join(self.facade._valueregister['sitelevels']))
    self._log.debug(
        'sitelevels (Save Block): ' + ' '.join(self.sitelevels))
    self.saved.append(1)
    self.close()