def update_data(self):
    changed_df = self.sitetablemodel.data(None, QtCore.Qt.UserRole)
    changed_site_list = changed_df['siteid'].values.tolist()
    self._log.debug('changed_site_list: ' + ' '.join(changed_site_list))
    self._log.debug('sitelevels list: ' + ' '.join(self.sitelevels))

    # Apply any user edits of the site names to the raw data
    if len(changed_df) != 0:
        try:
            for i, item in enumerate(changed_site_list):
                self.facade._data.replace(
                    {self.sitelevels[i]: item.rstrip()}, inplace=True)
        except Exception as e:
            print(str(e))
            self._log.debug(str(e))
            self.error.showMessage('could not alter levels: ' + str(e))

    session = orm.Session()
    sitecheck = session.query(orm.Climatesite).order_by(
        orm.Climatesite.stationid).filter(
            orm.Climatesite.lterid == self.facade._valueregister['lterid'])
    session.close()
    sitecheckdf = read_sql(sitecheck.statement, sitecheck.session.bind)
    print('checker df: ', sitecheckdf)

    # checker is True when no previously entered site ids overlap with the
    # (possibly edited) site list from the table model
    if sitecheckdf is not None:
        if len(sitecheckdf) == 0:
            checker = True
        else:
            records_entered = sitecheckdf['siteid'].values.tolist()
            check = [
                x for x in set(records_entered) if x in changed_site_list
            ]
            print('check list: ', check)
            print('test check: ', len(check) == 0)
            checker = (len(check) == 0)
    else:
        checker = True
    print('checker status: ', checker)

    # If overlapping site ids were found, show them to the user for review
    if not checker:
        check_view = view.PandasTableModel(
            sitecheckdf[sitecheckdf['siteid'].isin(check)])
        self.preview_validate.tabviewPreview.setModel(check_view)
        self.sitequerymodel = check_view
        self.tabviewDbSiteQuery.setModel(self.sitequerymodel)
        self.sitelevels = changed_site_list
        self.preview_validate.show()
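# A minimal, standalone sketch (not part of the original module) of the
# query-to-DataFrame pattern used in update_data() above. It assumes the
# same module-level `orm` object (SQLAlchemy session factory plus the
# Climatesite mapped class) and the pandas `read_sql` import; the function
# name and the `lterid` parameter are illustrative only.
def query_climate_sites_for_lter(lterid):
    session = orm.Session()
    query = (session.query(orm.Climatesite)
             .order_by(orm.Climatesite.stationid)
             .filter(orm.Climatesite.lterid == lterid))
    session.close()
    # The Query object still carries its session/bind, so pandas can run
    # the compiled statement even after the session has been closed.
    return read_sql(query.statement, query.session.bind)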
def verify_entries(self):
    '''
    Verify the user input to the program, ensuring that all the
    parameters supplied match a given record in the metadata dataframe.
    '''
    if None in (self.idnumber, self.lterloc, self.metaurl):
        raise AttributeError(
            'Not all attributes have been set. Please enter ' +
            'the globalid, lter location, and metadata url.')
    else:
        try:
            assert self.idnumber > 0
        except AssertionError:
            raise AttributeError('Please enter the globalid number.')
        try:
            session = orm.Session()
            global_check_q = session.query(
                orm.project_table.proj_metadata_key).order_by(
                    orm.project_table.proj_metadata_key)
            session.close()
            global_check_df = read_sql(global_check_q.statement,
                                       global_check_q.session.bind)
            uploaded_globals = global_check_df[
                'proj_metadata_key'].values.tolist()
            assert self.idnumber not in uploaded_globals
            assert (self._meta.loc[self._meta['global_id'] ==
                                   self.idnumber]['global_id'] ==
                    self.idnumber).bool()
            assert (self._meta.loc[self._meta['global_id'] ==
                                   self.idnumber]['lter'] ==
                    self.lterloc).bool()
            assert (self._meta.loc[self._meta['global_id'] ==
                                   self.idnumber]['site_metadata'] ==
                    self.metaurl).bool()
            return True
        except Exception as e:
            print(str(e))
            raise LookupError(
                'The verification attributes have not been set ' +
                'correctly, or the global_id is already present: ' + str(e))
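# Hedged usage sketch for verify_entries(); `uploader` is a hypothetical
# instance of the class these methods belong to, and the attribute values
# below are made up for illustration only.
uploader.idnumber = 42
uploader.lterloc = 'SBC'
uploader.metaurl = 'http://example.org/metadata'
try:
    uploader.verify_entries()
except (AttributeError, LookupError) as e:
    print('verification failed: ', str(e))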
def __init__(self):
    self.session = orm.Session()
    self.table_types = {
        'taxa_table': orm.taxa_types,
        'count_table': orm.count_types,
        'density_table': orm.density_types,
        'biomass_table': orm.biomass_types,
        'individual_table': orm.individual_types,
        'percent_cover_table': orm.percent_cover_types,
        'project_table': orm.project_types
    }
    self.rawdata = None
    self.metadata_key = None
    self.sitelabel = None
    self.formateddata = None
def update_data(self):
    '''
    Check whether the study sites (study_site_key) have already been
    uploaded into the database, either from the same LTER or from a
    different LTER. If there are matches, this updates the query view
    table class AND prompts the user to confirm whether the sites about
    to be entered are exactly the same sites already in the db (so
    studies can be joined by shared LTER study site locations).

    Note: if the user changes one of the site names, this method
    accounts for that before the check begins.
    '''
    # Query to check whether the study sites are already in the db
    session = orm.Session()
    sitecheck = session.query(orm.study_site_table.__table__).order_by(
        orm.study_site_table.study_site_key).filter(
            orm.study_site_table.study_site_key.in_(
                self.sitelevels_submit_block))
    session.close()
    site_df_from_query = read_sql(sitecheck.statement,
                                  sitecheck.session.bind)

    # Conditionals to determine whether any results were returned,
    # setting the check flag (no_site_matches_from_db). The first
    # conditional covers an empty database and/or a query returning None.
    print('checker df: ', site_df_from_query)
    if site_df_from_query is not None:
        if site_df_from_query.empty:
            no_site_matches_from_db = True
        else:
            # If there are matches for study sites, make a list of the
            # matching names to prompt the user with
            no_site_matches_from_db = False
            site_matches_from_db = site_df_from_query[
                'study_site_key'].values.tolist()
    else:
        no_site_matches_from_db = True
    print('no_site_matches_from_db status: ', no_site_matches_from_db)

    # If there are matches and the user confirms they are the same sites
    # in the database, accept the changes and query the site ids again
    # to make sure no more shared study sites are left
    if no_site_matches_from_db:
        self.sitetablemodel = self.viewEdit(
            self.site_table_data_to_upload_to_db.applymap(str))
        self.listviewSiteLabels.setModel(self.sitetablemodel)
    else:
        query_matches_view = view.PandasTableModel(
            site_df_from_query[site_df_from_query['study_site_key'].isin(
                site_matches_from_db)])
        self.preview_validate.tabviewPreview.setModel(query_matches_view)
        self.sitequerymodel = query_matches_view
        self.tabviewDbSiteQuery.setModel(self.sitequerymodel)
        self.tabviewDbSiteQuery.resizeColumnsToContents()
        self.preview_validate.show()
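# Standalone sketch (assumed helper, not in the original module) of the
# membership check performed above: query study_site_table for any of the
# candidate keys and return whichever are already present. It reuses the
# module's `orm` object and pandas `read_sql`; `candidate_keys` is a
# placeholder name.
def existing_study_site_keys(candidate_keys):
    session = orm.Session()
    query = session.query(orm.study_site_table.__table__).filter(
        orm.study_site_table.study_site_key.in_(candidate_keys))
    session.close()
    df = read_sql(query.statement, query.session.bind)
    # An empty (or None) result means none of the candidates exist yet
    if df is None or df.empty:
        return []
    return df['study_site_key'].values.tolist()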
]
spatial_label = [study_site_label]
spatial_key = ['spatial_replication_level_1_label']
for i in range(len(spatial_index)):
    spatial_key.append(obs_columns_in_push_table[i] + '_label')
    spatial_label.append(obs_columns_in_data[i])

update_dict = {}
for i, item in enumerate(spatial_key):
    update_dict[item] = spatial_label[i]

orm.conn.execute(
    update(orm.project_table).where(
        column('proj_metadata_key') == 1).values(update_dict))

session = orm.Session()
sitecheck = session.query(orm.study_site_table.__table__).order_by(
    orm.study_site_table.__table__.c.study_site_key).filter(
        orm.study_site_table.__table__.c.lter_table_fkey == 'SBC')
sitecheckdf = pd.read_sql(sitecheck.statement, sitecheck.session.bind)
sitecheckdf['test'] = [i for i in range(len(sitecheckdf))]
sitecheckdf.loc[:, 'test'].idxmax()
sitecheckdf.loc[:, 'test'].idxmin()

engine = create_engine(
    'postgresql+psycopg2://postgres:demography@localhost/popler_3',
    echo=True)
metadata = MetaData(bind=engine)
base = declarative_base()
conn = engine.connect()
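# Equivalent, more compact construction of update_dict (sketch): the second
# index loop above can be replaced with a zip over the two parallel lists.
update_dict = dict(zip(spatial_key, spatial_label))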
def save_close(self):
    self.update_data()
    session = orm.Session()
    sitecheck = session.query(orm.Sitetable.siteid).order_by(
        orm.Sitetable.siteid)
    session.close()
    sitecheckdf = read_sql(sitecheck.statement, sitecheck.session.bind)
    changed_df = self.sitetablemodel.data(None, QtCore.Qt.UserRole)
    changed_site_list = changed_df['siteid'].values.tolist()

    # Flag whether any of the site ids being saved already exist in the
    # database (potentially under a different LTER)
    if sitecheckdf is not None:
        if len(sitecheckdf) == 0:
            checker = True
        else:
            records_entered = sitecheckdf['siteid'].values.tolist()
            check = [
                x for x in set(records_entered) if x in changed_site_list
            ]
            checker = (len(check) == 0)
    else:
        checker = True

    if checker is not True:
        self._log.debug('SiteId present under different LTER')
        self.error.showMessage('Site abbreviations already in database ' +
                               'from a different LTER. Please modify ' +
                               'site abbreviations.')
        raise AttributeError('SiteID already present under different LTER')

    self.save_data = self.sitetablemodel.data(None, QtCore.Qt.UserRole)

    # Updating site levels
    self.facade.register_site_levels(self.facade._data[
        self.siteloc['siteid']].drop_duplicates().values.tolist())

    if len(self.save_data) == 0:
        self.save_data = self.save_data.append(
            DataFrame(
                {
                    'siteid': 'NULL',
                    'lat': 'nan',
                    'lng': 'nan',
                    'descript': 'NULL'
                },
                index=[0]))

    lterid_df = hlp.produce_null_df(1, ['lterid'], len(self.save_data),
                                    self.lter)
    print(lterid_df)

    self.save_data = concat([self.save_data, lterid_df],
                            axis=1).reset_index(drop=True)
    print(self.save_data)

    self.facade.push_tables['sitetable'] = self.save_data

    hlp.write_column_to_log(self.sitelned, self._log, 'sitetable_c')

    oldsitetable = hlp.produce_null_df(
        len(self.save_data.columns),
        self.save_data.columns.values.tolist(), len(self.save_data), 'nan')
    hlp.updated_df_values(oldsitetable, self.save_data, self._log,
                          'sitetable')

    self.climatesite_unlocks.emit(self.facade._data)
    self._log.debug('facade site levels: ' +
                    ' '.join(self.facade._valueregister['sitelevels']))
    self.submit_change()
    self.close()
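# Hypothetical stand-in (illustration only) for what hlp.produce_null_df
# appears to do in save_close(): build an n-row DataFrame whose listed
# columns are all filled with a single value. The real helper lives in the
# project's hlp module and may differ in detail; `ncols` is kept only to
# mirror the original call signature.
from pandas import DataFrame

def produce_filler_df(ncols, column_names, nrows, fill_value):
    # One column per name, each repeated nrows times with fill_value
    return DataFrame({name: [fill_value] * nrows for name in column_names})

# e.g. produce_filler_df(1, ['lterid'], 3, 'SBC') yields three rows of 'SBC'.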
def merge_for_datatype_table_upload(
        self, raw_dataframe,
        formated_dataframe,
        formated_dataframe_name,
        covariate_dataframe,
        siteinprojkeydf,
        raw_data_taxa_columns,
        uploaded_taxa_columns):
    print('start dtype upload')
    orm.replace_numeric_null_with_string(raw_dataframe)
    orm.replace_numeric_null_with_string(formated_dataframe)
    print('replacing nulls is a pain')

    # Step 2) Query taxa_table to get the auto-generated primary keys
    # back, and turn the query results into a dataframe.
    session = self.session
    taxa_key_statement = session.execute(
        select([orm.taxa_table]).
        where(
            orm.taxa_table.__table__.c.site_in_project_taxa_key.in_(
                siteinprojkeydf['site_in_project_key'].values.tolist())
        )
    )
    session.close()
    taxa_key_df = DataFrame(taxa_key_statement.fetchall())
    taxa_key_df.columns = taxa_key_statement.keys()
    taxa_key_df.replace({None: 'NA'}, inplace=True)

    # Step 3) Subset the queried taxa table to the records that pertain
    # to the count data (because we will be subsetting from this queried
    # taxa table later)
    dtype_subset_taxa_key_df = taxa_key_df[
        taxa_key_df['site_in_project_taxa_key'].isin(
            siteinprojkeydf['site_in_project_key'])]

    # Step 4) Merge the taxa_table query results with the
    # site_in_project table query that was performed to upload the
    # taxa_table (see above). This gives a table with site names and
    # taxonomic information, allowing a merge with the original dtype
    # data.
    tbl_dtype_merged_taxakey_siteinprojectkey = merge(
        dtype_subset_taxa_key_df, siteinprojkeydf,
        left_on='site_in_project_taxa_key',
        right_on='site_in_project_key', how='inner')

    raw_dataframe_siteinproj = merge(
        raw_dataframe, siteinprojkeydf,
        left_on=self.sitelabel, right_on='study_site_table_fkey',
        sort=False, how='left')

    raw_data_taxa_columns.append('site_in_project_key')
    uploaded_taxa_columns.append('site_in_project_taxa_key')
    raw_data_taxa_columns.append('site_in_project_key')
    uploaded_taxa_columns.append('site_in_project_taxa_key')
    print('updated raw data col list: ', raw_dataframe_siteinproj)
    print('update taxa data col list: ', uploaded_taxa_columns)

    # Step 5) Merge the original dtype data with the merged taxa_table
    # query so that all foreign keys (taxa and site_in_project) are
    # matched up with the original observations.
    dtype_merged_with_taxa_and_siteinproj_key = merge(
        raw_dataframe_siteinproj,
        tbl_dtype_merged_taxakey_siteinprojectkey,
        left_on=list(raw_data_taxa_columns),
        right_on=list(uploaded_taxa_columns),
        how='left')

    # Step 6) Take the merged original data with all foreign keys and
    # merge it with the formatted dtype table based on index values (the
    # order of records should not change from the original data to the
    # formatted data)
    tbl_dtype_merged_with_all_keys = merge(
        formated_dataframe,
        dtype_merged_with_taxa_and_siteinproj_key,
        left_index=True, right_index=True, how='inner',
        suffixes=('', '_y'))

    # Step 7) List the columns that will be needed to push the dtype
    # table to the database (including foreign keys)
    tbl_dtype_columns_to_upload = [
        'taxa_table_key', 'site_in_project_taxa_key', 'year_derived',
        'month_derived', 'day_derived', 'spatial_replication_level_1',
        'spatial_replication_level_2', 'spatial_replication_level_3',
        'spatial_replication_level_4', 'spatial_replication_level_5',
        'structure_type_1', 'structure_type_2', 'structure_type_3',
        'structure_type_4', 'treatment_type_1', 'treatment_type_2',
        'treatment_type_3', 'covariates'
    ]
    time_cols_rename = {
        'year_derived': 'year',
        'month_derived': 'month',
        'day_derived': 'day'
    }
    tbl_dtype_columns_to_upload.append(
        '{}_observation'.format(str(formated_dataframe_name)))
    tbl_dtype_merged_with_all_keys = concat(
        [tbl_dtype_merged_with_all_keys, covariate_dataframe], axis=1)

    # Step 8) Subset the fully merged dtype table data
    tbl_dtype_to_upload = tbl_dtype_merged_with_all_keys[
        tbl_dtype_columns_to_upload]
    tbl_dtype_to_upload.rename(columns=time_cols_rename, inplace=True)

    # Step 9) Rename columns to match the database table and convert
    # data types
    tbl_dtype_to_upload.rename(
        columns={
            'taxa_table_key':
            'taxa_{}_fkey'.format(str(formated_dataframe_name))},
        inplace=True)
    datatype_key = 'site_in_project_{}_fkey'.format(
        str(formated_dataframe_name))
    tbl_dtype_to_upload.rename(
        columns={'site_in_project_taxa_key': datatype_key}, inplace=True)
    tbl_dtype_to_upload.fillna('NA', inplace=True)
    self.formateddata = tbl_dtype_to_upload

    # Step 10) Upload to the database
    datatype_table = '{}_table'.format(str(formated_dataframe_name))
    datatype_obs = '{}_observation'.format(str(formated_dataframe_name))
    print('push raw_before', tbl_dtype_to_upload.columns)

    tbl_dtype_to_upload[datatype_obs] = to_numeric(
        tbl_dtype_to_upload[datatype_obs], errors='coerce')

    text_cols = [
        'spatial_replication_level_1', 'spatial_replication_level_2',
        'spatial_replication_level_3', 'spatial_replication_level_4',
        'spatial_replication_level_5', 'treatment_type_1',
        'treatment_type_2', 'treatment_type_3', 'structure_type_1',
        'structure_type_2', 'structure_type_3', 'structure_type_4'
    ]
    tbl_dtype_to_upload[text_cols] = tbl_dtype_to_upload[
        text_cols].applymap(str)
    tbl_dtype_to_upload[text_cols] = tbl_dtype_to_upload[
        text_cols].applymap(lambda x: x.strip())

    print(tbl_dtype_to_upload.dtypes)
    print(self.table_types[datatype_table])
    try:
        orm.convert_types(tbl_dtype_to_upload,
                          self.table_types[datatype_table])
    except Exception as e:
        print('converting issues: ', str(e))

    print('push raw_after', tbl_dtype_to_upload.columns)
    print(tbl_dtype_to_upload.dtypes)
    print('this should have worked')

    other_numerics = [
        'year', 'month', 'day', datatype_key,
        'taxa_{}_fkey'.format(str(formated_dataframe_name))
    ]
    tbl_dtype_to_upload[datatype_obs] = to_numeric(
        tbl_dtype_to_upload[datatype_obs], errors='coerce')

    try:
        tbl_dtype_to_upload[other_numerics].replace(
            {'NA': -99999}, inplace=True)
    except Exception as e:
        print('No NAs to replace:', str(e))
    try:
        tbl_dtype_to_upload[other_numerics].replace(
            {'NaN': -99999}, inplace=True)
    except Exception as e:
        print('No NaN to replace:', str(e))
    try:
        tbl_dtype_to_upload[other_numerics].replace(
            {None: -99999}, inplace=True)
    except Exception as e:
        print('No None to replace:', str(e))

    tbl_dtype_to_upload[other_numerics].fillna(-99999, inplace=True)
    tbl_dtype_to_upload.loc[:, other_numerics] = tbl_dtype_to_upload.loc[
        :, other_numerics].apply(to_numeric, errors='coerce')

    metadata_key_column_name = 'metadata_{}_key'.format(
        formated_dataframe_name)
    tbl_dtype_to_upload[metadata_key_column_name] = int(self.metadata_key)

    # Attempt a direct COPY FROM STDIN through a StringIO text buffer
    # (should be faster than pandas to_sql)
    sql_datatype_columns = tbl_dtype_to_upload.columns.values.tolist()
    s_buf = io.StringIO()
    tbl_dtype_to_upload.to_csv(s_buf, index=False, sep="\t")
    s_buf.seek(0)
    session = orm.Session()
    cur = session.connection().connection.cursor()
    copy_sql_statement = (
        "COPY {}({}) FROM STDIN WITH CSV HEADER DELIMITER AS '\t'".format(
            datatype_table, ", ".join(sql_datatype_columns)))
    cur.copy_expert(copy_sql_statement, s_buf)
    session.commit()
    session.close()
    # tbl_dtype_to_upload.to_sql(
    #     datatype_table,
    #     orm.conn, if_exists='append', index=False, chunksize=1000)
    print('past datatype upload')
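# Standalone sketch (assumed names) of the COPY-based bulk load used in
# Step 10 above: serialize a DataFrame to a tab-delimited buffer and stream
# it into an existing table through psycopg2's copy_expert. It assumes the
# module-level `io` import and a psycopg2-backed `orm.Session`; `df` and
# `table_name` are placeholders.
def copy_dataframe_to_table(df, table_name):
    buf = io.StringIO()
    df.to_csv(buf, index=False, sep='\t')
    buf.seek(0)
    session = orm.Session()
    # Drop down to the raw DBAPI cursor to reach copy_expert
    cur = session.connection().connection.cursor()
    copy_sql = (
        "COPY {}({}) FROM STDIN WITH CSV HEADER DELIMITER AS '\t'".format(
            table_name, ', '.join(df.columns)))
    cur.copy_expert(copy_sql, buf)
    session.commit()
    session.close()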