Example #1
        def update_data(self):
            changed_df = self.sitetablemodel.data(None, QtCore.Qt.UserRole)
            changed_site_list = changed_df['siteid'].values.tolist()

            self._log.debug('changed_site_list: ' +
                            ' '.join(changed_site_list))
            self._log.debug('sitelevels list: ' + ' ' +
                            ' '.join(self.sitelevels))

            if len(changed_df) > 0:
                try:
                    for i, item in enumerate(changed_site_list):
                        self.facade._data.replace(
                            {self.sitelevels[i]: item.rstrip()}, inplace=True)
                except Exception as e:
                    print(str(e))
                    self._log.debug(str(e))
                    self.error.showMessage('could not alter levels: ' + str(e))

            session = orm.Session()
            sitecheck = session.query(orm.Climatesite).order_by(
                orm.Climatesite.stationid).filter(
                    orm.Climatesite.lterid ==
                    self.facade._valueregister['lterid'])
            session.close()
            sitecheckdf = read_sql(sitecheck.statement, sitecheck.session.bind)

            print('checker df: ', sitecheckdf)
            if sitecheckdf is not None:
                if len(sitecheckdf) == 0:
                    checker = True
                else:
                    records_entered = sitecheckdf['siteid'].values.tolist()
                    check = [
                        x for x in list(set(records_entered))
                        if x in changed_site_list
                    ]
                    print('check list: ', check)
                    print('test check: ', len(check) == 0)
                    checker = (len(check) == 0)
            else:
                checker = True

            print('checker status: ', checker)
            if not checker:
                check_view = view.PandasTableModel(
                    sitecheckdf[sitecheckdf['siteid'].isin(check)])
                self.preview_validate.tabviewPreview.setModel(check_view)
                self.sitequerymodel = check_view
                self.tabviewDbSiteQuery.setModel(self.sitequerymodel)
                self.sitelevels = changed_site_list
                self.preview_validate.show()
    def verify_entries(self):
        '''
        Verify the user input into the program, ensuring that
        all the parameters supplied match a given record
        in the metadata dataframe.
        '''
        if None in (self.idnumber, self.lterloc, self.metaurl):
            raise AttributeError(
                'Not all attributes have been set. Please enter ' +
                'the globalid, lter location, and metadata url.')
        else:
            try:
                assert self.idnumber > 0
            except AssertionError:
                raise AttributeError('Please enter the globalid number.')

            try:
                session = orm.Session()
                global_check_q = session.query(
                    orm.project_table.proj_metadata_key).order_by(
                        orm.project_table.proj_metadata_key)
                session.close()
                global_check_df = read_sql(global_check_q.statement,
                                           global_check_q.session.bind)

                uploaded_globals = global_check_df[
                    'proj_metadata_key'].values.tolist()

                assert self.idnumber not in uploaded_globals

                assert (self._meta.loc[self._meta['global_id'] ==
                                       self.idnumber]['global_id']
                        == self.idnumber).bool()

                assert (self._meta.loc[self._meta['global_id'] ==
                                       self.idnumber]['lter']
                        == self.lterloc).bool()

                assert (self._meta.loc[self._meta['global_id'] ==
                                       self.idnumber]['site_metadata']
                        == self.metaurl).bool()

                return True
            except Exception as e:
                print(str(e))
                raise LookupError(
                    "The verification attributes have not been set" +
                    " correctly. Or global_id is already present: " + str(e))
Example #3
    def __init__(self):
        self.session = orm.Session()
        self.table_types = {
            'taxa_table': orm.taxa_types,
            'count_table': orm.count_types,
            'density_table': orm.density_types,
            'biomass_table': orm.biomass_types,
            'individual_table': orm.individual_types,
            'percent_cover_table': orm.percent_cover_types,
            'project_table': orm.project_types
        }
        self.rawdata = None
        self.metadata_key = None
        self.sitelabel = None
        self.formateddata = None

    def update_data(self):
        '''
        Method to check whether the study sites (study_site_key)
        have already been uploaded into the database, either from
        the same LTER or a different LTER.

        If there are matches, it updates the query view table class
        AND prompts the user to check whether the sites about to be
        entered are exactly the same sites as in the db (so
        studies can be joined by shared LTER study site locations).

        Note: if the user changes one of the site names, this
        method accounts for that before the 'check' begins.
        '''

        # Query to check if the study sites are already in the db
        session = orm.Session()
        sitecheck = session.query(orm.study_site_table.__table__).order_by(
            orm.study_site_table.study_site_key).filter(
                orm.study_site_table.study_site_key.in_(
                    self.sitelevels_submit_block))
        session.close()
        site_df_from_query = read_sql(sitecheck.statement,
                                      sitecheck.session.bind)

        # Conditionals to determine whether any results were
        # returned and to set the check flag (no_site_matches_from_db).
        # The first conditional handles the case where the database
        # is empty and/or the query returns None
        print('checker df: ', site_df_from_query)
        if site_df_from_query is not None:
            if site_df_from_query.empty:
                no_site_matches_from_db = True
            else:
                # If there are matches for study sites,
                # build a list of the matching names to
                # prompt the user with
                no_site_matches_from_db = False
                site_matches_from_db = site_df_from_query[
                    'study_site_key'].values.tolist()
        else:
            no_site_matches_from_db = True
        print('no_site_matches_from_db status: ', no_site_matches_from_db)

        # If there are matches and the user confirms they
        # are the same sites as in the database, then accept
        # the changes and query the site ids again to make sure
        # no more shared study sites are left
        if no_site_matches_from_db:
            self.sitetablemodel = self.viewEdit(
                self.site_table_data_to_upload_to_db.applymap(str))
            self.listviewSiteLabels.setModel(self.sitetablemodel)

        else:
            query_matches_view = view.PandasTableModel(
                site_df_from_query[site_df_from_query['study_site_key'].isin(
                    site_matches_from_db)])
            self.preview_validate.tabviewPreview.setModel(query_matches_view)
            self.sitequerymodel = query_matches_view
            self.tabviewDbSiteQuery.setModel(self.sitequerymodel)
            self.tabviewDbSiteQuery.resizeColumnsToContents()
            self.preview_validate.show()
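
A standalone sketch of the duplicate-site check that update_data performs, using an in-memory SQLite table in place of the project's PostgreSQL schema; the table contents and submitted keys below are assumptions for illustration only.

import pandas as pd
from sqlalchemy import create_engine

# Stand-in for the production database: one table of already-uploaded sites
engine = create_engine('sqlite://')
pd.DataFrame({'study_site_key': ['SBC_site_1', 'SBC_site_2']}).to_sql(
    'study_site_table', engine, index=False)

# Site keys the user is about to submit
sitelevels_submit_block = ['SBC_site_2', 'NEW_site_9']

# Pull the existing keys and check for overlap, as update_data does
site_df_from_query = pd.read_sql(
    'SELECT study_site_key FROM study_site_table', engine)
matches = site_df_from_query[
    site_df_from_query['study_site_key'].isin(sitelevels_submit_block)]
no_site_matches_from_db = matches.empty
print(matches['study_site_key'].tolist())  # ['SBC_site_2']
print(no_site_matches_from_db)             # False
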
Example #5
]
spatial_label = [study_site_label]
spatial_key = ['spatial_replication_level_1_label']
for i in range(len(spatial_index)):
    spatial_key.append(obs_columns_in_push_table[i] + '_label')
    spatial_label.append(obs_columns_in_data[i])

update_dict = {}
for i, item in enumerate(spatial_key):
    update_dict[item] = spatial_label[i]

orm.conn.execute(
    update(orm.project_table).where(
        column('proj_metadata_key') == 1).values(update_dict))

session = orm.Session()
sitecheck = session.query(orm.study_site_table.__table__).order_by(
    orm.study_site_table.__table__.c.study_site_key).filter(
        orm.study_site_table.__table__.c.lter_table_fkey == 'SBC')
sitecheckdf = pd.read_sql(sitecheck.statement, sitecheck.session.bind)
sitecheckdf['test'] = [i for i in range(len(sitecheckdf))]

sitecheckdf.loc[:, 'test'].idxmax()
sitecheckdf.loc[:, 'test'].idxmin()

engine = create_engine(
    'postgresql+psycopg2://postgres:demography@localhost/popler_3', echo=True)
metadata = MetaData(bind=engine)
base = declarative_base()
conn = engine.connect()
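
A self-contained sketch of the dict-driven UPDATE used above (update(orm.project_table).where(...).values(update_dict)), run here against a throwaway in-memory SQLite table; the table and columns are simplified assumptions and the code assumes SQLAlchemy 1.4+.

from sqlalchemy import (Column, Integer, MetaData, String, Table,
                        create_engine, insert, select, update)

eng = create_engine('sqlite://')
md = MetaData()
proj = Table(
    'project_table', md,
    Column('proj_metadata_key', Integer, primary_key=True),
    Column('spatial_replication_level_1_label', String))
md.create_all(eng)

with eng.begin() as conn_:
    conn_.execute(insert(proj).values(proj_metadata_key=1))
    # Build the column -> value mapping, then issue a single UPDATE
    update_dict = {'spatial_replication_level_1_label': 'plot'}
    conn_.execute(
        update(proj).where(proj.c.proj_metadata_key == 1).values(update_dict))
    print(conn_.execute(select(proj)).fetchall())
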
    def save_close(self):
        self.update_data()
        session = orm.Session()
        sitecheck = session.query(orm.Sitetable.siteid).order_by(
            orm.Sitetable.siteid)
        session.close()
        sitecheckdf = read_sql(sitecheck.statement, sitecheck.session.bind)
        changed_df = self.sitetablemodel.data(None, QtCore.Qt.UserRole)
        changed_site_list = changed_df['siteid'].values.tolist()

        if sitecheckdf is not None:
            if len(sitecheckdf) == 0:
                checker = True
            else:
                records_entered = sitecheckdf['siteid'].values.tolist()
                check = [
                    x for x in list(set(records_entered))
                    if x in changed_site_list
                ]
                checker = (len(check) == 0)
        else:
            checker = True

        if not checker:
            self._log.debug('SiteId present under different LTER')
            self.error.showMessage('Site abbreviations already in database ' +
                                   'from a different LTER. Please modify ' +
                                   'site abbreviations.')
            raise AttributeError('SiteID already present under different LTER')

        self.save_data = self.sitetablemodel.data(None, QtCore.Qt.UserRole)

        # Updating site levels
        self.facade.register_site_levels(self.facade._data[
            self.siteloc['siteid']].drop_duplicates().values.tolist())

        if len(self.save_data) == 0:
            self.save_data = self.save_data.append(
                DataFrame(
                    {
                        'siteid': 'NULL',
                        'lat': 'nan',
                        'lng': 'nan',
                        'descript': 'NULL'
                    },
                    index=[0]))

        lterid_df = hlp.produce_null_df(1, ['lterid'], len(self.save_data),
                                        self.lter)
        print(lterid_df)

        self.save_data = concat([self.save_data, lterid_df],
                                axis=1).reset_index(drop=True)
        print(self.save_data)
        self.facade.push_tables['sitetable'] = self.save_data

        hlp.write_column_to_log(self.sitelned, self._log, 'sitetable_c')

        oldsitetable = hlp.produce_null_df(
            len(self.save_data.columns),
            self.save_data.columns.values.tolist(), len(self.save_data), 'nan')
        hlp.updated_df_values(oldsitetable, self.save_data, self._log,
                              'sitetable')

        self.climatesite_unlocks.emit(self.facade._data)
        self._log.debug('facade site levels' +
                        ' '.join(self.facade._valueregister['sitelevels']))
        self.submit_change()
        self.close()
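
A small pandas-only sketch of the padding step in save_close above: when no site rows were entered, a single placeholder row is appended, and a constant lterid column is attached alongside. hlp.produce_null_df is approximated here with a plain DataFrame; its exact behavior is an assumption inferred from the call site.

from pandas import DataFrame, concat

save_data = DataFrame(columns=['siteid', 'lat', 'lng', 'descript'])

# No sites entered: append one placeholder row, as save_close does
if len(save_data) == 0:
    save_data = concat(
        [save_data,
         DataFrame({'siteid': 'NULL', 'lat': 'nan',
                    'lng': 'nan', 'descript': 'NULL'}, index=[0])],
        ignore_index=True)

# Constant lterid column of matching length (stand-in for
# hlp.produce_null_df(1, ['lterid'], len(save_data), lter))
lter = 'SBC'
lterid_df = DataFrame({'lterid': [lter] * len(save_data)})
save_data = concat([save_data, lterid_df], axis=1).reset_index(drop=True)
print(save_data)
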
Example #7
    def merge_for_datatype_table_upload(
            self, raw_dataframe,
            formated_dataframe,
            formated_dataframe_name,
            covariate_dataframe,
            siteinprojkeydf,
            raw_data_taxa_columns,
            uploaded_taxa_columns):

        print('start dtype upload')
        orm.replace_numeric_null_with_string(raw_dataframe)
        orm.replace_numeric_null_with_string(formated_dataframe)
        print('replacing nulls is a pain')

        # Step 2) Query taxa_table to get the auto generated
        # primary keys returned. Turn query data into
        # dataframe.
        session = self.session
        taxa_key_statement = session.execute(
            select([orm.taxa_table]).
            where(
                orm.taxa_table.__table__.c.site_in_project_taxa_key.in_(
                    siteinprojkeydf['site_in_project_key'].values.tolist())
            )
        )
        session.close()
        taxa_key_df = DataFrame(taxa_key_statement.fetchall())
        taxa_key_df.columns = taxa_key_statement.keys()
        taxa_key_df.replace({None: 'NA'}, inplace=True)

        # Step 3) Subsetting the queried table for records that pertain
        # only to the count data (because we will be subsetting from this
        # queried taxa table later)
        dtype_subset_taxa_key_df = taxa_key_df[
            taxa_key_df['site_in_project_taxa_key'].isin(
                siteinprojkeydf['site_in_project_key'])]

        # Step 4) Merge the taxa_table query results with
        # the site_in_project table query that was performed
        # to upload the taxa_table (see above). This gives
        # you a table with site names and taxonomic information
        # allowing for a merge with the original dtype data
        tbl_dtype_merged_taxakey_siteinprojectkey = merge(
            dtype_subset_taxa_key_df, siteinprojkeydf,
            left_on='site_in_project_taxa_key',
            right_on='site_in_project_key', how='inner')

        raw_dataframe_siteinproj = merge(
            raw_dataframe, siteinprojkeydf,
            left_on=self.sitelabel, right_on='study_site_table_fkey',
            sort=False, how='left')

        # Add the key columns to the merge-key lists (once each)
        raw_data_taxa_columns.append('site_in_project_key')
        uploaded_taxa_columns.append('site_in_project_taxa_key')

        print('updated raw data col list: ', raw_dataframe_siteinproj)
        print('update taxa data col list: ', uploaded_taxa_columns)
        # Step 5) Merge the original dtype data with the
        # merged taxa_table query to have all foreign keys...
        # taxa and site_project
        # matched up with the original observations.
        dtype_merged_with_taxa_and_siteinproj_key = merge(
            raw_dataframe_siteinproj,
            tbl_dtype_merged_taxakey_siteinprojectkey,
            left_on=list(raw_data_taxa_columns),
            right_on=list(uploaded_taxa_columns),
            how='left')

        # Step 6) Take the merged original data with all foreign keys,
        # and merge that with the formatted dtype_table based on index
        # values (the order of records should not change from the
        # original data to the formatted data)
        tbl_dtype_merged_with_all_keys = merge(
            formated_dataframe,
            dtype_merged_with_taxa_and_siteinproj_key,
            left_index=True, right_index=True, how='inner',
            suffixes=('', '_y'))

        # Step 7) List the columns that will be needed to push the
        # dtype table to the database (including foreign keys)
        tbl_dtype_columns_to_upload = [
            'taxa_table_key', 'site_in_project_taxa_key', 'year_derived',
            'month_derived', 'day_derived', 'spatial_replication_level_1',
            'spatial_replication_level_2', 'spatial_replication_level_3',
            'spatial_replication_level_4', 'spatial_replication_level_5',
            'structure_type_1', 'structure_type_2',
            'structure_type_3', 'structure_type_4',
            'treatment_type_1', 'treatment_type_2',
            'treatment_type_3',
            'covariates'
        ]
        time_cols_rename = {
            'year_derived': 'year',
            'month_derived': 'month',
            'day_derived': 'day'
        }            
        tbl_dtype_columns_to_upload.append(
            '{}_observation'.format(str(formated_dataframe_name)))
        tbl_dtype_merged_with_all_keys = concat(
            [tbl_dtype_merged_with_all_keys, covariate_dataframe],
            axis=1)

        # Step 8) Subsetting the fully merged dtype table data
        tbl_dtype_to_upload = tbl_dtype_merged_with_all_keys[
            tbl_dtype_columns_to_upload]
        tbl_dtype_to_upload.rename(
            columns=time_cols_rename, inplace=True
        )

        # Step 9) Renaming columns to reflect that in database table
        # And converting data types
        tbl_dtype_to_upload.rename(
            columns={'taxa_table_key':
                     'taxa_{}_fkey'.format(str(formated_dataframe_name))},
            inplace=True)
        datatype_key = 'site_in_project_{}_fkey'.format(str(formated_dataframe_name))
        tbl_dtype_to_upload.rename(columns={
            'site_in_project_taxa_key': datatype_key}, inplace=True)
        tbl_dtype_to_upload.fillna('NA', inplace=True)
        self.formateddata = tbl_dtype_to_upload
        # Step 10) Uploading to the database
        datatype_table = '{}_table'.format(str(formated_dataframe_name))
        datatype_obs = '{}_observation'.format(str(formated_dataframe_name))
        print('push raw_before', tbl_dtype_to_upload.columns)

        tbl_dtype_to_upload[datatype_obs] = to_numeric(
            tbl_dtype_to_upload[datatype_obs], errors='coerce'
        )
        
        text_cols = [
            'spatial_replication_level_1', 'spatial_replication_level_2',
            'spatial_replication_level_3', 'spatial_replication_level_4',
            'spatial_replication_level_5', 'treatment_type_1',
            'treatment_type_2', 'treatment_type_3',
            'structure_type_1', 'structure_type_2', 'structure_type_3',
            'structure_type_4'
        ]
        tbl_dtype_to_upload[text_cols] = tbl_dtype_to_upload[
            text_cols].applymap(str)
        tbl_dtype_to_upload[text_cols] = tbl_dtype_to_upload[
            text_cols].applymap(lambda x: x.strip())

        print(tbl_dtype_to_upload.dtypes)
        print(self.table_types[datatype_table])
        try:
            orm.convert_types(tbl_dtype_to_upload, self.table_types[datatype_table])
        except Exception as e:
            print('converting issues: ', str(e))


        print('push raw_after', tbl_dtype_to_upload.columns)
        print(tbl_dtype_to_upload.dtypes)
        print('this should have worked')


        other_numerics = [
            'year', 'month', 'day', datatype_key,
            'taxa_{}_fkey'.format(str(formated_dataframe_name))
        ]

        tbl_dtype_to_upload[datatype_obs] = to_numeric(
            tbl_dtype_to_upload[datatype_obs], errors='coerce'
        )

        # Replace string missing-value placeholders in the key columns with
        # the numeric sentinel; fillna covers any None/NaN left from merges
        tbl_dtype_to_upload[other_numerics] = tbl_dtype_to_upload[
            other_numerics].replace({'NA': -99999, 'NaN': -99999})
        tbl_dtype_to_upload[other_numerics] = tbl_dtype_to_upload[
            other_numerics].fillna(-99999)


        tbl_dtype_to_upload.loc[:, other_numerics] = tbl_dtype_to_upload.loc[
            :, other_numerics].apply(to_numeric, errors='coerce')

        metadata_key_column_name = 'metadata_{}_key'.format(
            formated_dataframe_name)
        tbl_dtype_to_upload[metadata_key_column_name] = int(self.metadata_key)

        # Attempting a direct COPY through psycopg2's copy_expert with a
        # StringIO text buffer (should be faster than pandas to_sql)
        sql_datatype_columns = tbl_dtype_to_upload.columns.values.tolist()
        s_buf = io.StringIO()
        tbl_dtype_to_upload.to_csv(s_buf, index=False, sep="\t")
        s_buf.seek(0)
        session = orm.Session()
        cur = session.connection().connection.cursor()
        copy_sql_statement = "COPY {}({}) FROM STDIN WITH CSV HEADER DELIMITER AS '\t'".format(
            datatype_table, ", ".join(sql_datatype_columns))
        cur.copy_expert(copy_sql_statement, s_buf)
        session.commit()
        session.close()

        #tbl_dtype_to_upload.to_sql(
        #    datatype_table,
        #    orm.conn, if_exists='append', index=False, chunksize=1000)

        print('past datatype upload')
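
A condensed, hedged sketch of the Step 10 buffer upload in isolation. It assumes a reachable PostgreSQL database and an existing target table; the DSN, table, and column names below are placeholders rather than the project's real schema.

import io

import pandas as pd
import psycopg2

df = pd.DataFrame({'year': [2001, 2002], 'count_observation': [5, 7]})

# Serialize to a tab-delimited in-memory buffer, then stream it to Postgres
buf = io.StringIO()
df.to_csv(buf, index=False, sep='\t')
buf.seek(0)

conn = psycopg2.connect('dbname=popler_3 user=postgres')  # placeholder DSN
with conn.cursor() as cur:
    cur.copy_expert(
        "COPY count_table(year, count_observation) "
        "FROM STDIN WITH CSV HEADER DELIMITER AS '\t'",
        buf)
conn.commit()
conn.close()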