def get(self, study_id, assay_file_name):
    """Return the assay table of a study as an indexed header plus row tuples."""
    # param validation
    if study_id is None or assay_file_name is None:
        logger.info('No study_id and/or assay file name given')
        abort(404)
    study_id = study_id.upper()

    # User authentication (token is optional; permissions decide access)
    user_token = request.headers.get("user_token")

    logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    full_path = os.path.join(study_location, assay_file_name)
    logger.info('Trying to load Assay (%s) for Study %s', full_path, study_id)

    # Load the assay sheet and blank out empty numerical values (NaN)
    table = pd.read_csv(full_path, sep="\t", header=0, encoding='utf-8')
    table = table.replace(np.nan, '', regex=True)

    rows = totuples(table.reset_index(), 'rows')
    # Get an indexed header row
    header = get_table_header(table)

    return {'header': header, 'data': rows}
def post(self, study_id, file_name):
    """Add a new column to a study table file.

    Query parameters: ``new_column_name`` (required), ``new_column_position``
    (required, 0-based column index) and ``new_column_default_value``
    (optional, applied to every existing data row).
    Returns the updated header, all rows and the write status message.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('new_column_name', help="Name of new column")
    new_column_name = None
    parser.add_argument('new_column_position', help="The position (column #) of new column")
    new_column_position = None
    parser.add_argument('new_column_default_value', help="The (optional) default value of new column")
    new_column_default_value = None
    if request.args:
        args = parser.parse_args(req=request)
        new_column_name = args['new_column_name']
        new_column_position = args['new_column_position']
        new_column_default_value = args['new_column_default_value']

    if new_column_name is None:
        abort(404, "Please provide valid name for the new column")
    if new_column_position is None:
        # Previously int(None) below raised a TypeError (HTTP 500); fail fast instead
        abort(404, "Please provide a valid position for the new column")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    # One default value per existing data row (header excluded)
    new_col = [new_column_default_value] * table_df.shape[0]

    # Add new column to the spreadsheet
    table_df.insert(loc=int(new_column_position), column=new_column_name,
                    value=new_col, allow_duplicates=True)

    df_data_dict = totuples(table_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(table_df)

    message = write_tsv(table_df, file_name)

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def put(self, study_id, file_name):
    """Update individual cells in a study table file.

    Expects JSON of the form ``{"data": [{"row": r, "column": c, "value": v}, ...]}``.
    Returns the updated header and rows plus the write status message.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        columns_rows = data_dict['data']
    except KeyError:
        columns_rows = None

    if columns_rows is None:
        abort(404, "Please provide valid key-value pairs for the cell value."
                   "The JSON string has to have a 'data' element")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and/or file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    for column in columns_rows:
        cell_value = column['value']
        row_index = column['row']
        column_index = column['column']
        # Update the cell for the given row/column (header row is not addressable)
        try:
            table_df.iloc[int(row_index), int(column_index)] = cell_value
        except (ValueError, IndexError):
            # IndexError (out-of-range row/column) previously escaped as HTTP 500
            abort(417, "Unable to find the required 'value', 'row' and 'column' values")

    # Write the new row back in the file
    message = write_tsv(table_df, file_name)

    df_data_dict = totuples(table_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(table_df)

    return {'header': df_header, 'rows': df_data_dict, 'message': message}
def delete(self, study_id, assay_file_name):
    """Remove row(s), given as comma-separated numbers, from an assay file."""
    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('row_num', help="The row number of the cell(s) to remove (exclude header)",
                        location="args")
    args = parser.parse_args()
    row_num = args['row_num']

    # param validation
    if study_id is None or assay_file_name is None or row_num is None:
        abort(404)
    study_id = study_id.upper()

    # User authentication
    user_token = request.headers.get("user_token")

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_path = os.path.join(study_location, assay_file_name)
    table = pd.read_csv(file_path, sep="\t", header=0, encoding='utf-8')
    table = table.replace(np.nan, '', regex=True)  # Remove NaN

    # Drop the highest row numbers first: the DataFrame re-orders after each drop
    for num in sorted((int(x) for x in row_num.split(",")), reverse=True):
        table = table.drop(table.index[num])  # Drop row(s) in the spreadsheet

    # Remove all ".n" suffixes appended to duplicated column names
    table.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the updated file, then re-read it so the response reflects disk state
    table.to_csv(file_path, sep="\t", encoding='utf-8', index=False)
    table = pd.read_csv(file_path, sep="\t", header=0, encoding='utf-8')
    table = table.replace(np.nan, '', regex=True)  # Remove NaN

    rows = totuples(table.reset_index(), 'rows')
    # Get an indexed header row
    header = get_table_header(table)

    return {'header': header, 'data': rows}
def post(self, study_id, assay_file_name):
    """Append new row(s) to an assay file.

    Expects JSON ``{"data": [...]}`` where each element maps column names to
    values. Returns the updated header and rows.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_row = data_dict['data']
    except KeyError:
        new_row = None
    if new_row is None:
        # Fixed message: the payload key actually read above is 'data', not 'assayData'
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    try:
        for element in new_row:
            element.pop('index', None)  # Remove "index:n" element, this is the original row number
    except Exception:  # best-effort cleanup; a missing index is not an error
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or assay_file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and assay file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    assay_df = assay_df.append(new_row, ignore_index=True)  # Add new row to the spreadsheet

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the new row back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    df_data_dict = totuples(assay_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(assay_df)

    return {'header': df_header, 'data': df_data_dict}
def delete(self, study_id, file_name):
    """Remove row(s), given as comma-separated numbers, from a study table file."""
    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('row_num', help="The row number of the cell(s) to remove (exclude header)",
                        location="args")
    args = parser.parse_args()
    row_num = args['row_num']

    # param validation
    if study_id is None or file_name is None or row_num is None:
        abort(404)
    study_id = study_id.upper()

    # User authentication
    user_token = request.headers.get("user_token")

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    # Drop the highest row numbers first: the DataFrame re-orders after each drop
    for num in sorted((int(x) for x in row_num.split(",")), reverse=True):
        file_df = file_df.drop(file_df.index[num])  # Drop row(s) in the spreadsheet

    message = write_tsv(file_df, file_name)

    # Re-read from disk so the response reflects the saved state
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    df_data_dict = totuples(file_df.reset_index(), 'rows')
    # Get an indexed header row
    df_header = get_table_header(file_df)

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def get(self, study_id, file_name):
    """Return a TSV/CSV study table (header plus rows).

    Only '.tsv', '.csv' and '.txt' extensions are accepted. Curators may
    request the special 'metabolights_zooma.tsv' mapping file.
    """
    # param validation
    if study_id is None or file_name is None:
        logger.info('No study_id and/or TSV file name given')
        abort(404)

    extension = os.path.splitext(file_name)[1].lower()
    if extension not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()
    file_name_param = file_name  # keep the caller-supplied name for the header lookup

    # User authentication
    user_token = request.headers.get("user_token")

    logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    logger.info('Trying to load TSV file (%s) for Study %s', file_name, study_id)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    df_data_dict = totuples(file_df.reset_index(), 'rows')
    # Get an indexed header row
    df_header = get_table_header(file_df, study_id, file_name_param)

    return {'header': df_header, 'data': df_data_dict}
def get(self, study_id, sample_file_name):
    """Return the sample table of a study as an indexed header plus row tuples."""
    # param validation
    if study_id is None:
        logger.info('No study_id given')
        abort(404)
    if sample_file_name is None:
        logger.info('No sample file name given')
        abort(404)

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # Do NOT log the user token / API key: credentials must not end up in log files
    logger.info('sample Table: Getting ISA-JSON Study %s', study_id)

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
        wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    sample_file_name = os.path.join(study_location, sample_file_name)
    logger.info('Trying to load sample (%s) for Study %s', sample_file_name, study_id)

    # Get the sample table or create a new one if it does not already exist
    sample_df = pd.read_csv(sample_file_name, sep="\t", header=0, encoding='utf-8')

    # Get rid of empty numerical values
    sample_df = sample_df.replace(np.nan, '', regex=True)

    df_data_dict = totuples(sample_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(sample_df)

    return {'header': df_header, 'data': df_data_dict}
def get(self, study_id, file_name):
    """Return a TSV study table (header plus rows).

    Curators may request the special 'metabolights_zooma.tsv' mapping file.
    """
    # param validation
    if study_id is None or file_name is None:
        logger.info('No study_id and/or TSV file name given')
        abort(404)
    study_id = study_id.upper()
    file_name_param = file_name  # keep the caller-supplied name for the header lookup

    # User authentication
    user_token = request.headers.get("user_token")

    logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    logger.info('Trying to load TSV file (%s) for Study %s', file_name, study_id)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    df_data_dict = totuples(file_df.reset_index(), 'rows')
    # Get an indexed header row
    df_header = get_table_header(file_df, study_id, file_name_param)

    return {'header': df_header, 'data': df_data_dict}
def put(self, study_id, file_name):
    """Update individual cells in a study table file.

    Expects JSON of the form ``{"data": [{"row": r, "column": c, "value": v}, ...]}``.
    Returns the updated header and rows plus the write status message.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        columns_rows = data_dict['data']
    except KeyError:
        columns_rows = None

    if columns_rows is None:
        abort(404, "Please provide valid key-value pairs for the cell value."
                   "The JSON string has to have a 'data' element")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and/or file name')

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    for column in columns_rows:
        cell_value = column['value']
        row_index = column['row']
        column_index = column['column']
        # Update the cell for the given row/column (header row is not addressable)
        try:
            table_df.iloc[int(row_index), int(column_index)] = cell_value
        except ValueError as e:
            # Bug fix: the previous messages concatenated the raw dict ('column')
            # and non-string indices, raising a TypeError that masked the error.
            logger.error(
                "(ValueError) Unable to find the required 'value', 'row' and 'column' values. "
                "Value: %s, row: %s, column: %s. %s", cell_value, row_index, column_index, str(e))
            abort(417,
                  "(ValueError) Unable to find the required 'value', 'row' and 'column' values. "
                  "Value: " + str(cell_value) + ", row: " + str(row_index) +
                  ", column: " + str(column_index))
        except IndexError as e:
            # Bug fix: 'as e' was missing, so referencing 'e' raised a NameError
            logger.error(
                "(IndexError) Unable to find the required 'value', 'row' and 'column' values. "
                "Value: %s, row: %s, column: %s. %s", cell_value, row_index, column_index, str(e))
            abort(417,
                  "(IndexError) Unable to find the required 'value', 'row' and 'column' values. "
                  "Value: " + str(cell_value) + ", row: " + str(row_index) +
                  ", column: " + str(column_index))

    # Write the new row back in the file
    message = write_tsv(table_df, file_name)

    df_data_dict = totuples(table_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(table_df)

    return {'header': df_header, 'rows': df_data_dict, 'message': message}
def post(self, study_id, file_name):
    """Add or update column(s) in a study table file.

    Expects JSON ``{"data": [{"name": ..., "index": ..., "value": ...}, ...]}``.
    If the target position already holds a column of the same name its values
    are overwritten, otherwise a new column is inserted. An audit copy of the
    study is written before the table changes.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_columns = data_dict['data']
    except KeyError:
        new_columns = None
    if new_columns is None:
        abort(417, "Please provide valid key-value pairs for the new columns."
                   "The JSON string has to have a 'data' element")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and/or file name')
    study_id = study_id.upper()

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    audit_status, dest_path = write_audit_files(study_location)

    # Get an indexed header row
    df_header = get_table_header(table_df)

    for column in new_columns:
        new_column_default_value = column['value']
        new_column_name = column['name']
        new_column_position = column['index']

        # One default value per existing data row (header excluded)
        new_col = [new_column_default_value] * table_df.shape[0]

        # Check if we already have a column in the requested position
        try:
            header_name = table_df.iloc[:, new_column_position].name
        except Exception:  # was a bare 'except:'; an invalid position means "no column there"
            header_name = ""

        if header_name == new_column_name:
            # We should update the existing column
            table_df.iloc[:, new_column_position] = new_col
        else:
            # Add new column to the spreadsheet
            table_df.insert(loc=int(new_column_position), column=new_column_name,
                            value=new_col, allow_duplicates=True)

    # Get an (updated) indexed header row
    df_header = get_table_header(table_df)

    # Get all indexed rows
    df_data_dict = totuples(table_df.reset_index(), 'rows')

    message = write_tsv(table_df, file_name)

    return {'header': df_header, 'rows': df_data_dict, 'message': message}
def post(self, study_id, file_name):
    """Add a new column to a study table file (with audit copy).

    Query parameters: ``new_column_name`` (required), ``new_column_position``
    (required, 0-based column index) and ``new_column_default_value``
    (optional, applied to every existing data row).
    Returns the updated header, all rows and the write status message.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('new_column_name', help="Name of new column")
    new_column_name = None
    parser.add_argument('new_column_position', help="The position (column #) of new column")
    new_column_position = None
    parser.add_argument('new_column_default_value', help="The (optional) default value of new column")
    new_column_default_value = None
    if request.args:
        args = parser.parse_args(req=request)
        new_column_name = args['new_column_name']
        new_column_position = args['new_column_position']
        new_column_default_value = args['new_column_default_value']

    if new_column_name is None:
        abort(404, "Please provide valid name for the new column")
    if new_column_position is None:
        # Previously int(None) below raised a TypeError (HTTP 500); fail fast instead
        abort(404, "Please provide a valid position for the new column")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and file name')
    study_id = study_id.upper()

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    audit_status, dest_path = write_audit_files(study_location)

    # One default value per existing data row (header excluded)
    new_col = [new_column_default_value] * table_df.shape[0]

    # Add new column to the spreadsheet
    table_df.insert(loc=int(new_column_position), column=new_column_name,
                    value=new_col, allow_duplicates=True)

    df_data_dict = totuples(table_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(table_df)

    message = write_tsv(table_df, file_name)

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def delete(self, study_id, file_name):
    """Remove row(s), given as comma-separated numbers, from a TSV/CSV study file."""
    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('row_num', help="The row number of the cell(s) to remove (exclude header)",
                        location="args")
    args = parser.parse_args()
    row_num = args['row_num']

    # param validation
    if study_id is None or file_name is None or row_num is None:
        abort(404)

    extension = os.path.splitext(file_name)[1].lower()
    if extension not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()

    # User authentication
    user_token = request.headers.get("user_token")

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    # Drop the highest row numbers first: the DataFrame re-orders after each drop
    for num in sorted((int(x) for x in row_num.split(",")), reverse=True):
        file_df = file_df.drop(file_df.index[num])  # Drop row(s) in the spreadsheet

    message = write_tsv(file_df, file_name)

    # Re-read from disk so the response reflects the saved state
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    df_data_dict = totuples(file_df.reset_index(), 'rows')
    # Get an indexed header row
    df_header = get_table_header(file_df)

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def delete(self, study_id, assay_file_name):
    """Remove row(s), given as comma-separated numbers, from an assay sheet."""
    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('row_num', help="The row number of the cell(s) to remove (exclude header)",
                        location="args")
    args = parser.parse_args()
    row_num = args['row_num']

    # param validation
    if study_id is None or assay_file_name is None or row_num is None:
        abort(404)
    study_id = study_id.upper()

    # User authentication
    user_token = request.headers.get("user_token")

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    sheet_path = os.path.join(study_location, assay_file_name)
    sheet = pd.read_csv(sheet_path, sep="\t", header=0, encoding='utf-8')
    sheet = sheet.replace(np.nan, '', regex=True)  # Remove NaN

    # Drop the highest row numbers first: the DataFrame re-orders after each drop
    for num in sorted((int(x) for x in row_num.split(",")), reverse=True):
        sheet = sheet.drop(sheet.index[num])  # Drop row(s) in the spreadsheet

    # Remove all ".n" suffixes appended to duplicated column names
    sheet.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the updated file, then re-read it so the response reflects disk state
    sheet.to_csv(sheet_path, sep="\t", encoding='utf-8', index=False)
    sheet = pd.read_csv(sheet_path, sep="\t", header=0, encoding='utf-8')
    sheet = sheet.replace(np.nan, '', regex=True)  # Remove NaN

    rows = totuples(sheet.reset_index(), 'rows')
    # Get an indexed header row
    header = get_table_header(sheet)

    return {'header': header, 'data': rows}
def post(self, study_id, assay_file_name):
    """Append new row(s) to an assay file.

    Expects JSON ``{"data": [...]}`` where each element maps column names to
    values. Returns the updated header and rows.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_row = data_dict['data']
    except KeyError:
        new_row = None
    if new_row is None:
        # Fixed message: the payload key actually read above is 'data', not 'assayData'
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    try:
        for element in new_row:
            element.pop('index', None)  # Remove "index:n" element, this is the original row number
    except Exception:  # best-effort cleanup; a missing index is not an error
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or assay_file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and assay file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    assay_df = assay_df.append(new_row, ignore_index=True)  # Add new row to the spreadsheet

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the new row back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    df_data_dict = totuples(assay_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(assay_df)

    return {'header': df_header, 'data': df_data_dict}
def post(self, study_id, file_name):
    """Add or update column(s) in a study table file.

    Expects JSON ``{"data": [{"name": ..., "index": ..., "value": ...}, ...]}``.
    If the target position already holds a column of the same name its values
    are overwritten, otherwise a new column is inserted.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_columns = data_dict['data']
    except KeyError:
        new_columns = None
    if new_columns is None:
        abort(417, "Please provide valid key-value pairs for the new columns."
                   "The JSON string has to have a 'data' element")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and/or file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    # Get an indexed header row
    df_header = get_table_header(table_df)

    for column in new_columns:
        new_column_default_value = column['value']
        new_column_name = column['name']
        new_column_position = column['index']

        # One default value per existing data row (header excluded)
        new_col = [new_column_default_value] * table_df.shape[0]

        # Check if we already have a column in the requested position
        try:
            header_name = table_df.iloc[:, new_column_position].name
        except Exception:  # was a bare 'except:'; an invalid position means "no column there"
            header_name = ""

        if header_name == new_column_name:
            # We should update the existing column
            table_df.iloc[:, new_column_position] = new_col
        else:
            # Add new column to the spreadsheet
            table_df.insert(loc=int(new_column_position), column=new_column_name,
                            value=new_col, allow_duplicates=True)

    # Get an (updated) indexed header row
    df_header = get_table_header(table_df)

    # Get all indexed rows
    df_data_dict = totuples(table_df.reset_index(), 'rows')

    message = write_tsv(table_df, file_name)

    return {'header': df_header, 'rows': df_data_dict, 'message': message}
def put(self, study_id, assay_file_name):
    """Replace existing row(s) in an assay file.

    Expects JSON ``{"data": [...]}`` where every row carries an 'index:n'
    element identifying the original row number to replace. Returns the
    updated header and rows.
    """
    # param validation
    if study_id is None or assay_file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and assay file name')
    study_id = study_id.upper()

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
    except KeyError:
        new_rows = None
    if new_rows is None:
        # Fixed message: the payload key actually read above is 'data', not 'assayData'
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    for row in new_rows:
        try:
            row_index = row['index']  # Check if we have a value in the row number(s)
        except KeyError:
            row_index = None
        if new_rows is None or row_index is None:
            abort(404, "Please provide valid data for the updated row(s). "
                       "The JSON string has to have an 'index:n' element in each (JSON) row, "
                       "this is the original row number. The header row can not be updated")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    row_index_int = None
    for row in new_rows:
        try:
            row_index_int = int(row['index'])
        except (KeyError, TypeError, ValueError):
            # Bug fix: this branch previously read 'row_index_int is None', a
            # no-op comparison that left the PREVIOUS iteration's index in
            # place, silently replacing the wrong row.
            row_index_int = None

        if row_index_int is not None:
            assay_df = assay_df.drop(assay_df.index[row_index_int])  # Remove the old row from the spreadsheet
            # pop the "index:n" from the new_row before updating
            row.pop('index', None)  # Remove "index:n" element from the (JSON) row, this is the original row number
            assay_df = insert_row(row_index_int, assay_df, row)  # Update the row in the spreadsheet

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the new row back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    df_data_dict = totuples(assay_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(assay_df)

    return {'header': df_header, 'data': df_data_dict}
def post(self, study_id, file_name):
    """Append new row(s) to a study TSV file (or the Zooma mapping file for curators).

    Expects JSON ``{"data": [...]}`` of column-name -> value mappings; an empty
    mapping appends a blank row. Returns the updated header, rows and message.
    """
    log_request(request)
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_row = data_dict['data']
    except KeyError:
        new_row = None
    if new_row is None:
        abort(417, "Please provide valid data for updated new row(s). The JSON string has to have a 'data' element")

    try:
        for element in new_row:
            element.pop('index', None)  # Remove "index:n" element, this is the original row number
    except Exception:  # best-effort cleanup; a missing index is not an error
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and TSV file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file name was not found")

    # Validate column names in new rows
    valid_column_name, message = validate_row(file_df, new_row, "post")
    if not valid_column_name:
        abort(417, message)

    # Bug fix: 'new_row[0]' alone raised IndexError (HTTP 500) for an empty list
    if new_row and new_row[0]:
        file_df = file_df.append(new_row, ignore_index=True)  # Add new row to the spreadsheet (TSV file)
    else:
        file_df = file_df.append(pd.Series(), ignore_index=True)  # Append a blank row

    message = write_tsv(file_df, file_name)

    # Get an indexed header row
    df_header = get_table_header(file_df)

    # Get the updated data table
    try:
        df_data_dict = totuples(read_tsv(file_name), 'rows')
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def post(self, study_id, file_name):
    """Insert new row(s) into a study TSV/CSV file at a requested position.

    Expects a JSON body {"data": {"index": n, "rows": [...]}}. Each entry in
    "rows" is merged over an empty template row (one key per existing column)
    and inserted at position n; n == -1 inserts at the top, and a missing
    "index" appends at the end. Returns the updated header, table and write
    status message.

    Aborts: 417 on missing 'rows' or invalid column names, 404/400 on bad
    parameters or file, 403 without write access.
    """
    log_request(request)
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        data = data_dict['data']
        new_row = data['rows']
    except KeyError:
        new_row = None
        data = None

    if new_row is None:
        abort(417, "Please provide valid data for updated new row(s). The JSON string has to have a 'rows' element")

    try:
        for element in new_row:
            element.pop('index', None)  # Remove "index:n" element, this is the original row number
    except (TypeError, AttributeError):
        # Narrowed from a bare 'except:' so only "rows entries are not dicts"
        # lands here; unrelated errors are no longer swallowed.
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and TSV file name')

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file name was not found")

    # Validate column names in new rows
    valid_column_name, message = validate_row(file_df, new_row, "post")
    if not valid_column_name:
        abort(417, message)

    if data:
        try:
            start_index = data['index']
            if start_index == -1:
                start_index = 0
            # A fractional label sorts between the existing integer row
            # labels, so sort_index() places the new row just before row n.
            start_index = start_index - 0.5
        except KeyError:
            start_index = len(file_df.index)  # No position given, append at the end

        # Map the complete row first, update with new_row
        complete_row = {}
        for col in file_df.columns:
            complete_row[col] = ""

        if not new_row:
            logger.warning("No new row information provided. Adding empty row " + file_name + ", row " + str(complete_row))
        else:
            for row in new_row:
                # NOTE(review): complete_row is shared across iterations, so
                # values from earlier rows carry over into later ones unless
                # overwritten — preserved as-is; confirm this is intended.
                complete_row.update(row)
                row = complete_row
                line = pd.DataFrame(row, index=[start_index])
                file_df = file_df.append(line, ignore_index=False)
                file_df = file_df.sort_index().reset_index(drop=True)
                start_index += 1

        file_df = file_df.replace(np.nan, '', regex=True)

    message = write_tsv(file_df, file_name)

    # Get an indexed header row
    df_header = get_table_header(file_df)

    # Get the updated data table
    try:
        df_data_dict = totuples(read_tsv(file_name), 'rows')
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def put(self, study_id, assay_file_name):
    """Update existing row(s) in an assay TSV file.

    Expects a JSON body {"data": [...]} where each row dict carries an
    'index:n' element naming the original row number to replace. Each
    targeted row is dropped and the new content inserted at the same
    position; duplicate-column suffixes (".1", ".2", ...) added by pandas
    are stripped before writing the file back. Returns the updated header
    and table.

    Aborts: 404 on missing parameters or invalid row data, 403 without
    write access.
    """
    # param validation
    if study_id is None or assay_file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and assay file name')
    study_id = study_id.upper()

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
    except KeyError:
        new_rows = None

    # 'not new_rows' also rejects an empty list, which previously fell
    # through and raised NameError on the unset loop variable below.
    if not new_rows:
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'assayData' element")

    # Every row must carry its original row number. The original code only
    # aborted based on the LAST row's index; now each row is checked.
    for row in new_rows:
        try:
            row_index = row['index']  # Check if we have a value in the row number(s)
        except KeyError:
            row_index = None
        if row_index is None:
            abort(404, "Please provide valid data for the updated row(s). "
                       "The JSON string has to have an 'index:n' element in each (JSON) row, "
                       "this is the original row number. The header row can not be updated")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    row_index_int = None
    for row in new_rows:
        try:
            row_index_int = int(row['index'])
        except (KeyError, ValueError, TypeError):
            # Fixed: the original 'row_index_int is None' was a no-op
            # comparison, so a bad index silently reused the previous
            # row's position. Reset and skip this row instead.
            row_index_int = None
        if row_index_int is not None:
            # Remove the old row from the spreadsheet
            assay_df = assay_df.drop(assay_df.index[row_index_int])
            # pop the "index:n" from the new_row before updating
            row.pop('index', None)  # Remove "index:n" element from the (JSON) row, this is the original row number
            assay_df = insert_row(row_index_int, assay_df, row)  # Update the row in the spreadsheet

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the new row back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    df_data_dict = totuples(assay_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(assay_df)

    return {'header': df_header, 'data': df_data_dict}
def put(self, study_id, file_name):
    """Update existing row(s) in a study TSV/CSV file (extension-checked).

    Expects a JSON body {"data": [...]} where each row dict carries an
    'index:n' element naming the original row number to replace. Column
    names are validated per row; each targeted row is dropped and the new
    content inserted at the same position. Returns the updated header,
    table and write status message.

    Aborts: 406/404 on missing parameters or row data, 400 on a bad
    extension or missing file, 403 without write access, 417 on invalid
    column names.
    """
    # param validation
    if study_id is None or file_name is None:
        abort(406, 'Please provide valid parameters for study identifier and TSV file name')

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
    except KeyError:
        new_rows = None

    # 'not new_rows' also rejects an empty list, which previously fell
    # through and raised NameError on the unset loop variable below.
    if not new_rows:
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    # Every row must carry its original row number. The original code only
    # aborted based on the LAST row's index; now each row is checked.
    # ('except (KeyError, Exception)' was redundant — Exception already
    # covers KeyError — and hid unrelated failures.)
    for row in new_rows:
        try:
            row_index = row['index']  # Check if we have a value in the row number(s)
        except KeyError:
            row_index = None
        if row_index is None:
            abort(404, "Please provide valid data for the updated row(s). "
                       "The JSON string has to have an 'index:n' element in each (JSON) row. "
                       "The header row can not be updated")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    for row in new_rows:
        try:
            row_index_int = int(row['index'])
        except (KeyError, ValueError, TypeError):
            # Fixed: the original 'row_index_int is None' was a no-op
            # comparison, leaving the variable unbound (NameError) or stale.
            row_index_int = None

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, row, 'put')
        if not valid_column_name:
            abort(417, message)

        if row_index_int is not None:
            file_df = file_df.drop(file_df.index[row_index_int])  # Remove the old row from the spreadsheet
            # pop the "index:n" from the new_row before updating
            row.pop('index', None)  # Remove "index:n" element, this is the original row number
            file_df = insert_row(row_index_int, file_df, row)  # Update the row in the spreadsheet

    message = write_tsv(file_df, file_name)

    df_data_dict = totuples(file_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(file_df)

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def put(self, study_id, file_name):
    """Update existing row(s) in a study TSV file.

    Expects a JSON body {"data": [...]} where each row dict carries an
    'index:n' element naming the original row number to replace. Column
    names are validated per row; each targeted row is dropped and the new
    content inserted at the same position. Returns the updated header,
    table and write status message.

    Aborts: 406/404 on missing parameters or row data, 403 without write
    access, 400 on a missing file, 417 on invalid column names.
    """
    # param validation
    if study_id is None or file_name is None:
        abort(406, 'Please provide valid parameters for study identifier and TSV file name')
    study_id = study_id.upper()

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
    except KeyError:
        new_rows = None

    # 'not new_rows' also rejects an empty list, which previously fell
    # through and raised NameError on the unset loop variable below.
    if not new_rows:
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    # Every row must carry its original row number. The original code only
    # aborted based on the LAST row's index; now each row is checked.
    # ('except (KeyError, Exception)' was redundant — Exception already
    # covers KeyError — and hid unrelated failures.)
    for row in new_rows:
        try:
            row_index = row['index']  # Check if we have a value in the row number(s)
        except KeyError:
            row_index = None
        if row_index is None:
            abort(404, "Please provide valid data for the updated row(s). "
                       "The JSON string has to have an 'index:n' element in each (JSON) row. "
                       "The header row can not be updated")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    for row in new_rows:
        try:
            row_index_int = int(row['index'])
        except (KeyError, ValueError, TypeError):
            # Fixed: the original 'row_index_int is None' was a no-op
            # comparison, leaving the variable unbound (NameError) or stale.
            row_index_int = None

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, row, 'put')
        if not valid_column_name:
            abort(417, message)

        if row_index_int is not None:
            file_df = file_df.drop(file_df.index[row_index_int])  # Remove the old row from the spreadsheet
            # pop the "index:n" from the new_row before updating
            row.pop('index', None)  # Remove "index:n" element, this is the original row number
            file_df = insert_row(row_index_int, file_df, row)  # Update the row in the spreadsheet

    message = write_tsv(file_df, file_name)

    df_data_dict = totuples(file_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(file_df)

    return {'header': df_header, 'data': df_data_dict, 'message': message}