def delete(self, study_id, file_name):
        # query validation
        parser = reqparse.RequestParser()
        parser.add_argument('row_num', help="The row number of the cell(s) to remove (exclude header)", location="args")
        args = parser.parse_args()
        row_num = args['row_num']

        # param validation
        if study_id is None or file_name is None or row_num is None:
            abort(404)
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        row_nums = row_num.split(",")

        # Need to remove the highest row number first as the DataFrame dynamically re-orders when one row is removed
        sorted_num_rows = [int(x) for x in row_nums]
        sorted_num_rows.sort(reverse=True)
        for num in sorted_num_rows:
            file_df = file_df.drop(file_df.index[num])  # Drop row(s) in the spreadsheet

        message = write_tsv(file_df, file_name)

        # To be sure we read the file again
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
    def get(self, study_id):

        log_request(request)
        # param validation
        if study_id is None:
            abort(404)
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if 'user_token' in request.headers:
            user_token = request.headers['user_token']

        if user_token is None:
            abort(401)

        # query validation
        parser = reqparse.RequestParser()
        parser.add_argument('filename1', help='TSV filename one')
        parser.add_argument('filename2', help='TSV filename two')
        filename1 = None
        filename2 = None
        if request.args:
            args = parser.parse_args(req=request)
            filename1 = args['filename1'].lower() if args['filename1'] else None
            filename2 = args['filename2'].lower() if args['filename2'] else None
        if not filename1 or not filename2:
            logger.warning("Missing TSV filenames.")
            abort(404, "Missing TSV filenames.")

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(
                401,
                "Study does not exist or you do not have access to this study."
            )

        location = study_location
        df1 = read_tsv(os.path.join(location, filename1))
        df2 = read_tsv(os.path.join(location, filename2))
        diff_df = diff_pd(df1, df2)
        return jsonify({"entries": diff_df})
    def post(self, study_id, file_name):

        parser = reqparse.RequestParser()
        parser.add_argument('new_column_name', help="Name of new column")
        new_column_name = None
        parser.add_argument('new_column_position', help="The position (column #) of new column")
        new_column_position = None
        parser.add_argument('new_column_default_value', help="The (optional) default value of new column")
        new_column_default_value = None

        if request.args:
            args = parser.parse_args(req=request)
            new_column_name = args['new_column_name']
            new_column_position = args['new_column_position']
            new_column_default_value = args['new_column_default_value']

        if new_column_name is None:
            abort(404, "Please provide valid name for the new column")

        # param validation
        if study_id is None or file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        #  Need to add values for each existing row (not header)
        new_col = []
        for row_val in range(table_df.shape[0]):
            new_col.append(new_column_default_value)

        # Add new column to the spreadsheet
        table_df.insert(loc=int(new_column_position), column=new_column_name, value=new_col, allow_duplicates=True)

        df_data_dict = totuples(table_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(table_df)

        message = write_tsv(table_df, file_name)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
    def put(self, study_id, file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            columns_rows = data_dict['data']
        except KeyError:
            columns_rows = None

        if columns_rows is None:
            abort(404, "Please provide valid key-value pairs for the cell value."
                       "The JSON string has to have a 'data' element")

        # param validation
        if study_id is None or file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and/or file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        for column in columns_rows:
            cell_value = column['value']
            row_index = column['row']
            column_index = column['column']
            #  Need to add values for column and row (not header)
            try:
                #for row_val in range(table_df.shape[0]):
                table_df.iloc[int(row_index), int(column_index)] = cell_value
            except ValueError:
                abort(417, "Unable to find the required 'value', 'row' and 'column' values")

        # Write the new row back in the file
        message = write_tsv(table_df, file_name)

        df_data_dict = totuples(table_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(table_df)

        return {'header': df_header, 'rows': df_data_dict, 'message': message}
def split_metabolon_assays(study_location, study_id):
    p_start = 'a__POS'
    n_start = 'a__NEG'
    end = '_m'
    pos = p_start + end
    neg = n_start + end
    sample_col = 'Sample Name'

    for a_files in glob.glob(
            os.path.join(study_location,
                         'a__*_metabolite_profiling_mass_spectrometry.txt')):
        if pos in a_files:
            p_assay = read_tsv(a_files)
            p_filename = a_files
            try:
                # split based on 'POSEAR' and 'POSLAT'
                write_tsv(
                    p_assay.loc[p_assay[sample_col].str.contains('POSEAR')],
                    p_filename.replace(pos, p_start + '_1' + end))
                write_tsv(
                    p_assay.loc[p_assay[sample_col].str.contains('POSLAT')],
                    p_filename.replace(pos, p_start + '_2' + end))
            except Exception:
                return False, "Failed to generate 2 POSITIVE ISA-Tab assay files for study " + study_id

        elif neg in a_files:
            n_assay = read_tsv(a_files)
            n_filename = a_files
            try:
                # split based on 'NEG' and 'POL'
                write_tsv(n_assay.loc[n_assay[sample_col].str.contains('NEG')],
                          n_filename.replace(neg, n_start + '_1' + end))
                write_tsv(n_assay.loc[n_assay[sample_col].str.contains('POL')],
                          n_filename.replace(neg, n_start + '_2' + end))
            except Exception:
                return False, "Failed to generate 2 NEGATIVE ISA-Tab assay files for study " + study_id

    status, message = True, "Generated 4 ISA-Tab assay files for study " + study_id

    return status, message
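# The read_tsv() and write_tsv() helpers used throughout these examples are defined elsewhere
# in the code base. A minimal sketch of what they might look like, assuming plain pandas and
# UTF-8 encoded, tab-separated files (the real helpers may handle more edge cases):
import pandas as pd

def read_tsv(file_name):
    # Read everything as strings so cell values are never coerced to floats/NaN
    df = pd.read_csv(file_name, sep="\t", header=0, encoding="utf-8", dtype=str)
    return df.fillna("")

def write_tsv(df, file_name):
    # Write back without the DataFrame index and return a short status message for the caller
    try:
        df.to_csv(file_name, sep="\t", header=True, index=False, encoding="utf-8")
    except Exception as e:
        return "Error: Could not write to file " + file_name + ". " + str(e)
    return "Success"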
# Example #6
    def delete(self, study_id, file_name):

        # param validation
        if study_id is None or file_name is None:
            abort(417, "Please provide a study id and TSV file name")

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            delete_columns = data_dict['data']
        except Exception as e:
            abort(417, str(e))

        # param validation
        columns = delete_columns.get('columns')
        if columns is None:
            abort(417, 'Please ensure the JSON contains a "columns" element')

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        audit_status, dest_path = write_audit_files(study_location)

        for column in columns:
            tsv_file = os.path.join(study_location, file_name)
            if not os.path.isfile(tsv_file):
                abort(406, "File " + file_name + " does not exist")
            else:
                file_df = read_tsv(tsv_file)
                try:
                    file_df.drop(column, axis=1, inplace=True)
                    write_tsv(file_df, tsv_file)
                except Exception as e:
                    logger.error("Could not remove column '" + column +
                                 "' from file " + file_name)
                    logger.error(str(e))

        return {"Success": "Removed column(s) from " + file_name}
    def post(self, study_id):
        data_dict = json.loads(request.data.decode('utf-8'))
        assay_file_names = data_dict.get('data')

        # param validation
        if study_id is None:
            abort(417)

        # param validation
        if assay_file_names is None:
            abort(417, 'Please ensure the JSON has at least one "assay_file_name" element')

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        logger.info('MAF: Getting ISA-JSON Study %s', study_id)
        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(403)

        maf_feedback = ""

        for assay_file_name in assay_file_names:
            annotation_file_name = None
            assay_file = assay_file_name['assay_file_name']
            full_assay_file_name = os.path.join(study_location, assay_file)
            if not os.path.isfile(full_assay_file_name):
                abort(406, "Assay file " + assay_file + " does not exist")
            assay_df = read_tsv(full_assay_file_name)
            annotation_file_name = assay_df['Metabolite Assignment File'].iloc[0]

            maf_df, new_annotation_file_name, new_column_counter = \
                create_maf(None, study_location, assay_file, annotation_file_name=annotation_file_name)
            if annotation_file_name != new_annotation_file_name:
                assay_df['Metabolite Assignment File'] = new_annotation_file_name
                write_tsv(assay_df, full_assay_file_name)
                annotation_file_name = new_annotation_file_name

            if maf_df.empty:
                abort(406, "MAF file could not be created or updated")

            maf_feedback = maf_feedback + ". New row(s):" + str(new_column_counter) + " for annotation file " + \
                            annotation_file_name

        return {"success": "Added/Updated MAF(s)" + maf_feedback}
# Example #8
    def get(self, study_id, file_name):
        # param validation
        if study_id is None or file_name is None:
            logger.info('No study_id and/or TSV file name given')
            abort(404)

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()
        file_name_param = file_name  # store the passed filename for simplicity

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        logger.info('Trying to load TSV file (%s) for Study %s', file_name,
                    study_id)
        # Get the Assay table or create a new one if it does not already exist
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df, study_id, file_name_param)

        return {'header': df_header, 'data': df_data_dict}
def update_characteristics_in_sample_sheet(onto_name, new_url, header,
                                           old_value, new_value,
                                           study_location, isa_study):
    """
    Update column values in sample file(s). The column header looks like
    'Characteristics[<characteristics name>]'.
    """
    try:
        sample_file_name = os.path.join(study_location,
                                        isa_study.filename)  # Sample sheet
        header = 'Characteristics[' + header + ']'

        if sample_file_name:
            df = read_tsv(sample_file_name)
            '''
            Identical column headers in a DataFrame are disambiguated with a .n suffix, so the extra
            ontology columns are located by position (col_pos) relative to the Characteristics header
            rather than by name.
            '''
            col_pos = df.columns.get_loc(
                header
            )  # Use this to determine the location of the additional columns
            header_source_ref = df.columns[col_pos +
                                           1]  # 'Term Source REF' (+.n)
            header_acc_number = df.columns[col_pos +
                                           2]  # 'Term Accession Number' (+.n)

            try:

                # if old_value != new_value:  # Do we need to change the cell values?
                df.loc[
                    df[header] == old_value,
                    header_source_ref] = onto_name  # Term Source REF(.n) changed
                df.loc[
                    df[header] == old_value,
                    header_acc_number] = new_url  # Term Accession Number(.n) changed
                df.loc[df[header] == old_value,
                       header] = new_value  # Characteristics name changed
                write_tsv(df, sample_file_name)
                logger.info(old_value + " " + new_value +
                            " has been renamed in " + sample_file_name)
            except Exception as e:
                logger.warning(
                    old_value + " " + new_value +
                    " was not used in the sheet or we failed updating " +
                    sample_file_name + ". Error: " + str(e))

    except Exception as e:
        logger.error("Could not update the ontology value " + old_value +
                     " in " + sample_file_name)
def check_maf_for_pipes(study_location, annotation_file_name):
    annotation_file_name = os.path.join(study_location, annotation_file_name)
    try:
        maf_df = read_tsv(annotation_file_name)
    except FileNotFoundError:
        abort(400, "The file " + annotation_file_name + " was not found")
    maf_len = len(maf_df.index)

    # Any rows to split?
    new_maf_df = split_rows(maf_df)
    new_maf_len = len(new_maf_df.index)

    file_name = annotation_file_name + '.split'
    if maf_len != new_maf_len:  # We did find |, so we create a new MAF
        write_tsv(new_maf_df, file_name)

    return maf_df, maf_len, new_maf_df, new_maf_len, file_name
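# split_rows() is referenced above but not shown on this page. Conceptually it expands every MAF
# row whose cells contain pipe-separated alternatives ("a|b") into one row per alternative. A
# simplified, illustrative sketch (assumes pandas; the real implementation may align alternatives
# across columns differently):
import pandas as pd

def split_rows(df):
    new_rows = []
    for _, row in df.iterrows():
        cells = [str(value).split("|") for value in row]
        n_alternatives = max(len(cell) for cell in cells)
        for i in range(n_alternatives):
            # Reuse the first (or only) value when a cell has fewer alternatives than the row maximum
            new_rows.append([cell[i] if i < len(cell) else cell[0] for cell in cells])
    return pd.DataFrame(new_rows, columns=df.columns)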
# Example #11
def read_characteristics_from_sample_sheet(study_location, isa_study):
    sample_orgs = []
    try:
        sample_file_name = os.path.join(study_location,
                                        isa_study.filename)  # Sample sheet

        if sample_file_name:
            df = read_tsv(sample_file_name)
            '''
            Identical column headers in a DataFrame are disambiguated with a .n suffix. "Organism part"
            should always be the 2nd group of columns, but to be safe we locate the extra ontology
            columns by their position (col_pos).
            '''
            col_pos1 = df.columns.get_loc(
                'Characteristics[Organism]'
            )  # Use this to determine the location of the additional columns
            header_source_ref1 = df.columns[col_pos1 + 1]  # 'Term Source REF'
            header_acc_number1 = df.columns[col_pos1 +
                                            2]  # 'Term Accession Number'

            col_pos2 = df.columns.get_loc('Characteristics[Organism part]')
            header_source_ref2 = df.columns[col_pos2 +
                                            1]  # 'Term Source REF' (+.n)
            header_acc_number2 = df.columns[col_pos2 +
                                            2]  # 'Term Accession Number' (+.n)

            new_df = df[[
                'Characteristics[Organism]', header_source_ref1,
                header_acc_number1, 'Characteristics[Organism part]',
                header_source_ref2, header_acc_number2
            ]].copy()

            new_df.columns = [
                'Characteristics[Organism]', 'Term Source REF',
                'Term Accession Number', 'Characteristics[Organism part]',
                'Term Source REF.1', 'Term Accession Number.1'
            ]

            return new_df.drop_duplicates()

    except Exception as e:
        logger.error(
            "Could not read 'Characteristics[Organism]' and/or 'Characteristics[Organism part]' in "
            + sample_file_name)

        abort(400)
    def get(self, study_id, file_name):
        # param validation
        if study_id is None or file_name is None:
            logger.info('No study_id and/or TSV file name given')
            abort(404)
        study_id = study_id.upper()
        file_name_param = file_name  # store the passed filename for simplicity

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        logger.info('Trying to load TSV file (%s) for Study %s', file_name, study_id)
        # Get the Assay table or create a new one if it does not already exist
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df, study_id, file_name_param)

        return {'header': df_header, 'data': df_data_dict}
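# totuples() and get_table_header() are used by every endpoint on this page but defined elsewhere.
# A rough sketch of the shapes they appear to return — one dict per row keyed by column name, and
# a header-name -> column-index map (the study_id/file_name arguments some call sites pass are
# accepted but ignored in this sketch):
def totuples(df, text):
    rows = [dict(zip(df.columns, row)) for row in df.itertuples(index=False, name=None)]
    return {text: rows}

def get_table_header(table_df, study_id=None, file_name=None):
    # Map each column header to its zero-based position in the sheet
    return {column: index for index, column in enumerate(table_df.columns)}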
# Example #13
    def put(self, study_id, file_name):
        # param validation
        if study_id is None or file_name is None:
            abort(
                406,
                'Please provide valid parameters for study identifier and TSV file name'
            )

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_rows = data_dict[
                'data']  # Use "index:n" element, this is the original row number
        except KeyError:
            new_rows = None

        if new_rows is None:
            abort(
                404, "Please provide valid data for updated new row(s). "
                "The JSON string has to have a 'data' element")

        for row in new_rows:
            try:
                row_index = row[
                    'index']  # Check if we have a value in the row number(s)
            except (KeyError, Exception):
                row_index = None

            if new_rows is None or row_index is None:
                abort(
                    404, "Please provide valid data for the updated row(s). "
                    "The JSON string has to have an 'index:n' element in each (JSON) row. "
                    "The header row can not be updated")

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        for row in new_rows:
            try:
                row_index_int = int(row['index'])
            except (KeyError, ValueError):
                row_index_int = None

            # Validate column names in new rows
            valid_column_name, message = validate_row(file_df, row, 'put')
            if not valid_column_name:
                abort(417, message)

            if row_index_int is not None:
                file_df = file_df.drop(
                    file_df.index[row_index_int]
                )  # Remove the old row from the spreadsheet
                # pop the "index:n" from the new_row before updating
                row.pop(
                    'index', None
                )  # Remove "index:n" element, this is the original row number
                file_df = insert_row(row_index_int, file_df,
                                     row)  # Update the row in the spreadsheet

        message = write_tsv(file_df, file_name)

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
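# insert_row() is referenced in the PUT handler above but not defined here. A minimal sketch that
# puts the updated row back at its original position (assumption: row_number is the positional
# index the row occupied before it was dropped):
import pandas as pd

def insert_row(row_number, df, row_value):
    # A fractional index sorts the new row between its neighbours, then the index is rebuilt
    line = pd.DataFrame(row_value, index=[row_number - 0.5])
    df = pd.concat([df, line]).sort_index().reset_index(drop=True)
    return df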
# Example #14
    def post(self, study_id, file_name):
        log_request(request)
        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            data = data_dict['data']
            new_row = data['rows']
        except KeyError:
            new_row = None
            data = None

        if new_row is None:
            abort(
                417,
                "Please provide valid data for updated new row(s). The JSON string has to have a 'rows' element"
            )

        try:
            for element in new_row:
                element.pop(
                    'index', None
                )  # Remove "index:n" element, this is the original row number
        except Exception:
            logger.info('No index (row num) supplied, ignoring')

        # param validation
        if study_id is None or file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and TSV file name'
            )

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file name was not found")

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, new_row, "post")
        if not valid_column_name:
            abort(417, message)

        if data:
            try:
                start_index = data['index']
                if start_index == -1:
                    start_index = 0
                start_index = start_index - 0.5  # Fractional index so sort_index() places the new row before the target row

            except KeyError:
                start_index = len(file_df.index)

            # Map the complete row first, update with new_row
            complete_row = {}
            for col in file_df.columns:
                complete_row[col] = ""

            if not new_row:
                logger.warning(
                    "No new row information provided. Adding empty row " +
                    file_name + ", row " + str(complete_row))
            else:
                for row in new_row:
                    complete_row.update(row)
                    row = complete_row
                    line = pd.DataFrame(row, index=[start_index])
                    file_df = file_df.append(line, ignore_index=False)
                    file_df = file_df.sort_index().reset_index(drop=True)
                    start_index += 1

            file_df = file_df.replace(np.nan, '', regex=True)
            message = write_tsv(file_df, file_name)

        # Get an indexed header row
        df_header = get_table_header(file_df)

        # Get the updated data table
        try:
            df_data_dict = totuples(read_tsv(file_name), 'rows')
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        return {'header': df_header, 'data': df_data_dict, 'message': message}
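# validate_row() is called before rows are added or updated, but its definition is not shown here.
# A hedged sketch of the check it appears to perform — every key in the submitted row(s) must be an
# existing column header (the real implementation may be stricter):
def validate_row(table_df, row, http_type):
    # http_type ('post' or 'put') is unused in this sketch
    rows = row if isinstance(row, list) else [row]
    for new_row in rows:
        for column_name in new_row.keys():
            if column_name == 'index':
                continue  # positional hint, not a real column
            if column_name not in table_df.columns:
                return False, "Column '" + column_name + "' does not exist in the file"
    return True, "OK. All columns exist in the file"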
# Example #15
    def put(self, study_id, file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            columns_rows = data_dict['data']
        except KeyError:
            columns_rows = None

        if columns_rows is None:
            abort(
                404, "Please provide valid key-value pairs for the cell value."
                "The JSON string has to have a 'data' element")

        # param validation
        if study_id is None or file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and/or file name'
            )

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        for column in columns_rows:
            cell_value = column['value']
            row_index = column['row']
            column_index = column['column']
            #  Need to add values for column and row (not header)
            try:
                # for row_val in range(table_df.shape[0]):
                table_df.iloc[int(row_index), int(column_index)] = cell_value
            except ValueError as e:
                logger.error(
                    "(ValueError) Unable to find the required 'value', 'row' and 'column' values. Value: "
                    + str(cell_value) + ", row: " + str(row_index) + ", column: " +
                    str(column_index) + ". " + str(e))
                abort(
                    417,
                    "(ValueError) Unable to find the required 'value', 'row' and 'column' values. Value: "
                    + str(cell_value) + ", row: " + str(row_index) + ", column: " +
                    str(column_index))
            except IndexError as e:
                logger.error(
                    "(IndexError) Unable to find the required 'value', 'row' and 'column' values. Value: "
                    + str(cell_value) + ", row: " + str(row_index) + ", column: " +
                    str(column_index) + ". " + str(e))
                abort(
                    417,
                    "(IndexError) Unable to find the required 'value', 'row' and 'column' values. Value: "
                    + str(cell_value) + ", row: " + str(row_index) + ", column: " +
                    str(column_index))

        # Write the new row back in the file
        message = write_tsv(table_df, file_name)

        df_data_dict = totuples(table_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(table_df)

        return {'header': df_header, 'rows': df_data_dict, 'message': message}
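# An illustrative client call for the cell-update PUT handler above. The URL pattern and token
# header name are assumptions inferred from the code, not a documented contract:
import json
import requests

payload = {"data": [{"row": 0, "column": 2, "value": "liver"},
                    {"row": 1, "column": 2, "value": "kidney"}]}
response = requests.put("https://example.org/studies/MTBLS1/cells/s_MTBLS1.txt",
                        headers={"user_token": "<your-api-token>"},
                        data=json.dumps(payload))
print(response.json()["message"])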
# Example #16
    def post(self, study_id, file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_columns = data_dict['data']
        except KeyError:
            new_columns = None

        if new_columns is None:
            abort(
                417,
                "Please provide valid key-value pairs for the new columns."
                "The JSON string has to have a 'data' element")

        # param validation
        if study_id is None or file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and/or file name'
            )
        study_id = study_id.upper()

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        audit_status, dest_path = write_audit_files(study_location)

        # Get an indexed header row
        df_header = get_table_header(table_df)

        for column in new_columns:
            new_column_default_value = column['value']
            new_column_name = column['name']
            new_column_position = column['index']

            #  Need to add values for each existing row (not header)
            new_col = []
            for row_val in range(table_df.shape[0]):
                new_col.append(new_column_default_value)

            # Check if we already have the column in the current position
            try:
                header_name = table_df.iloc[:, new_column_position].name
            except Exception:
                header_name = ""

            if header_name == new_column_name:  # We should update the existing column
                table_df.iloc[:, new_column_position] = new_col
            else:
                # Add new column to the spreadsheet
                table_df.insert(loc=int(new_column_position),
                                column=new_column_name,
                                value=new_col,
                                allow_duplicates=True)

        # Get an (updated) indexed header row
        df_header = get_table_header(table_df)

        # Get all indexed rows
        df_data_dict = totuples(table_df.reset_index(), 'rows')

        message = write_tsv(table_df, file_name)

        return {'header': df_header, 'rows': df_data_dict, 'message': message}
# Example #17
    def delete(self, study_id, file_name):
        # query validation
        parser = reqparse.RequestParser()
        parser.add_argument(
            'row_num',
            help="The row number of the cell(s) to remove (exclude header)",
            location="args")
        args = parser.parse_args()
        row_num = args['row_num']

        # param validation
        if study_id is None or file_name is None or row_num is None:
            abort(404)

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        row_nums = row_num.split(",")

        # Need to remove the highest row number first as the DataFrame dynamically re-orders when one row is removed
        sorted_num_rows = [int(x) for x in row_nums]
        sorted_num_rows.sort(reverse=True)
        for num in sorted_num_rows:
            file_df = file_df.drop(
                file_df.index[num])  # Drop row(s) in the spreadsheet

        message = write_tsv(file_df, file_name)

        # To be sure we read the file again
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
    def post(self, study_id, file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_columns = data_dict['data']
        except KeyError:
            new_columns = None

        if new_columns is None:
            abort(417, "Please provide valid key-value pairs for the new columns."
                       "The JSON string has to have a 'data' element")

        # param validation
        if study_id is None or file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and/or file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        # Get an indexed header row
        df_header = get_table_header(table_df)

        for column in new_columns:
            new_column_default_value = column['value']
            new_column_name = column['name']
            new_column_position = column['index']

            #  Need to add values for each existing row (not header)
            new_col = []
            for row_val in range(table_df.shape[0]):
                new_col.append(new_column_default_value)

            # Check if we already have the column in the current position
            try:
                header_name = table_df.iloc[:, new_column_position].name
            except Exception:
                header_name = ""

            if header_name == new_column_name:  # We should update the existing column
                table_df.iloc[:, new_column_position] = new_col
            else:
                # Add new column to the spreadsheet
                table_df.insert(loc=int(new_column_position), column=new_column_name,
                                value=new_col, allow_duplicates=True)

        # Get an (updated) indexed header row
        df_header = get_table_header(table_df)

        # Get all indexed rows
        df_data_dict = totuples(table_df.reset_index(), 'rows')

        message = write_tsv(table_df, file_name)

        return {'header': df_header, 'rows': df_data_dict, 'message': message}
    def post(self, study_id, file_name):
        log_request(request)
        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_row = data_dict['data']
        except KeyError:
            new_row = None

        if new_row is None:
            abort(417, "Please provide valid data for updated new row(s). The JSON string has to have a 'data' element")

        try:
            for element in new_row:
                element.pop('index', None)  # Remove "index:n" element, this is the original row number
        except Exception:
            logger.info('No index (row num) supplied, ignoring')

        # param validation
        if study_id is None or file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and TSV file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file name was not found")

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, new_row, "post")
        if not valid_column_name:
            abort(417, message)

        if new_row[0]:
            file_df = file_df.append(new_row, ignore_index=True)  # Add new row to the spreadsheet (TSV file)
        else:
            file_df = file_df.append(pd.Series(), ignore_index=True)

        message = write_tsv(file_df, file_name)

        # Get an indexed header row
        df_header = get_table_header(file_df)

        # Get the updated data table
        try:
            df_data_dict = totuples(read_tsv(file_name), 'rows')
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        return {'header': df_header, 'data': df_data_dict, 'message': message}
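# Illustrative request body for the add-row POST handler above: 'data' is a list of row dicts keyed
# by column header. The column names below are examples only, not a fixed schema:
new_rows_payload = {
    "data": [
        {"Sample Name": "sample_42", "Characteristics[Organism]": "Homo sapiens"}
    ]
}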
def search_and_update_maf(study_location, annotation_file_name):
    sdf_file_list = []
    existing_pubchem_file = False
    short_file_name = os.path.join(study_location, annotation_file_name.replace('.tsv', ''))
    if annotation_file_name.endswith(pubchem_end):
        existing_pubchem_file = True
        short_file_name = os.path.join(study_location, annotation_file_name.replace(pubchem_end, ''))

    annotation_file_name = os.path.join(study_location, annotation_file_name)
    pd.options.mode.chained_assignment = None  # default='warn'

    standard_maf_columns = {"database_identifier": 0, "chemical_formula": 1, "smiles": 2, "inchi": 3}
    maf_compound_name_column = "metabolite_identification"

    try:
        maf_df = read_tsv(annotation_file_name)
    except FileNotFoundError:
        abort(400, "The file " + annotation_file_name + " was not found")
    maf_len = len(maf_df.index)

    # First make sure the existing pubchem annotated spreadsheet is loaded
    pubchem_df = maf_df.copy()

    if existing_pubchem_file:  # The file has already been split and this is an existing "pubchem" file
        new_maf_df = maf_df.copy()
        new_maf_len = len(new_maf_df.index)
    else:
        # Any rows to split?
        new_maf_df = split_rows(maf_df)
        new_maf_len = len(new_maf_df.index)

        if maf_len != new_maf_len:  # We did find | so we have to use the new dataframe
            maf_df = new_maf_df

        # Remove existing row values first, because that's what we do ;-)
        for column_name in standard_maf_columns:
            maf_df.iloc[:, standard_maf_columns[column_name]] = ""

        pubchem_df = create_pubchem_df(maf_df)

    row_idx = 0
    short_df = maf_df[["database_identifier", maf_compound_name_column]]

    # Search using the compound name column
    for idx, row in short_df.iterrows():
        database_id = row[0]
        comp_name = row[1]
        print(str(idx+1) + ' of ' + str(new_maf_len) + ' : ' + comp_name)
        if not database_id:
            start_time = time.time()
            chebi_found = False
            comp_name = comp_name.rstrip()  # Remove trailing spaces
    #        comp_name = comp_name.encode('ascii', 'ignore')  # Make sure it's only searching using ASCII encoding

            if '/' in comp_name:  # Not a real name
                comp_name = comp_name.replace('/', ' ')

            search_res = wsc.get_maf_search("name", comp_name)  # This is the standard MetaboLights aka Plugin search
            if search_res['content']:
                result = search_res['content'][0]
                database_identifier = result["databaseId"]
                chemical_formula = result["formula"]
                smiles = result["smiles"]
                inchi = result["inchi"]
                name = result["name"]

                pubchem_df.iloc[row_idx, 0] = database_identifier
                pubchem_df.iloc[row_idx, 1] = chemical_formula
                pubchem_df.iloc[row_idx, 2] = smiles
                pubchem_df.iloc[row_idx, 3] = inchi
                # 4 is name / metabolite_identification from MAF

                if name:
                    if database_identifier:
                        if database_identifier.startswith('CHEBI:'):
                            chebi_found = True
                            logger.info("    -- Found ChEBI id " + database_identifier + " based on name")
                            print("    -- Found ChEBI id " + database_identifier + " based on name")
                        maf_df.iloc[row_idx, int(standard_maf_columns['database_identifier'])] = database_identifier
                    if chemical_formula:
                        maf_df.iloc[row_idx, int(standard_maf_columns['chemical_formula'])] = chemical_formula
                    if smiles:
                        maf_df.iloc[row_idx, int(standard_maf_columns['smiles'])] = smiles
                    if inchi:
                        maf_df.iloc[row_idx, int(standard_maf_columns['inchi'])] = inchi

            if not chebi_found:  # We could not find this in ChEBI, let's try other sources
                pc_name, pc_inchi, pc_inchi_key, pc_smiles, pc_cid, pc_formula, pc_synonyms, pc_structure = \
                    pubchem_search(comp_name, study_location)

                cactus_stdinchikey = cactus_search(comp_name, 'stdinchikey')
                opsin_stdinchikey = opsin_search(comp_name, 'stdinchikey')
                cactus_smiles = cactus_search(comp_name, 'smiles')
                opsin_smiles = opsin_search(comp_name, 'smiles')
                cactus_inchi = cactus_search(comp_name, 'stdinchi')
                opsin_inchi = opsin_search(comp_name, 'stdinchi')
                cactus_synonyms = cactus_search(comp_name, 'names')  # Synonyms

                ik = cactus_stdinchikey
                if pc_inchi_key:
                    ik = pc_inchi_key
                csid = get_csid(ik)

                pubchem_df.iloc[row_idx, 5] = pc_name  # 5 PubChem name
                pubchem_df.iloc[row_idx, 6] = pc_cid   # 6 PubChem CID

                if not pc_cid:
                    pc_cid = get_pubchem_cid_on_inchikey(cactus_stdinchikey, opsin_stdinchikey)
                pubchem_df.iloc[row_idx, 7] = pc_cid  # 7 PubChem CID, if none get from InChIKey search (Cactus, OPSIN)
                pubchem_df.iloc[row_idx, 8] = csid  # 8 ChemSpider ID (CSID) from INCHI
                pubchem_df.iloc[row_idx, 9] = get_ranked_values(pc_smiles, cactus_smiles, opsin_smiles, None)  # 9 final smiles
                final_inchi = get_ranked_values(pc_inchi, cactus_inchi, opsin_inchi, None)  # 10 final inchi
                pubchem_df.iloc[row_idx, 10] = final_inchi
                final_inchi_key = get_ranked_values(pc_inchi_key, cactus_stdinchikey, opsin_stdinchikey, None)  # 11 final inchikey
                pubchem_df.iloc[row_idx, 11] = final_inchi_key
                pubchem_df.iloc[row_idx, 12] = pc_smiles  # 12 pc_smiles
                pubchem_df.iloc[row_idx, 13] = cactus_smiles   # 13 cactus_smiles
                pubchem_df.iloc[row_idx, 14] = opsin_smiles  # 14 opsin_smiles
                pubchem_df.iloc[row_idx, 15] = pc_inchi  # 15 PubChem inchi
                pubchem_df.iloc[row_idx, 16] = cactus_inchi  # 16 Cactus inchi
                pubchem_df.iloc[row_idx, 17] = opsin_inchi   # 17 Opsin inchi
                pubchem_df.iloc[row_idx, 18] = pc_inchi_key  # 18 PubChem stdinchikey
                pubchem_df.iloc[row_idx, 19] = cactus_stdinchikey  # 19 cactus_stdinchikey
                pubchem_df.iloc[row_idx, 20] = opsin_stdinchikey   # 20 opsin_stdinchikey
                pubchem_df.iloc[row_idx, 21] = pc_formula   # 21 PubChem formula
                pubchem_df.iloc[row_idx, 22] = pc_synonyms  # 22 PubChem synonyms
                pubchem_df.iloc[row_idx, 23] = cactus_synonyms  # 23 Cactus synonyms
                pubchem_df.iloc[row_idx, 24] = pc_structure  # 24 PubChem structure (SDF)

                # Now we have more information, so let's try to search ChEBI again

                if final_inchi_key and len(final_inchi_key) > 0:
                    chebi_id, inchi, inchikey, name, smiles, formula = direct_chebi_search(final_inchi_key, comp_name)
                    if chebi_id:
                        database_identifier = chebi_id
                        chemical_formula = formula

                        logger.info("    -- Found ChEBI id " + database_identifier + " based on final InChIKey")
                        print('    -- Found ChEBI id ' + database_identifier + ' based on final InChIKey')
                        pubchem_df.iloc[row_idx, 0] = database_identifier
                        pubchem_df.iloc[row_idx, 1] = chemical_formula
                        pubchem_df.iloc[row_idx, 2] = smiles
                        pubchem_df.iloc[row_idx, 3] = inchi
                        # 4 is name / metabolite_identification from MAF

                        if name:  # Add to the annotated file as well
                            if database_identifier:
                                maf_df.iloc[row_idx, int(standard_maf_columns['database_identifier'])] = database_identifier
                            if chemical_formula:
                                maf_df.iloc[row_idx, int(standard_maf_columns['chemical_formula'])] = chemical_formula
                            if smiles:
                                maf_df.iloc[row_idx, int(standard_maf_columns['smiles'])] = smiles
                            if inchi:
                                maf_df.iloc[row_idx, int(standard_maf_columns['inchi'])] = inchi

                    else:
                        # Now, if we still don't have a ChEBI accession, download the structure (SDF) from PubChem
                        # and the classyFire SDF
                        sdf_file_list = get_sdf(study_location, pc_cid, pc_name, sdf_file_list, final_inchi)

            logger.info("    -- Search took %s seconds" % round(time.time() - start_time, 2))
            print("    -- Search took %s seconds" % round(time.time() - start_time, 2))
        else:
            print("    -- Skipping. Found database id  " + database_id)

        row_idx += 1

    write_tsv(maf_df, short_file_name + "_annotated.tsv")
    pubchem_file = short_file_name + pubchem_end
    write_tsv(pubchem_df, pubchem_file)
    if sdf_file_list:
        concatenate_sdf_files(sdf_file_list, short_file_name + '_complete.sdf', short_file_name + '_classyfire.sdf')

    return maf_df, maf_len, new_maf_df, new_maf_len, pubchem_file
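# get_ranked_values() is used above to pick a single value per field from several sources in order
# of preference (PubChem, then Cactus, then OPSIN, then ChemSpider). A minimal sketch of that
# behaviour; the real helper may also record which source supplied the value:
def get_ranked_values(pubchem, cactus, opsin, chemspider):
    for value in (pubchem, cactus, opsin, chemspider):
        if value:
            return value
    return ""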
# Example #21
    def post(self, study_id, file_name):

        parser = reqparse.RequestParser()
        parser.add_argument('new_column_name', help="Name of new column")
        new_column_name = None
        parser.add_argument('new_column_position',
                            help="The position (column #) of new column")
        new_column_position = None
        parser.add_argument('new_column_default_value',
                            help="The (optional) default value of new column")
        new_column_default_value = None

        if request.args:
            args = parser.parse_args(req=request)
            new_column_name = args['new_column_name']
            new_column_position = args['new_column_position']
            new_column_default_value = args['new_column_default_value']

        if new_column_name is None:
            abort(404, "Please provide valid name for the new column")

        # param validation
        if study_id is None or file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and file name'
            )
        study_id = study_id.upper()

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        audit_status, dest_path = write_audit_files(study_location)

        #  Need to add values for each existing row (not header)
        new_col = []
        for row_val in range(table_df.shape[0]):
            new_col.append(new_column_default_value)

        # Add new column to the spreadsheet
        table_df.insert(loc=int(new_column_position),
                        column=new_column_name,
                        value=new_col,
                        allow_duplicates=True)

        df_data_dict = totuples(table_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(table_df)

        message = write_tsv(table_df, file_name)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
# Example #22
def update_maf_stats(user_token):

    #database_maf_info_table_actions()  # Truncate, drop and create the database table

    for acc in get_all_study_acc():
        study_id = acc[0]
        maf_len = 0
        sample_len = 0
        assay_len = 0
        print("------------------------------------------ " + study_id + " ------------------------------------------")

        try:
            database_maf_info_table_actions(study_id)
        except ValueError:
            logger.error("Failed to update database for " + study_id)
            continue

        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)

        try:
            isa_study, isa_inv, std_path = iac.get_isa_study(study_id=study_id, api_key=user_token,
                                                             skip_load_tables=True, study_location=study_location)
        except Exception as e:
            logger.error("Failed to load ISA-Tab files for study " + study_id + ". " + str(e))
            continue  # Cannot find the required metadata files, skip to the next study

        try:
            number_of_files = sum([len(files) for r, d, files in os.walk(study_location)])
        except Exception:
            number_of_files = 0

        try:
            sample_file_name = isa_study.filename
            sample_df = read_tsv(os.path.join(study_location, sample_file_name))
            sample_len = sample_df.shape[0]
        except FileNotFoundError:
            logger.warning('No sample file found for ' + study_id)

        for assay in isa_study.assays:
            complete_maf = []
            file_name = os.path.join(study_location, assay.filename)
            logger.info('Trying to load TSV file (%s) for Study %s', file_name, study_id)
            # Get the Assay table or create a new one if it does not already exist
            try:
                assay_file_df = read_tsv(file_name)
            except Exception as e:
                logger.error("The file " + file_name + " was not found. " + str(e))
                continue  # Cannot read this assay file, skip to the next one
            try:
                assay_len = assay_len + assay_file_df.shape[0]
                assay_maf_name = assay_file_df['Metabolite Assignment File'].iloc[0]
                if not assay_maf_name:
                    continue  # No MAF referenced in this assay
            except Exception:
                logger.error("Error in identifying MAF column in assay")
                continue  # No MAF column found in this assay

            maf_file_name = os.path.join(study_location, assay_maf_name)  # MAF sheet

            if os.path.isfile(maf_file_name):
                try:
                    maf_df = read_tsv(maf_file_name)
                except Exception as e:
                    logger.error("The file " + maf_file_name + " was not found. " + str(e))
                    continue  # Skip this assay if the MAF cannot be read

                print(study_id + " - Rows: " + str(len(maf_df)) + ". File: " + maf_file_name)
            else:
                print("Could not find file " + maf_file_name)
                continue

            maf_len = maf_len + maf_df.shape[0]

            for idx, row in maf_df.iterrows():
                maf_row = {}
                try:
                    database_identifier = row['database_identifier']
                    metabolite_identification = row['metabolite_identification']
                    maf_row.update({"acc": study_id})
                    maf_row.update({"database_identifier": database_identifier})
                    maf_row.update({"metabolite_identification": metabolite_identification})
                    maf_row.update({"database_found": is_identified(database_identifier)})
                    maf_row.update({"metabolite_found": is_identified(metabolite_identification)})
                except Exception as e:
                    logger.error('MAF stats failed for ' + study_id + '. Error: ' + str(e))
                    continue

                complete_maf.append(maf_row)

            status, msg = update_database_stats(complete_maf)  # Update once per MAF

        study_sql = "UPDATE STUDIES SET sample_rows = " + str(sample_len) + ", assay_rows = " + str(assay_len) + \
                    ", maf_rows = " + str(maf_len) + ", number_of_files = " + str(number_of_files) + \
                    " WHERE ACC = '" + str(study_id) + "';"

        status, msg = insert_update_data(study_sql)
        print("Database updated: " + study_sql)

    return status, msg
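# is_identified() is used when building the per-MAF statistics rows above. A plausible sketch,
# assuming a value counts as "identified" when it is anything other than an empty or placeholder
# cell (the real rules, and the exact return type, may differ):
def is_identified(value):
    if value is None:
        return '0'
    value = str(value).strip().lower()
    if value in ('', 'unknown', 'unidentified', 'nan'):
        return '0'
    return '1'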
    def put(self, study_id, file_name):
        # param validation
        if study_id is None or file_name is None:
            abort(406, 'Please provide valid parameters for study identifier and TSV file name')
        study_id = study_id.upper()

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
        except KeyError:
            new_rows = None

        if new_rows is None:
            abort(404, "Please provide valid data for updated new row(s). "
                       "The JSON string has to have a 'data' element")

        for row in new_rows:
            try:
                row_index = row['index']  # Check if we have a value in the row number(s)
            except (KeyError, Exception):
                row_index = None

            if new_rows is None or row_index is None:
                abort(404, "Please provide valid data for the updated row(s). "
                           "The JSON string has to have an 'index:n' element in each (JSON) row. "
                           "The header row can not be updated")

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        for row in new_rows:
            try:
                row_index_int = int(row['index'])
            except (KeyError, ValueError):
                row_index_int = None

            # Validate column names in new rows
            valid_column_name, message = validate_row(file_df, row, 'put')
            if not valid_column_name:
                abort(417, message)

            if row_index_int is not None:
                file_df = file_df.drop(file_df.index[row_index_int])  # Remove the old row from the spreadsheet
                # pop the "index:n" from the new_row before updating
                row.pop('index', None)  # Remove "index:n" element, this is the original row number
                file_df = insert_row(row_index_int, file_df, row)  # Update the row in the spreadsheet

        message = write_tsv(file_df, file_name)

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df)

        return {'header': df_header, 'data': df_data_dict, 'message': message}