def get(self, study_id):
    log_request(request)
    # param validation
    if study_id is None:
        abort(404)

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    else:
        # user token is required
        abort(401)

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
        wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    isa_study, isa_inv, std_path = iac.get_isa_study(study_id, user_token, skip_load_tables=True,
                                                     study_location=study_location)

    samples = read_characteristics_from_sample_sheet(study_location, isa_study)
    return totuples(samples, 'organisms')
def post(self):
    log_request(request)
    parser = reqparse.RequestParser()
    parser.add_argument('term', help="Ontology term")
    term = None
    if request.args:
        args = parser.parse_args(req=request)
        term = args['term']

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    if user_token is None:
        abort(403)

    # Need to check that the user is actually an active user, i.e. the user_token exists
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions('MTBLS1', user_token)
    if not is_curator:
        abort(403)

    logger = logging.getLogger('wslog')
    try:
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
        table_df = pd.read_csv(file_name, sep="\t", encoding='utf-8')
        table_df = table_df.replace(np.nan, '', regex=True)
        if term:
            try:
                temp = table_df[table_df['PROPERTY_VALUE'].str.contains(term, na=False, case=False)]
                indexes = temp.index.values.tolist()
                for i in indexes:
                    query = table_df.iloc[i]['PROPERTY_VALUE']
                    attribute_name = 'factor'
                    res = ','.join(searchStudies(query, user_token, feature=attribute_name))
                    table_df.iloc[i]['STUDY'] = res
                table_df.to_csv(file_name, sep='\t', index=False, encoding='utf-8')
            except Exception as e:
                logger.error('Failed to find term "%s" in the spreadsheet: %s', term, str(e))
        else:
            for i in range(len(table_df)):
                query = table_df.iloc[i]['PROPERTY_VALUE']
                attribute_name = 'factor'
                res = ','.join(searchStudies(query, user_token, feature=attribute_name))
                table_df.iloc[i]['STUDY'] = res
            table_df.to_csv(file_name, sep='\t', index=False, encoding='utf-8')
    except Exception as e:
        logger.error('Failed to load metabolights-zooma.tsv: %s', str(e))
def put(self):
    log_request(request)
    parser = reqparse.RequestParser()
    parser.add_argument('studyID', help='Metabolights studyID')
    studyID = None
    if request.args:
        args = parser.parse_args(req=request)
        studyID = args['studyID']
    if studyID:
        studyID = studyID.strip().upper()
    else:
        abort(400)

    parser.add_argument('organism', help="study organism")
    org = 'hsa'
    if request.args:
        args = parser.parse_args(req=request)
        organism = args['organism']
        if organism:
            try:
                org = get_kegg_organism_abbr(organism)
            except Exception as e:
                logger.info("Can't find organism {organism} in KEGG".format(organism=organism))
                return "Can't find organism {organism} in KEGG".format(organism=organism)
        else:
            abort(400)

    # module = "module load r-3.6.3-gcc-9.3.0-yb5n44y; module load pandoc-2.7.3-gcc-9.3.0-gctut72;"
    script = app.config.get('FELLA_PATHWAY_SCRPT')
    para = '-s {studyID} -o {organism}'.format(studyID=studyID, organism=org)
    command = script + ' ' + para

    logger.info("Starting cluster job for FELLA pathway: " + command)
    status, message, job_out, job_err = lsf_job(app.config.get('LSF_COMMAND_BSUB'),
                                                job_param=command, send_email=True)

    if status:
        return {"success": message, "message": job_out, "errors": job_err}
    else:
        return {"error": message, "message": job_out, "errors": job_err}
def get(self, study_id):
    log_request(request)
    # param validation
    if study_id is None:
        abort(404)
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if 'user_token' in request.headers:
        user_token = request.headers['user_token']
    if user_token is None:
        abort(401)

    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('filename1', help='TSV filename one')
    parser.add_argument('filename2', help='TSV filename two')
    filename1 = None
    filename2 = None
    if request.args:
        args = parser.parse_args(req=request)
        filename1 = args['filename1'].lower() if args['filename1'] else None
        filename2 = args['filename2'].lower() if args['filename2'] else None

    if not filename1 or not filename2:
        logger.warning("Missing TSV filenames.")
        abort(404, "Missing TSV filenames.")

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(401, "Study does not exist or you do not have access to this study.")

    location = study_location

    df1 = read_tsv(filename1)
    df2 = read_tsv(filename2)
    diff_df = diff_pd(df1, df2)
    return jsonify({"entries": diff_df})
def get(self):
    """
    Return a single user by username.
    Checks the validity of the param, retrieves the API token from the header and checks its
    validity and what permissions are available to the bearer of the token.
    """
    log_request(request)

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    else:
        # user token is required
        abort(401)

    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions('MTBLS1', user_token)
    if not read_access:
        abort(403)

    # pull username from query params
    username = None
    user_parser = RequestParsers.username_parser()
    if request.args:
        args = user_parser.parse_args(req=request)
        username = args['username']

    # username has not been properly provided, abort with code 400 (bad request)
    if username is None:
        abort(400)

    # query the database for the user, and return the result of the query
    return jsonify(get_user(username))
def post(self):
    log_request(request)
    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    else:
        # user token is required
        abort(401)

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
        wsc.get_permissions('MTBLS1', user_token)
    if not read_access:
        abort(403)

    first_name = None
    last_name = None
    email = None
    affiliation = None
    affiliation_url = None
    address = None
    orcid = None
    metaspace_api_key = None

    # body content validation
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        data = data_dict['user']
        try:
            first_name = data['firstName']
            last_name = data['lastName']
            email = data['email']
            affiliation = data['affiliation']
            affiliation_url = data['affiliation_url']
            address = data['address']
            orcid = data['orcid']
            metaspace_api_key = data['metaspace_api_key']
        except Exception as e:
            abort(412, str(e))
    except (ValidationError, Exception):
        abort(400, 'Incorrect JSON provided')

    password, password_encoded, api_token = get_new_password_and_api_token()

    val_email(email)
    status, message = create_user(first_name, last_name, email, affiliation, affiliation_url, address,
                                  orcid, api_token, password_encoded, metaspace_api_key)

    if status:
        return {"user_name": email, "api_token": str(api_token), "password": str(password)}
    else:
        return {"Error": message}
def put(self):
    log_request(request)
    parser = reqparse.RequestParser()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    else:
        abort(401)

    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
        wsc.get_permissions('MTBLS1', user_token)
    user_name = get_username_by_token(user_token)
    if not write_access:
        abort(403)

    # loading data
    data_dict = None
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
    except Exception as e:
        logger.info(e)
        abort(400)
    if not data_dict:
        abort(403)

    try:
        wks = getWorksheet(app.config.get('MTBLS_CURATION_LOG'), 'Studies',
                           app.config.get('GOOGLE_SHEET_TOKEN'))
    except Exception as e:
        logger.info('Failed to load worksheet. %s', e)
        print('Failed to load worksheet.', e)
        abort(400)
        return []

    output = {'success': [], 'un_success': []}
    editable_columns = ['Study Type', 'Species', 'Place Holder', 'Assigned to']

    for studyID, fields in data_dict.items():
        try:
            r = wks.find(studyID).row
            # r, _ = getCellCoordinate(app.config.get('MTBLS_CURATION_LOG'), 'Studies',
            #                          app.config.get('GOOGLE_SHEET_TOKEN'), studyID)
        except Exception:
            logger.info("Can't find {studyID} in curation log".format(studyID=studyID))
            print("Can't find {studyID} in curation log".format(studyID=studyID))
            continue

        for field, value in fields.items():
            if field in editable_columns:
                c = wks.find(field).col
                # _, c = getCellCoordinate(app.config.get('MTBLS_CURATION_LOG'), 'Studies',
                #                          app.config.get('GOOGLE_SHEET_TOKEN'), field)
                if update_cell(wks, r, c, value):
                    output['success'].append(
                        "{user_name} updated {studyID} - {field} to {value}".format(
                            user_name=user_name, studyID=studyID, field=field, value=value))
                    logger.info("{user_name} updated {studyID} - {field} to {value}".format(
                        user_name=user_name, studyID=studyID, field=field, value=value))
                    print("{user_name} updated {studyID} - {field} to {value}".format(
                        user_name=user_name, studyID=studyID, field=field, value=value))
            else:
                logger.info('Permission denied to modify {studyID} {field}'.format(
                    studyID=studyID, field=field))
                print('Permission denied to modify {studyID} {field}'.format(
                    studyID=studyID, field=field))
                output['un_success'].append(
                    'Permission denied to modify {studyID} {field}'.format(
                        studyID=studyID, field=field))
                continue

    return jsonify(output)
def get(self):
    log_request(request)
    parser = reqparse.RequestParser()

    # studyID
    parser.add_argument('studyID', help='studyID')
    studyID = None
    if request.args:
        args = parser.parse_args(req=request)
        studyID = args['studyID']
    if studyID:
        if ',' in studyID:
            studyID = studyID.split(',')
        else:
            studyID = [studyID]
        studyID = [x.upper() for x in studyID]

    # column
    parser.add_argument('field', help='column name(s)')
    field = None
    if request.args:
        args = parser.parse_args(req=request)
        field = args['field']
    if field:
        if ',' in field:
            field = field.split(',')
        else:
            field = [field]

    # page
    parser.add_argument('page', help='page number')
    page = None
    if request.args:
        args = parser.parse_args(req=request)
        page = args['page']
    if page is not None:
        page = int(page)

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    else:
        abort(401)

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
        wsc.get_permissions('MTBLS1', user_token)
    if not write_access:
        abort(403)

    # Load google sheet
    try:
        google_df = getGoogleSheet(app.config.get('MTBLS_CURATION_LOG'), 'Studies',
                                   app.config.get('GOOGLE_SHEET_TOKEN'))
        google_df = google_df.set_index('MTBLS ID')
    except Exception as e:
        logger.info('Failed to load Google sheet: %s', e)
        abort(404)
        return []

    if studyID is None or (len(studyID) > 100 and page is not None):
        studyID = list(google_df.index.values)[100 * (page - 1):(100 * (page - 1) + 100)]

    # entire sheet
    if studyID is None and field is None:
        result = google_df.to_json(orient="index")
    # entire column
    elif studyID is None and len(field) > 0:
        result = google_df[field].to_json(orient="columns")
    # entire row
    elif len(studyID) > 0 and field is None:
        result = google_df.loc[studyID, :].to_json(orient="index")
    # combination
    else:
        result = google_df.loc[studyID, field].to_json(orient="index")

    return json.loads(result)
def post(self):
    log_request(request)
    parser = reqparse.RequestParser()

    # query field
    parser.add_argument('query', help='Report query')
    query = None
    if request.args:
        args = parser.parse_args(req=request)
        query = args['query']
    if query:
        query = query.strip()

    # study ID
    parser.add_argument('studyid', help='Study ID')
    studyid = None
    if request.args:
        args = parser.parse_args(req=request)
        studyid = args['studyid']
    if studyid:
        studyid = studyid.strip().upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    else:
        abort(401)

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
        wsc.get_permissions('MTBLS1', user_token)
    if not write_access:
        abort(403)

    reporting_path = app.config.get('MTBLS_FTP_ROOT') + app.config.get('REPORTING_PATH') + 'global/'

    file_name = ''
    res = ''

    if query == 'daily_stats':
        try:
            sql = open('./instance/study_report.sql', 'r').read()
            postgresql_pool, conn, cursor = get_connection()
            cursor.execute(sql)
            dates = cursor.fetchall()
            data = {}
            for dt in dates:
                dict_temp = {dt[0].strftime('%Y-%m-%d'): {'studies_created': dt[1],
                                                          'public': dt[2],
                                                          'review': dt[3],
                                                          'curation': dt[4],
                                                          'user': dt[5]}}
                data = {**data, **dict_temp}
            res = {"created_at": "2020-07-07",
                   "updated_at": datetime.today().strftime('%Y-%m-%d'),
                   'data': data}
            file_name = 'daily_report.json'
        except Exception as e:
            logger.info(e)
            print(e)

    if query == 'user_stats':
        file_name = 'study_report.json'
        study_data = readDatafromFile(reporting_path + file_name)

        sql = open('./instance/user_report.sql', 'r').read()
        postgresql_pool, conn, cursor = get_connection()
        cursor.execute(sql)
        result = cursor.fetchall()
        data = {}
        user_count = 0
        active_user = 0
        for dt in result:
            study_list = dt[6].split(",")
            studies = {}
            for x in study_list:
                try:
                    temp = study_data['data'][x.strip()]
                    studies[x.strip()] = temp
                except Exception:
                    continue
            dict_temp = {str(dt[0]): {"name": dt[13],
                                      "user_email": str(dt[1]),
                                      "country_code": dt[2],
                                      "joindate": dt[12],
                                      "total": str(dt[5]),
                                      "submitted": str(dt[7]),
                                      "review": str(dt[9]),
                                      "curation": str(dt[8]),
                                      "public": str(dt[10]),
                                      "dormant": str(dt[11]),
                                      "affiliation": dt[3],
                                      "user_status": str(dt[4]),
                                      "studies": studies}}
            data = {**data, **dict_temp}
            user_count += 1
            if dt[4] == 2:
                active_user += 1
        # data['user_count'] = str(user_count)
        # data['active_user'] = str(active_user)
        res = {"created_at": "2020-07-07",
               "updated_at": datetime.today().strftime('%Y-%m-%d'),
               "user_count": str(user_count),
               "active_user": str(active_user),
               "data": data}
        file_name = 'user_report.json'

    if query == 'study_stats':
        postgresql_pool, conn, cursor = get_connection()
        cursor.execute("select acc from studies")
        studies = cursor.fetchall()
        data = {}
        for st in studies:
            print(st[0])
            study_files, latest_update_time = get_all_files(app.config.get('STUDY_PATH') + str(st[0]))
            study_info = get_study(st[0])

            name = study_info.pop('submitter').split(',')
            country = study_info.pop('country').split(',')
            name_d = [{'name': x} for x in name]
            country_d = [{'country': x} for x in country]
            submitter = []
            for x in zip(name_d, country_d):
                res = {}
                for y in x:
                    res.update(y)
                submitter.append(res)

            study_info['submitter'] = submitter
            study_info['latest_update_time'] = latest_update_time
            study_info['study_files'] = study_files
            dict_temp = {str(st[0]): study_info}
            data = {**data, **dict_temp}

        file_name = 'study_report.json'
        res = {'data': data}
        res["updated_at"] = datetime.today().strftime('%Y-%m-%d')

    if query == 'global':
        file_name = 'global.json'
        j_data = readDatafromFile(reporting_path + file_name)

        # load global.json and update it for a single study
        if studyid:
            studyid = studyid.upper()

            # load global.json and clean the data set
            j_data = clean_json(j_data, studyid)

            # techniques
            res1 = get_techniques(studyID=studyid)
            for tech, value in res1['techniques'].items():
                if tech in j_data['data']['techniques']:
                    print(tech)
                    j_data['data']['techniques'][tech] += value  # res['techniques'][tech]
                else:
                    j_data['data']['techniques'].update({tech: value})

            # study_type
            res2 = get_studytype(studyID=studyid)
            j_data['data']['study_type']['targeted'] += res2['study_type']['targeted']
            j_data['data']['study_type']['untargeted'] += res2['study_type']['untargeted']
            j_data['data']['study_type']['targeted_untargeted'] += res2['study_type']['targeted_untargeted']

            # instruments & organisms
            ins, org = get_instruments_organism(studyID=studyid)
            for i, value in ins['instruments'].items():
                if i not in j_data['data']['instruments']:
                    j_data['data']['instruments'].update({i: value})
                else:
                    for studies, v in ins['instruments'][i].items():
                        j_data['data']['instruments'][i].update({studies: v})

            # organisms
            for o, org_part in org['organisms'].items():
                if o not in j_data['data']['organisms']:
                    j_data['data']['organisms'].update({o: org_part})
                else:
                    for org_p, studies in org_part.items():
                        if org_p not in j_data['data']['organisms'][o]:
                            j_data['data']['organisms'][o].update({org_p: studies})
                        else:
                            j_data['data']['organisms'][o][org_p] += studies

        # generate a new global file
        else:
            # techniques
            techs = get_techniques()
            j_data['data']['techniques'] = techs['techniques']

            # study_type
            types = get_studytype()
            j_data['data']['study_type'] = types['study_type']

            # instruments & organisms
            i, s = get_instruments_organism()
            j_data['data']['instruments'] = i['instruments']
            j_data['data']['organisms'] = s['organisms']

        j_data["updated_at"] = datetime.today().strftime('%Y-%m-%d')
        res = j_data

    if query == 'file_extension':
        file_name = 'file_extension.json'
        postgresql_pool, conn, cursor = get_connection()
        cursor.execute("select acc from studies where status = 3;")
        studies = cursor.fetchall()
        file_ext = []
        for studyID in studies:
            print(studyID[0])
            logger.info("Extracting study extension details: " + studyID[0])
            wd = os.path.join(app.config.get('STUDY_PATH'), studyID[0])
            try:
                file_ext.append(get_file_extensions(studyID[0], wd))
            except Exception:
                print("Error extracting study extension details: " + studyID[0])

        res = {"created_at": "2020-03-22",
               "updated_at": datetime.today().strftime('%Y-%m-%d'),
               'data': file_ext}

    # j_res = json.dumps(res, indent=4)
    writeDataToFile(reporting_path + file_name, res, True)
    return jsonify({"POST " + file_name: True})
def get(self):
    global start_date, query_field
    global end_date
    log_request(request)
    parser = reqparse.RequestParser()

    parser.add_argument('query', help='Report query')
    query = None
    if request.args:
        args = parser.parse_args(req=request)
        query = args['query']
    if query:
        query = query.strip()

    parser.add_argument('start', help='start date')
    if request.args:
        args = parser.parse_args(req=request)
        start = args['start']
        if start:
            start_date = datetime.strptime(start, '%Y%m%d')
        else:
            start_date = datetime.strptime('20110809', '%Y%m%d')

    parser.add_argument('end', help='end date')
    if request.args:
        args = parser.parse_args(req=request)
        end = args['end']
        if end:
            end_date = datetime.strptime(end, '%Y%m%d')
        else:
            end_date = datetime.today()

    parser.add_argument('studyStatus', help='studyStatus')
    studyStatus = None
    if request.args:
        args = parser.parse_args(req=request)
        studyStatus = args['studyStatus']
    if studyStatus:
        studyStatus = tuple([x.strip() for x in studyStatus.split(',')])

    parser.add_argument('queryFields', help='queryFields')
    query_field = None
    if request.args:
        args = parser.parse_args(req=request)
        queryFields = args['queryFields']
        if queryFields:
            query_field = tuple([x.strip().lower() for x in queryFields.split(',')])

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    else:
        abort(401)

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
        wsc.get_permissions('MTBLS1', user_token)
    if not write_access:
        if query in ['study_status', "global"]:
            studyStatus = ['public']
        else:
            abort(403)

    reporting_path = app.config.get('MTBLS_FTP_ROOT') + app.config.get('REPORTING_PATH') + 'global/'

    if query == 'daily_stats':
        file_name = 'daily_report.json'
        j_file = readDatafromFile(reporting_path + file_name)
        data_res = {}
        for date, report in j_file['data'].items():
            d = datetime.strptime(date, '%Y-%m-%d')
            if start_date <= d <= end_date:
                if query_field is not None:
                    slim_report = {k: report[k] for k in query_field}
                    data_res.update({date: slim_report})
                else:
                    data_res.update({date: report})
            else:
                continue
        j_file['data'] = data_res
        return jsonify(j_file)

    elif query == 'user_stats':
        file_name = 'user_report.json'
        j_file = readDatafromFile(reporting_path + file_name)
        return jsonify(j_file)

    elif query == 'global':
        file_name = 'global.json'
        j_file = readDatafromFile(reporting_path + file_name)
        return jsonify(j_file)

    elif query == 'file_extension':
        file_name = 'file_extension.json'
        j_file = readDatafromFile(reporting_path + file_name)
        return jsonify(j_file)

    elif query == 'study_status':
        file_name = 'study_report.json'
        j_file = readDatafromFile(reporting_path + file_name)
        data_res = {}
        for studyID, study_info in j_file['data'].items():
            d = datetime.strptime(study_info['submissiondate'], '%Y-%m-%d')
            status = study_info['status']
            if studyStatus is None:
                if start_date <= d <= end_date:
                    data_res.update({studyID: study_info})
                else:
                    continue
            else:
                if start_date <= d <= end_date and status.lower() in studyStatus:
                    data_res.update({studyID: study_info})
                else:
                    continue
        j_file['data'] = data_res
        return jsonify(j_file)

    else:
        file_name = ''
        abort(404)
def post(self, study_id, file_name):
    log_request(request)

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        data = data_dict['data']
        new_row = data['rows']
    except KeyError:
        new_row = None
        data = None

    if new_row is None:
        abort(417, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'rows' element")

    try:
        for element in new_row:
            element.pop('index', None)  # Remove "index:n" element, this is the original row number
    except Exception:
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and TSV file name')

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file name was not found")

    # Validate column names in new rows
    valid_column_name, message = validate_row(file_df, new_row, "post")
    if not valid_column_name:
        abort(417, message)

    if data:
        try:
            start_index = data['index']
            if start_index == -1:
                start_index = 0
            start_index = start_index - 0.5
        except KeyError:
            start_index = len(file_df.index)

    # Map the complete row first, then update it with the new row values
    complete_row = {}
    for col in file_df.columns:
        complete_row[col] = ""

    if not new_row:
        logger.warning("No new row information provided. Adding empty row " + file_name +
                       ", row " + str(complete_row))
    else:
        for row in new_row:
            complete_row.update(row)
            row = complete_row
            line = pd.DataFrame(row, index=[start_index])
            file_df = file_df.append(line, ignore_index=False)
            file_df = file_df.sort_index().reset_index(drop=True)
            start_index += 1

    file_df = file_df.replace(np.nan, '', regex=True)
    message = write_tsv(file_df, file_name)

    # Get an indexed header row
    df_header = get_table_header(file_df)

    # Get the updated data table
    try:
        df_data_dict = totuples(read_tsv(file_name), 'rows')
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    return {'header': df_header, 'data': df_data_dict, 'message': message}
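# For reference, a minimal sketch of the request body the handler above parses: a top-level
# 'data' object with a 'rows' list (one dict per new row, keyed by existing column names) and
# an optional 'index' insertion point. The column names, token and URL below are hypothetical
# placeholders for illustration only; the real route is defined elsewhere in the web service.
import json
import requests  # assumed available in the client environment

payload = {
    "data": {
        "index": 3,  # insert before original row 3; omit or use -1 to control placement
        "rows": [
            {"Sample Name": "sample_1", "Characteristics[Organism]": "Homo sapiens"}
        ]
    }
}

response = requests.post(
    "https://example.org/ws/studies/MTBLS1/rows/s_MTBLS1.txt",  # hypothetical endpoint
    headers={"user_token": "MY-API-TOKEN"},
    data=json.dumps(payload),
)
print(response.status_code)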
def post(self, study_id):
    log_request(request)
    # param validation
    if study_id is None:
        abort(404)

    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('existing_char_name', help="Characteristics name")
    parser.add_argument('existing_char_value', help="Characteristics value")
    args = parser.parse_args()
    existing_characteristics_name = args['existing_char_name']
    existing_characteristics_value = args['existing_char_value']

    if existing_characteristics_name is None or existing_characteristics_value is None:
        abort(404)

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    else:
        # user token is required
        abort(401)

    # check for keeping copies
    save_audit_copy = False
    save_msg_str = "NOT be"
    if "save_audit_copy" in request.headers and \
            request.headers["save_audit_copy"].lower() == 'true':
        save_audit_copy = True
        save_msg_str = "be"

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
        wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    isa_study, isa_inv, std_path = iac.get_isa_study(study_id, user_token, skip_load_tables=True,
                                                     study_location=study_location)

    # body content validation
    updated_characteristics = None
    new_column_name = None
    onto = None
    new_value = None
    new_url = None
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        data = data_dict['characteristics']
        # if partial=True missing fields will be ignored
        try:
            # result = SampleSchema().load(data, many=True, partial=False)
            # We do not have to map the characteristics JSON to a schema as we are using this to
            # directly update the dataframe. The ontology is used more than once, so map that
            new_column_name = data[0]['characteristicsName']
            char_type = data[0]['characteristicsType']
            new_value = char_type['annotationValue']
            new_url = char_type['termAccession']
            term_source = char_type['termSource']
            onto = OntologySource(name=term_source['name'],
                                  version=term_source['version'],
                                  file=term_source['file'],
                                  description=term_source['description'])
            # Check that the ontology is referenced in the investigation
            add_ontology_to_investigation(isa_inv, onto.name, onto.version, onto.file, onto.description)
        except Exception as e:
            abort(412)
    except (ValidationError, Exception):
        abort(400)

    # update Study Characteristics details
    logger.info('Updating Study Characteristics details for %s', study_id)

    if existing_characteristics_name != new_column_name:
        # update the column header value for the characteristics
        update_ontolgies_in_isa_tab_sheets('characteristics', existing_characteristics_name,
                                           new_column_name, study_location, isa_study)

    # Now it is the cell values that need updating
    update_characteristics_in_sample_sheet(onto.name, new_url, new_column_name,
                                           existing_characteristics_value, new_value,
                                           study_location, isa_study)

    logger.info("A copy of the previous files will %s saved", save_msg_str)
    iac.write_isa_study(isa_inv, user_token, std_path, save_investigation_copy=save_audit_copy)

    logger.info('Updated %s', existing_characteristics_value)
    return {"Success": " Sample sheet updated"}
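# A minimal sketch of the 'characteristics' body the handler above unpacks, inferred from the
# parsing code; only the keys actually read are shown, and the example values (organism, term
# accession, term source details) are hypothetical illustrations rather than required values.
payload = {
    "characteristics": [
        {
            "characteristicsName": "Characteristics[Organism]",
            "characteristicsType": {
                "annotationValue": "Homo sapiens",
                "termAccession": "http://purl.obolibrary.org/obo/NCBITaxon_9606",
                "termSource": {
                    "name": "NCBITAXON",
                    "version": "4",
                    "file": "http://data.bioontology.org/ontologies/NCBITAXON",
                    "description": "NCBI Organismal Classification"
                }
            }
        }
    ]
}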
def post(self):
    log_request(request)
    parser = reqparse.RequestParser()
    parser.add_argument('source', help='source to update')
    source = None
    if request.args:
        args = parser.parse_args(req=request)
        source = args['source']
    if source:
        source = source.strip()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]
    else:
        abort(401)

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
        wsc.get_permissions('MTBLS1', user_token)
    if not is_curator:
        abort(403)

    if source == 'curation log-Database Query':
        try:
            logger.info('Updating curation log-Database Query')
            curation_log_database_query()
            return jsonify({'curation log update': True})
        except Exception as e:
            logger.info(e)
            print(e)
    elif source == 'curation log-Database update':
        try:
            logger.info('Updating curation log-Database update')
            curation_log_database_update()
            return jsonify({'Database update': True})
        except Exception as e:
            logger.info(e)
            print(e)
    elif source == 'MTBLS statistics':
        try:
            logger.info('Updating MTBLS statistics')
            MTBLS_statistics_update()
            return jsonify({'success': True})
        except Exception as e:
            logger.info(e)
            print(e)
    elif source == 'empty studies':
        try:
            logger.info('Get list of empty studies')
            blank_inv, no_inv = get_empty_studies()
            return jsonify({'Investigation files check': {
                'Empty investigation': {'counts': len(blank_inv), 'list': blank_inv},
                'Missing investigation': {'counts': len(no_inv), 'list': no_inv}}})
        except Exception as e:
            logger.info(e)
            print(e)
    elif source == 'MARIANA study_classify':
        data = {'data': {**untarget_NMR(), **untarget_LCMS(), **NMR_and_LCMS()}}
        time_stamp = {"created_at": "2020-07-20",
                      "updated_at": datetime.today().strftime('%Y-%m-%d')}
        res = {**time_stamp, **data}
        file_name = 'study_classify.json'
        file_path = app.config.get('MTBLS_FTP_ROOT') + app.config.get('MARIANA_PATH')
        writeDataToFile(file_path + file_name, res, True)
        return jsonify(res)
    elif source == 'ftp file permission':
        submit, curation, review = file_permission()
        if len(submit) + len(curation) + len(review) == 0:
            return jsonify({'result': 'Nothing to change'})
        else:
            res = {"Change ftp folder access permission": {
                'Submission studies (770)': submit,
                'In curation studies (750)': curation,
                'In review studies (550)': review}}
            return jsonify(res)
    elif source == 'test cronjob':
        pass
    else:
        abort(400)
def get(self):
    log_request(request)
    parser = reqparse.RequestParser()
    parser.add_argument('studyID', help='Metabolights studyID')
    studyID = None
    if request.args:
        args = parser.parse_args(req=request)
        studyID = args['studyID']
    if studyID:
        studyID = studyID.strip().upper()

    parser.add_argument('kegg_only', help="only return KEGG IDs")
    kegg_only = False
    if request.args:
        args = parser.parse_args(req=request)
        kegg = args['kegg_only']
        if not kegg:
            kegg_only = False
        elif kegg and kegg.lower() in ['true', '1']:
            kegg_only = True
        elif kegg and kegg.lower() in ['false', '0']:
            kegg_only = False
        else:
            abort(400)

    # chebiID = []
    # keggID = []
    result = {}

    # if len(request.data.decode('utf-8')) > 0:
    #     try:
    #         data_dict = json.loads(request.data.decode('utf-8'))
    #         chebiID = data_dict['CHEBIID']
    #         keggID = data_dict['KEGGID']
    #     except Exception as e:
    #         logger.info(e)
    #         print(e)
    #         abort(400)

    if studyID:
        uni_organism = uniqueOrganism(studyID)
        if len(uni_organism) > 1:
            res = {org: [] for org in uni_organism}

            # get list of ISA files
            try:
                assay_file, investigation_file, sample_file, maf_file = getFileList(studyID)
            except Exception:
                assay_file, investigation_file, sample_file, maf_file = '', '', '', ''
                print('Failed to load study', studyID)

            # sample
            sample = get_sample_file(studyID=studyID, sample_file_name=sample_file)
            sample = sample[['Sample Name', 'Characteristics[Organism]']]
            organisms = list(sample['Characteristics[Organism]'].unique())

            # maf
            from collections import defaultdict
            result = defaultdict(list, {key: [] for key in organisms})
            for maf_name in maf_file:
                res = maf_reader(studyID, maf_name, sample_df=sample)
                for i, j in res.items():
                    result[i].extend(j)
            result = dict(result)
        elif len(uni_organism) == 1:
            query = '''SELECT DISTINCT DATABASE_IDENTIFIER
                       FROM MAF_INFO
                       WHERE ACC = '{studyID}'
                       AND (DATABASE_IDENTIFIER <> '') IS NOT FALSE'''.format(studyID=studyID)
            postgresql_pool, conn, cursor = get_connection()
            cursor.execute(query)
            # d = cursor.fetchall()
            ID = [r[0] for r in cursor.fetchall()]
            result = {uni_organism[0]: ID}
        else:
            abort(400)

        for org, ids in result.items():
            pair1 = match_chebi_kegg([x for x in ids if 'chebi' in x.lower()], [])
            pair2 = match_hmdb_kegg([x for x in ids if 'hmdb' in x.lower()], [])
            result[org] = {**pair1, **pair2}

    # elif len(chebiID) > 0 or len(keggID) > 0:
    #     result['input_ids'] = match_chebi_kegg(chebiID, keggID)

    if kegg_only:
        try:
            # lstrip('cpd:') removes the lowercase 'cpd:' prefix characters from KEGG compound IDs
            res = {k: [x.lstrip('cpd:').upper() for x in list(v.values())]
                   for k, v in result.items() if len(v) > 0}
            result = {}
            for k in res.keys():
                new_key = get_kegg_organism_abbr(k)
                result[new_key] = res[k]
            return jsonify(result)
        except Exception:
            return []
    else:
        return jsonify(result)
def post(self, study_id, file_name):
    log_request(request)

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_row = data_dict['data']
    except KeyError:
        new_row = None

    if new_row is None:
        abort(417, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    try:
        for element in new_row:
            element.pop('index', None)  # Remove "index:n" element, this is the original row number
    except Exception:
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and TSV file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file name was not found")

    # Validate column names in new rows
    valid_column_name, message = validate_row(file_df, new_row, "post")
    if not valid_column_name:
        abort(417, message)

    if new_row[0]:
        file_df = file_df.append(new_row, ignore_index=True)  # Add new row to the spreadsheet (TSV file)
    else:
        file_df = file_df.append(pd.Series(), ignore_index=True)

    message = write_tsv(file_df, file_name)

    # Get an indexed header row
    df_header = get_table_header(file_df)

    # Get the updated data table
    try:
        df_data_dict = totuples(read_tsv(file_name), 'rows')
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    return {'header': df_header, 'data': df_data_dict, 'message': message}
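# Unlike the earlier row handler, this variant reads the new rows directly from a top-level
# 'data' list. A minimal sketch of that body, inferred from the parsing above; the column
# names and values are hypothetical, and any 'index' entry is stripped before the append.
payload = {
    "data": [
        {"index": 5, "Sample Name": "sample_2", "Characteristics[Organism]": "Mus musculus"}
    ]
}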