def setup():
    """Module-level fixture: build test_query.h5 and open it as the global `fid`.

    Creates the HDF5 file from the 'basic' mzML test fixture and leaves a
    PyTables handle in the module-global `fid` for the query tests to share.
    """
    global fid
    paths = get_test_data()
    out_file = 'test_query.h5'
    mzml_to_hdf(paths['basic'], out_file_name=out_file)
    # BUG FIX: the original repeated the literal 'test_query.h5' here instead
    # of reusing out_file, so renaming the file in one place would silently
    # break the other.
    fid = tables.open_file(out_file)
def test_loader():
    """Smoke test: converting the 'basic' mzML fixture yields a valid HDF5 file."""
    fixture_paths = get_test_data()
    target = 'test_loader.h5'
    mzml_to_hdf(fixture_paths['basic'], out_file_name=target)
    check_file(target)
def test_ms_ms():
    """The 'ms_ms' fixture must produce the expected number of positive MS2 rows."""
    paths = get_test_data()
    file2 = 'ms_ms.h5'
    mzml_to_hdf(paths['ms_ms'], out_file_name=file2)
    fid = tables.open_file(file2)
    # BUG FIX: the original never closed the PyTables handle, leaking it into
    # subsequent tests; close it even when the assertion fails.
    try:
        table = fid.root.ms2_pos
        assert table.nrows == 604775, table.nrows
    finally:
        fid.close()
def test_32_64():
    """A file mixing 32- and 64-bit encodings must yield the expected MS1 row counts."""
    paths = get_test_data()
    fname = 'mix32_64.h5'
    mzml_to_hdf(paths['mix32_64'], out_file_name=fname)
    fid = tables.open_file(fname)
    print(fid)
    # BUG FIX: the original leaked the open PyTables handle; close it even
    # when one of the assertions fails.
    try:
        table = fid.root.ms1_pos
        assert table.nrows == 1041183, table.nrows
        table2 = fid.root.ms1_neg
        assert table2.nrows == 762699, table2.nrows
    finally:
        fid.close()
def transform(shock_service_url=None, handle_service_url=None,
              output_file_name=None, input_directory=None,
              working_directory=None, shock_id=None, handle_id=None,
              input_mapping=None, mzml_file_name=None, polarity=None,
              atlases=None, group=None, inclusion_order=None,
              normalization_factor=None, retention_correction=None,
              level=logging.INFO, logger=None):
    """
    Converts mzML file to MetaboliteAtlas2_MAFileInfo json string.

    Args:
        shock_service_url: A url for the KBase SHOCK service.
        handle_service_url: A url for the KBase Handle Service.
        output_file_name: A file name where the output JSON string should be
            stored. If the output file name is not specified the name will
            default to the name of the input file appended with '_finfo'.
        input_directory: The directory where files will be read from.
        working_directory: The directory the resulting json file will be
            written to.
        shock_id: Shock id for the hdf file if it already exists in shock.
        handle_id: Handle id for the hdf file if it already exists as a handle.
        input_mapping: JSON string mapping of input files to expected types.
            If you don't get this you need to scan the input directory and
            look for your files.
        level: Logging level, defaults to logging.INFO.
        atlases: List of MetaboliteAtlas atlas IDs.
        mzml_file_name: Name of the file, optional. Defaults to the file name.
        polarity: Run polarity.
        group: Run group.
        inclusion_order: Run inclusion_order.
        retention_correction: Run retention_correction.
        normalization_factor: Run normalization factor.

    Returns:
        JSON files on disk that can be saved as a KBase workspace objects.

    Authors:
        Steven Silvester
    """
    if logger is None:
        logger = script_utils.stderrlogger(__file__)

    logger.info("Starting conversion of mzML to MetaboliteAtlas2.MAFileInfo")
    token = os.environ.get('KB_AUTH_TOKEN')

    if not working_directory or not os.path.isdir(working_directory):
        raise Exception(
            "The working directory {0} is not a valid directory!".format(
                working_directory))

    logger.info("Scanning for mzML files.")
    valid_extensions = [".mzML"]
    files = os.listdir(input_directory)
    mzml_files = [x for x in files
                  if os.path.splitext(x)[-1] in valid_extensions]
    # BUG FIX: was `assert len(mzml_files) != 0`, which is silently stripped
    # under `python -O`; raise explicitly instead.
    if not mzml_files:
        raise Exception(
            "No mzML files found in directory {0}!".format(input_directory))

    logger.info("Found {0} files".format(len(mzml_files)))

    for fname in mzml_files:
        path = os.path.join(input_directory, fname)
        if not os.path.isfile(path):
            raise Exception(
                "The input file name {0} is not a file!".format(path))

        hdf_file = mzml_loader.mzml_to_hdf(path)

        if shock_service_url:
            shock_info = script_utils.upload_file_to_shock(
                logger, shock_service_url, hdf_file, token=token)

        run_info = dict()
        run_info['mzml_file_name'] = (mzml_file_name or
                                      fname.replace('.mzML', ''))
        run_info['atlases'] = atlases or []
        # Optional run metadata is only recorded when explicitly supplied.
        if polarity is not None:
            run_info['polarity'] = polarity
        if group is not None:
            run_info['group'] = group
        if inclusion_order is not None:
            run_info['inclusion_order'] = inclusion_order
        if normalization_factor is not None:
            run_info['normalization_factor'] = normalization_factor
        if retention_correction is not None:
            run_info['retention_correction'] = retention_correction

        if shock_service_url:
            handle_id = script_utils.getHandles(
                logger, shock_service_url, handle_service_url,
                [shock_info["id"]], token=token)[0]
            run_info["run_file_id"] = handle_id
        else:
            run_info['run_file_id'] = hdf_file

        # BUG FIX: the output_file_name parameter was documented but ignored
        # (unconditionally overwritten). Honor it when given; fall back to the
        # documented per-file default otherwise.
        # NOTE(review): with multiple input files a caller-supplied name is
        # reused for every file, so the last one wins -- confirm intent.
        out_name = output_file_name or fname.replace('.mzML', '_finfo.json')

        # This generates the json for the object
        objectString = simplejson.dumps(run_info, sort_keys=True, indent=4)

        output_file_path = os.path.join(working_directory, out_name)
        with open(output_file_path, "w") as outFile:
            outFile.write(objectString)

    logger.info("Conversion completed.")
# NOTE(review): this is a verbatim duplicate of a `transform` defined earlier
# in this file; being later, this definition is the one that takes effect.
# Consider deleting one of the two copies.
def transform(shock_service_url=None, handle_service_url=None,
              output_file_name=None, input_directory=None,
              working_directory=None, shock_id=None, handle_id=None,
              input_mapping=None, mzml_file_name=None, polarity=None,
              atlases=None, group=None, inclusion_order=None,
              normalization_factor=None, retention_correction=None,
              level=logging.INFO, logger=None):
    """
    Converts mzML file to MetaboliteAtlas2_MAFileInfo json string.

    Args:
        shock_service_url: A url for the KBase SHOCK service.
        handle_service_url: A url for the KBase Handle Service.
        output_file_name: A file name where the output JSON string should be
            stored. If the output file name is not specified the name will
            default to the name of the input file appended with '_finfo'.
        input_directory: The directory where files will be read from.
        working_directory: The directory the resulting json file will be
            written to.
        shock_id: Shock id for the hdf file if it already exists in shock.
        handle_id: Handle id for the hdf file if it already exists as a handle.
        input_mapping: JSON string mapping of input files to expected types.
            If you don't get this you need to scan the input directory and
            look for your files.
        level: Logging level, defaults to logging.INFO.
        atlases: List of MetaboliteAtlas atlas IDs.
        mzml_file_name: Name of the file, optional. Defaults to the file name.
        polarity: Run polarity.
        group: Run group.
        inclusion_order: Run inclusion_order.
        retention_correction: Run retention_correction.
        normalization_factor: Run normalization factor.

    Returns:
        JSON files on disk that can be saved as a KBase workspace objects.

    Authors:
        Steven Silvester
    """
    if logger is None:
        logger = script_utils.stderrlogger(__file__)

    logger.info("Starting conversion of mzML to MetaboliteAtlas2.MAFileInfo")
    token = os.environ.get('KB_AUTH_TOKEN')

    if not working_directory or not os.path.isdir(working_directory):
        raise Exception(
            "The working directory {0} is not a valid directory!".format(
                working_directory))

    logger.info("Scanning for mzML files.")
    valid_extensions = [".mzML"]
    files = os.listdir(input_directory)
    mzml_files = [x for x in files
                  if os.path.splitext(x)[-1] in valid_extensions]
    # BUG FIX: was `assert len(mzml_files) != 0`, which is silently stripped
    # under `python -O`; raise explicitly instead.
    if not mzml_files:
        raise Exception(
            "No mzML files found in directory {0}!".format(input_directory))

    logger.info("Found {0} files".format(len(mzml_files)))

    for fname in mzml_files:
        path = os.path.join(input_directory, fname)
        if not os.path.isfile(path):
            raise Exception(
                "The input file name {0} is not a file!".format(path))

        hdf_file = mzml_loader.mzml_to_hdf(path)

        if shock_service_url:
            shock_info = script_utils.upload_file_to_shock(
                logger, shock_service_url, hdf_file, token=token)

        run_info = dict()
        run_info['mzml_file_name'] = (mzml_file_name or
                                      fname.replace('.mzML', ''))
        run_info['atlases'] = atlases or []
        # Optional run metadata is only recorded when explicitly supplied.
        if polarity is not None:
            run_info['polarity'] = polarity
        if group is not None:
            run_info['group'] = group
        if inclusion_order is not None:
            run_info['inclusion_order'] = inclusion_order
        if normalization_factor is not None:
            run_info['normalization_factor'] = normalization_factor
        if retention_correction is not None:
            run_info['retention_correction'] = retention_correction

        if shock_service_url:
            handle_id = script_utils.getHandles(
                logger, shock_service_url, handle_service_url,
                [shock_info["id"]], token=token)[0]
            run_info["run_file_id"] = handle_id
        else:
            run_info['run_file_id'] = hdf_file

        # BUG FIX: the output_file_name parameter was documented but ignored
        # (unconditionally overwritten). Honor it when given; fall back to the
        # documented per-file default otherwise.
        # NOTE(review): with multiple input files a caller-supplied name is
        # reused for every file, so the last one wins -- confirm intent.
        out_name = output_file_name or fname.replace('.mzML', '_finfo.json')

        # This generates the json for the object
        objectString = simplejson.dumps(run_info, sort_keys=True, indent=4)

        output_file_path = os.path.join(working_directory, out_name)
        with open(output_file_path, "w") as outFile:
            outFile.write(objectString)

    logger.info("Conversion completed.")
def convert(file):
    """Convert one mzML file to HDF5 and record it in the LCMS-run database.

    Args:
        file: A ``(index, filename)`` pair; ``index`` is only used for
            progress output.

    Side effects: writes progress/errors to stdout/stderr, chmods files,
    optionally copies to a pasteur backup, takes an advisory flock on the
    mzML file, and updates the module-level ``readonly_files`` /
    ``other_errors`` accumulators on failure.
    """
    ind = file[0]
    fname = file[1]
    sys.stdout.write('(%s): %s\n' % (ind + 1, fname))
    sys.stdout.flush()

    # Get relevant information about the file from its path.
    info = patt.match(os.path.abspath(fname))
    if info:
        info = info.groupdict()
    else:
        sys.stdout.write("Invalid path name: %s\n" % fname)
        sys.stdout.flush()
        return
    dirname = os.path.dirname(fname)

    # Determine the owning user, falling back to the directory owner and
    # finally to the username embedded in the path.
    try:
        username = pwd.getpwuid(os.stat(fname).st_uid).pw_name
    except OSError:
        try:
            username = pwd.getpwuid(os.stat(dirname).st_uid).pw_name
        except Exception:
            username = info['username']

    # Set owner/group read-write permissions (0o660); best-effort.
    # (The original comment said "read only", which did not match the mode.)
    try:
        os.chmod(fname, 0o660)
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()

    # Copy the original file to a pasteur backup.
    # BUG FIX: use .get() -- os.environ['USER'] raises KeyError when USER is
    # unset (e.g. under cron).
    if os.environ.get('USER') == 'pasteur':
        pasteur_path = fname.replace('raw_data', 'pasteur_backup')
        dname = os.path.dirname(pasteur_path)
        if not os.path.exists(dname):
            os.makedirs(dname)
        try:
            shutil.copy(fname, pasteur_path)
        except IOError as e:
            # Record the unreadable directory for later reporting and bail.
            if (username not in readonly_files):
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
            return

    # Get a lock on the mzml file to prevent interference.
    fid = None
    try:
        fid = open(fname, 'r')
        fcntl.flock(fid, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        # BUG FIX: `fid` was unbound (NameError) here when open() itself
        # raised; only close it if the open succeeded.
        if fid is not None:
            fid.close()
        msg = '%s already converting in another process\n' % fname
        sys.stderr.write(msg)
        sys.stderr.flush()
        return

    # Convert to HDF and store the entry in the database.
    try:
        hdf5_file = fname.replace('mzML', 'h5')
        sys.stderr.write('hdf5file is: %s' % hdf5_file)
        # Get acquisition time here.
        acquisition_time = get_acqtime_from_mzml(fname)
        mzml_to_hdf(fname, hdf5_file, True)
        os.chmod(hdf5_file, 0o660)
        description = info['experiment'] + ' ' + info['path']
        ctime = os.stat(fname).st_ctime
        # Add this to the database unless it is already there.
        try:
            runs = retrieve('lcmsrun', username='******', mzml_file=fname)
        except Exception:
            runs = list()
        if not len(runs):
            run = LcmsRun(name=info['path'], description=description,
                          username=info['username'],
                          experiment=info['experiment'],
                          creation_time=ctime, last_modified=ctime,
                          mzml_file=fname, hdf5_file=hdf5_file,
                          acquisition_time=acquisition_time)
            store(run)
    except Exception as e:
        if 'exists but it can not be written' in str(e):
            if (username not in readonly_files):
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
        else:
            msg = traceback.format_exception(*sys.exc_info())
            msg.insert(0, 'Cannot convert %s' % fname)
            dat = info['username']
            if (dat not in other_errors):
                other_errors[info['username']] = list()
            other_errors[info['username']].append('\n'.join(msg))
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()
        # Clean up any partial output; the file may not exist.
        # BUG FIX: was a bare `except:` which also swallowed KeyboardInterrupt
        # and SystemExit.
        try:
            os.remove(hdf5_file)
        except OSError:
            pass
    finally:
        fid.close()