Example #1
def setup():
    global fid
    paths = get_test_data()
    out_file = 'test_query.h5'

    # Convert the sample mzML file and keep a module-level handle on the result.
    mzml_to_hdf(paths['basic'], out_file_name=out_file)
    fid = tables.open_file(out_file)
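A setup() like this normally pairs with a teardown() that releases the PyTables handle and deletes the scratch file. A minimal sketch, assuming the same module-level fid and file name (this helper is not part of the original suite):

import os

def teardown():
    # Close the handle opened in setup() and remove the scratch file.
    fid.close()
    if os.path.exists('test_query.h5'):
        os.remove('test_query.h5')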
Example #2
def test_loader():
    paths = get_test_data()
    out_file = 'test_loader.h5'

    # Convert the sample mzML file, then validate the resulting HDF5 file.
    mzml_to_hdf(paths['basic'], out_file_name=out_file)

    check_file(out_file)
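check_file is a helper from the same test suite and its body is not shown here. A hand-rolled check in the same spirit might open the output and assert that the expected spectrum tables exist; this is a sketch only, with table names inferred from the later examples (ms2_neg is an assumption):

import tables

def check_file(out_file):
    # Verify that the converted file contains the expected spectrum tables.
    with tables.open_file(out_file) as fid:
        for name in ('ms1_pos', 'ms1_neg', 'ms2_pos', 'ms2_neg'):
            assert '/' + name in fid, name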
Example #3
def test_ms_ms():
    paths = get_test_data()
    file2 = 'ms_ms.h5'

    mzml_to_hdf(paths['ms_ms'], out_file_name=file2)

    fid = tables.open_file(file2)

    table = fid.root.ms2_pos

    assert table.nrows == 604775, table.nrows
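Beyond counting rows, a PyTables table such as ms2_pos supports in-kernel queries. A sketch, assuming the converter stores an mz column (inspect table.colnames for the real schema):

import tables

with tables.open_file('ms_ms.h5') as fid:
    table = fid.root.ms2_pos
    print(table.colnames)  # inspect the actual column names first
    # 'mz' is an assumed column name; read_where evaluates the
    # condition in-kernel without loading the whole table.
    hits = table.read_where('(mz > 100.0) & (mz < 101.0)')
    print(len(hits))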
Example #4
def test_32_64():
    paths = get_test_data()
    fname = 'mix32_64.h5'

    mzml_to_hdf(paths['mix32_64'], out_file_name=fname)

    fid = tables.open_file(fname)
    print(fid)  # printing the File object dumps the HDF5 tree
    table = fid.root.ms1_pos

    assert table.nrows == 1041183, table.nrows

    table2 = fid.root.ms1_neg

    assert table2.nrows == 762699, table2.nrows
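These tests leave the PyTables handle open. tables.open_file returns a File object that also works as a context manager, so the same assertions can be written without leaking the handle:

import tables

def test_32_64():
    paths = get_test_data()
    fname = 'mix32_64.h5'
    mzml_to_hdf(paths['mix32_64'], out_file_name=fname)

    # The File object is closed automatically on exit from the with-block.
    with tables.open_file(fname) as fid:
        assert fid.root.ms1_pos.nrows == 1041183
        assert fid.root.ms1_neg.nrows == 762699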
Example #5
def transform(shock_service_url=None,
              handle_service_url=None,
              output_file_name=None,
              input_directory=None,
              working_directory=None,
              shock_id=None,
              handle_id=None,
              input_mapping=None,
              mzml_file_name=None,
              polarity=None,
              atlases=None,
              group=None,
              inclusion_order=None,
              normalization_factor=None,
              retention_correction=None,
              level=logging.INFO,
              logger=None):
    """
    Converts mzML file to MetaboliteAtlas2_MAFileInfo json string.

    Args:
        shock_service_url: A url for the KBase SHOCK service.
        handle_service_url: A url for the KBase Handle Service.
        output_file_name: A file name where the output JSON string should be
                          stored.
                          If the output file name is not specified the name
                          will default
                          to the name of the input file appended with
                           '_finfo'.
        input_directory: The directory where files will be read from.
        working_directory: The directory the resulting json file will be
                           written to.
        shock_id: Shock id for the hdf file if it already exists in shock
        handle_id: Handle id for the hdf file if it already exists as a
                    handle
        input_mapping: JSON string mapping of input files to expected types.
                       If you don't get this you need to scan the input
                       directory and look for your files.
        level: Logging level, defaults to logging.INFO.
        atlases: List of MetaboliteAtlas atlas IDs.
        mzml_file_name: Name of the file, optional.  Defaults to the file name.
        polarity: Run polarity.
        group: Run group.
        inclusion_order: Run inclusion_order.
        retention_correction: Run retention_correction.
        normalization_factor: Run normalization factor.

    Returns:
        JSON files on disk that can be saved as a KBase workspace objects.

    Authors:
        Steven Silvester
    """

    if logger is None:
        logger = script_utils.stderrlogger(__file__)

    logger.info("Starting conversion of mzML to MetaboliteAtlas2.MAFileInfo")
    token = os.environ.get('KB_AUTH_TOKEN')

    if not working_directory or not os.path.isdir(working_directory):
        raise Exception(
            "The working directory {0} is not a valid directory!".format(
                working_directory))

    logger.info("Scanning for mzML files.")

    valid_extensions = [".mzML"]

    files = os.listdir(input_directory)
    mzml_files = [
        x for x in files if os.path.splitext(x)[-1] in valid_extensions
    ]

    assert mzml_files, 'No mzML files found in the input directory'

    logger.info("Found {0} files".format(len(mzml_files)))

    for fname in mzml_files:
        path = os.path.join(input_directory, fname)

        if not os.path.isfile(path):
            raise Exception(
                "The input file name {0} is not a file!".format(path))

        hdf_file = mzml_loader.mzml_to_hdf(path)

        if shock_service_url:
            shock_info = script_utils.upload_file_to_shock(logger,
                                                           shock_service_url,
                                                           hdf_file,
                                                           token=token)

        run_info = dict()
        run_info['mzml_file_name'] = (mzml_file_name
                                      or fname.replace('.mzML', ''))
        run_info['atlases'] = atlases or []
        if polarity is not None:
            run_info['polarity'] = polarity
        if group is not None:
            run_info['group'] = group
        if inclusion_order is not None:
            run_info['inclusion_order'] = inclusion_order
        if normalization_factor is not None:
            run_info['normalization_factor'] = normalization_factor
        if retention_correction is not None:
            run_info['retention_correction'] = retention_correction

        if shock_service_url:
            handle_id = script_utils.getHandles(logger,
                                                shock_service_url,
                                                handle_service_url,
                                                [shock_info["id"]],
                                                token=token)[0]
            run_info["run_file_id"] = handle_id
        else:
            run_info['run_file_id'] = hdf_file

        output_file_name = fname.replace('.mzML', '_finfo.json')

        # Serialize the run info as JSON for the workspace object.
        objectString = simplejson.dumps(run_info, sort_keys=True, indent=4)

        output_file_path = os.path.join(working_directory, output_file_name)
        with open(output_file_path, "w") as outFile:
            outFile.write(objectString)

    logger.info("Conversion completed.")
Example #6
def convert(file):
    # ``file`` is an (index, filename) pair, e.g. from enumerate().
    ind, fname = file

    sys.stdout.write('(%s): %s\n' % (ind + 1, fname))
    sys.stdout.flush()

    # Get relevant information about the file.
    info = patt.match(os.path.abspath(fname))
    if info:
        info = info.groupdict()
    else:
        sys.stdout.write("Invalid path name: %s\n" % fname)
        sys.stdout.flush()
        return
    dirname = os.path.dirname(fname)
    try:
        username = pwd.getpwuid(os.stat(fname).st_uid).pw_name
    except OSError:
        try:
            username = pwd.getpwuid(os.stat(dirname).st_uid).pw_name
        except Exception:
            username = info['username']

    # Make the file read-write for owner and group only (0o660).
    try:
        os.chmod(fname, 0o660)
    except Exception as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()

    # Copy the original file to a pasteur backup.
    if os.environ['USER'] == 'pasteur':
        pasteur_path = fname.replace('raw_data', 'pasteur_backup')
        dname = os.path.dirname(pasteur_path)
        if not os.path.exists(dname):
            os.makedirs(dname)
        try:
            shutil.copy(fname, pasteur_path)
        except IOError:
            if (username not in readonly_files):
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
            return

    # Take an exclusive lock on the mzML file to prevent interference
    # from another conversion process.
    fid = open(fname, 'r')
    try:
        fcntl.flock(fid, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        fid.close()
        msg = '%s already converting in another process\n' % fname
        sys.stderr.write(msg)
        sys.stderr.flush()
        return

    # Convert to HDF and store the entry in the database.
    try:
        hdf5_file = fname.replace('mzML', 'h5')
        sys.stderr.write('hdf5 file is: %s\n' % hdf5_file)
        # Get the acquisition time from the mzML metadata.
        acquisition_time = get_acqtime_from_mzml(fname)
        mzml_to_hdf(fname, hdf5_file, True)
        os.chmod(hdf5_file, 0o660)
        description = info['experiment'] + ' ' + info['path']
        ctime = os.stat(fname).st_ctime
        # Add this to the database unless it is already there
        try:
            runs = retrieve('lcmsrun', username='******', mzml_file=fname)
        except Exception:
            runs = list()
        if not runs:
            run = LcmsRun(name=info['path'],
                          description=description,
                          username=info['username'],
                          experiment=info['experiment'],
                          creation_time=ctime,
                          last_modified=ctime,
                          mzml_file=fname,
                          hdf5_file=hdf5_file,
                          acquisition_time=acquisition_time)
            store(run)
    except Exception as e:
        if 'exists but it can not be written' in str(e):
            if (username not in readonly_files):
                readonly_files[username] = set()
            readonly_files[username].add(dirname)
        else:
            msg = traceback.format_exception(*sys.exc_info())
            msg.insert(0, 'Cannot convert %s' % fname)
            dat = info['username']
            if (dat not in other_errors):
                other_errors[dat] = list()
            other_errors[dat].append('\n'.join(msg))
        sys.stderr.write(str(e) + '\n')
        sys.stderr.flush()
        # Remove the partial HDF5 file, if any.
        try:
            os.remove(hdf5_file)
        except OSError:
            pass
    finally:
        fid.close()
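convert takes an (index, filename) pair, which suggests it is mapped over an enumerated file list, typically with a process pool. A sketch, assuming a list of mzML paths (the glob root is hypothetical, and the module-level readonly_files and other_errors dicts are not shared across worker processes without extra coordination):

import glob
import multiprocessing

if __name__ == '__main__':
    mzml_files = glob.glob('/global/raw_data/**/*.mzML', recursive=True)
    # Each worker receives an (index, filename) tuple.
    with multiprocessing.Pool(4) as pool:
        pool.map(convert, list(enumerate(mzml_files)))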