Exemple #1
0
def run_analysis(**kwargs):
    """
    Run the whole pipeline: load the peak list, annotate it, search the
    databases, score the assignments and export the results.

    Required keyword arguments (a missing one raises ``KeyError``):

    :param xcms_pkl: path to an existing XCMS peak list file
    :param polarity: ``'negative'`` or ``'positive'``
    :param mz_tol_ppm: m/z tolerance in ppm, passed to ``ExperimentalSettings``
    :param is_dims: flag passed through to ``ExperimentalSettings``
    :param db_search: iterable of database names; only ``'hmdb'`` and
        ``'lmsd'`` are kept, any other entry is logged and ignored
    :param output: destination handed to ``ResultsExporter``
    :param bayes: when truthy, also run the ``BayesianInferer`` scoring step

    Optional keyword arguments (default to ``None``): ``frag_conf``,
    ``neg_adducts_conf``, ``pos_adducts_conf``.

    :raises ValueError: if ``xcms_pkl`` is empty or is not an existing file,
        or if ``polarity`` is not ``'negative'`` or ``'positive'``
    :return: None
    """
    # Pop every mandatory option up front so that a missing key fails with
    # KeyError before any validation or work is done.
    xcms_pkl = kwargs.pop('xcms_pkl')
    polarity = kwargs.pop('polarity')
    mz_tol_ppm = kwargs.pop('mz_tol_ppm')
    is_dims = kwargs.pop('is_dims')
    db_search = kwargs.pop('db_search')
    output = kwargs.pop('output')
    bayes = kwargs.pop('bayes')

    if not xcms_pkl:
        raise ValueError("Supply a XCMS peaklist.")
    if not os.path.isfile(xcms_pkl):
        raise ValueError("XCMS peaklist path does not exist.")

    # Reject anything that is not an exact match: previously any value other
    # than 'negative' (typos, wrong case, ...) silently ran in positive mode.
    if polarity not in ('negative', 'positive'):
        raise ValueError("polarity must be 'negative' or 'positive'.")
    if polarity == 'negative':
        ionisation_mode = IONISATION_MODE.NEG
    else:
        ionisation_mode = IONISATION_MODE.POS

    exp_settings = ExperimentalSettings(
        mz_tol_ppm,
        ionisation_mode,
        is_dims,
        frag_conf=kwargs.get('frag_conf'),
        neg_adducts_conf=kwargs.get('neg_adducts_conf'),
        pos_adducts_conf=kwargs.get('pos_adducts_conf'))

    # Drop the handlers already attached to the root logger; basicConfig()
    # is a no-op when the root logger already has handlers.
    logging.getLogger('').handlers = []
    logging.basicConfig(level=logging.INFO)

    peakels = PeakListReader(xcms_pkl, exp_settings).get_peakels()
    logging.info("Peaklist loaded.")

    # annotation
    peakels_annotator = PeakelsAnnotator(peakels, exp_settings)
    logging.info("Annotating...")

    best_monos = peakels_annotator.annotate()
    logging.info("Monoisotopic found: #{0}".format(len(best_monos)))

    # database selection: keep the supported names, warn about the others
    supported_dbs = ('hmdb', 'lmsd')
    selected_dbs = []
    for name in db_search:
        if name in supported_dbs:
            selected_dbs.append(name)
        else:
            logging.warning(
                'Error specifying db (got {}), only hmdb and lmsd are supported...'
                .format(name))
    # DatabaseSearch receives the selected names joined with '+'
    db = '+'.join(selected_dbs)

    search = DatabaseSearch(db, exp_settings)
    logging.info("Searching in database...")
    adducts_l = ['H1']
    nb_metabs, not_found = search.assign_formula(peakels, adducts_l,
                                                 exp_settings.mz_tol_ppm)
    logging.info("Found #{} metabolites, #{} "
                 "elution peak with no metabolite assignments".format(
                     nb_metabs, not_found))

    # first, simplistic scoring; the model is given 1.5x the search tolerance
    model = StatsModel(peakels, exp_settings.mz_tol_ppm * 1.5)
    logging.info("Compute score 1....")
    # populate annotations objects
    model.calculate_score()
    logging.info("Done.")

    # optional second scoring with the bayesian inferer
    if bayes:
        bi = BayesianInferer(peakels, exp_settings)
        logging.info("Compute score 2...")
        # populate annotations object
        bi.infer_assignment_probabilities()

    logging.info('Exporting results...')
    # export the peakels ordered by their id
    exporter = ResultsExporter(output,
                               sorted(peakels, key=lambda peakel: peakel.id))
    exporter.write()
    logging.info("Done.")