Example #1
0
def get_plugin_and_folder(inputzip=None, inputdir=None, inputfile=None):
    """ Select the single plugin able to process the given input.

    Exactly one of the three inputs must be provided (truthy).

    :kwarg inputzip: zip archive; it is extracted with ``extract_zip``
        into the folder returned by ``set_tmp_folder`` before the
        plugins are probed.
    :kwarg inputdir: directory handed as-is to the plugins.
    :kwarg inputfile: single file handed as-is to the plugins.
    :return: a tuple ``(plugin, path)`` where ``path`` is the extraction
        folder, ``inputdir`` or ``inputfile`` depending on the input
        used.
    :raises MQ2Exception: if zero or several inputs are given, or if
        the input is valid for no plugin or for more than one plugin.

    """
    # Exactly one source must be set: none and several are both rejected
    # with the same message.
    provided = [src for src in (inputzip, inputdir, inputfile) if src]
    if len(provided) != 1:
        raise MQ2Exception('You must provide either a zip file or a '
                           'directory or an input file as input.')

    # retrieve input: file, directory, zip
    if inputzip:
        tmp_folder = set_tmp_folder()
        extract_zip(inputzip, tmp_folder)
    elif inputfile:
        tmp_folder = inputfile
    else:
        tmp_folder = inputdir

    # retrieve the plugins
    plugins = load('MQ2.plugins', subclasses=PluginInterface)
    LOG.debug('Plugin loaded: %s', [plugin.name for plugin in plugins])

    # keep only the plugins that will work
    plugins = [plugin for plugin in plugins if plugin.is_applicable()]
    LOG.debug('Plugin applicable: %s', [plugin.name for plugin in plugins])

    # keep only the plugins that have the file(s) they need: a single
    # file is checked with valid_file(), a folder with get_files()
    if inputfile:
        plugins = [
            plugin for plugin in plugins if plugin.valid_file(tmp_folder)
        ]
    else:
        plugins = [
            plugin for plugin in plugins if plugin.get_files(tmp_folder)
        ]

    LOG.debug('Plugin w/ valid input: %s',
              [plugin.name for plugin in plugins])

    if len(plugins) > 1:
        raise MQ2Exception('Your dataset contains valid input for '
                           'several plugins.')
    if not plugins:
        raise MQ2Exception('Invalid dataset: your input cannot be '
                           'processed by any of the current plugins.')
    return (plugins[0], tmp_folder)
Example #2
0
def get_qtls_matrix(qtl_matrix, matrix, inputfile):
    """Extract for each position the LOD value obtained and save it in a
    matrix.
    This assumes that the first 4 columns are identical across all mqo
    files (ie: the Group, Position and Locus are the same). This
    assumption should hold true if the files were generated from the
    same map.

    :arg qtl_matrix, the column-oriented matrix in which to save the
        output. When empty, it is initialised with the first 4 columns
        of ``matrix``; otherwise the LOD column is appended to it in
        place.
    :arg matrix, the MapQTL file read in memory, as a list of rows.
    :arg inputfile, name (or path) of the inputfile in which the QTLs
        have been found. The trait name is the part of the file name
        located between the first ')_' and '.mqo'.
    :return: the matrix with one extra column whose first cell is the
        trait name and whose other cells are the values of the LOD
        column.
    :raises MQ2Exception: if the fifth column is missing or is not
        named 'LOD'.
    :raises MQ2NoMatrixException: if the first 4 columns differ from
        the ones already stored in ``qtl_matrix``.

    """
    import os

    # Parse the file name only: a ')_' located in one of the parent
    # directories must not leak into the trait name.
    basename = os.path.basename(inputfile)
    trait_name = basename.split(')_', 1)[1].split('.mqo')[0]

    # Transpose rows into columns.
    matrix = list(zip(*matrix))
    # A file with fewer than 5 columns has no LOD column either; report
    # it with the same error rather than a bare IndexError.
    if len(matrix) < 5 or matrix[4][0] != 'LOD':
        raise MQ2Exception(
            'The file "%s" is not supported by MQ2. It may contain an '
            'analysis which does not return LOD values '
            '(such as Kruskal-Wallis or permutation test).' % inputfile)

    if not qtl_matrix:
        qtl_matrix = matrix[:4]
    elif matrix[:4] != qtl_matrix[:4]:
        raise MQ2NoMatrixException(
            'The map used in the file "%s" does not'
            ' correspond to the map used in at least one other file.' %
            inputfile)

    # Replace the 'LOD' header by the trait name to label the column.
    lod_column = list(matrix[4])
    lod_column[0] = trait_name
    qtl_matrix.append(lod_column)
    return qtl_matrix
Example #3
0
def _append_count_to_matrix(qtl_matrixfile, lod_threshold):
    """ Add a '# QTLs' column at the end of the given matrix file.

    The file is read with ``read_input_file`` (comma separated), every
    row but the first one receives the number of its cells, from the
    fourth column onwards, whose value is strictly greater than the
    threshold, and the result is written back to the same file with
    ``write_matrix``. Empty cells are ignored.

    :arg qtl_matrixfile, path to the matrix file to update in place.
    :arg lod_threshold, threshold above which a LOD value is counted as
        a QTL; it is converted with ``float`` for each comparison.
    :raises MQ2Exception: if ``qtl_matrixfile`` does not exist.

    """
    if not os.path.exists(qtl_matrixfile):  # pragma: no cover
        raise MQ2Exception('File not found: "%s"' % qtl_matrixfile)
    matrix = read_input_file(qtl_matrixfile, sep=',')

    # Header row: only gets the title of the new column.
    matrix[0] = list(matrix[0]) + ['# QTLs']

    for index in range(1, len(matrix)):
        cells = list(matrix[index])
        # The first three cells are skipped (presumably marker, linkage
        # group and position -- confirm against the matrix writer).
        hits = sum(
            1 for value in cells[3:]
            if value and float(value) > float(lod_threshold))
        matrix[index] = cells + [str(hits)]

    write_matrix(qtl_matrixfile, matrix)
Example #4
0
    def get_session_identifiers(cls, folder=None, inputfile=None):
        """ List the session identifiers found in a folder or in a file.

        For this plugin the session identifiers are the names of the
        sheets of every workbook whose file name ends with one of the
        ``SUPPORTED_FILES`` extensions. Each name is listed once, in
        order of first appearance.

        :kwarg folder: the path to the folder to walk (sub-folders
            included). An empty list is returned if it is not a
            directory.
        :kwarg inputfile: the path to a single input file. An empty
            list is returned if it is actually a directory.
        :return: the list of sheet names found.
        :raises MQ2Exception: if both ``folder`` and ``inputfile`` are
            given.

        """
        if inputfile and folder:
            raise MQ2Exception('You should specify either a folder or a file')

        names = []

        def collect(path):
            # Record the not yet seen sheet names of a supported workbook.
            for ext in SUPPORTED_FILES:
                if path.endswith(ext):
                    for sheet in xlrd.open_workbook(path).sheets():
                        if sheet.name not in names:
                            names.append(sheet.name)

        if folder:
            if os.path.isdir(folder):
                for root, _dirs, files in os.walk(folder):
                    for basename in files:
                        collect(os.path.join(root, basename))
        elif inputfile:
            if not os.path.isdir(inputfile):
                collect(inputfile)
        return names
Example #5
0
def mq2_run(session_id, plugin, folder, lod_threshold, session):
    """ Run the scripts to extract the QTLs.

    Nothing is run when ``experiment_done`` reports that this
    combination was already processed: its result is returned as-is.
    Otherwise an experiment folder is created under the upload folder
    of the session and ``run_mq2`` writes its output there.

    :arg session_id: the session identifier uniquely identifying the
        MapQTL zip file and the JoinMap map file. The session identifier
        also uniquely identifies the folder in which are the files
        uploaded.
    :arg plugin: the plugin handed to ``run_mq2``.
    :arg folder: the input folder handed to ``run_mq2``.
    :arg lod_threshold: the LOD threshold to use to consider a value
        significant for a QTL.
    :arg session: the MapQTL session/run from which to retrieve
        the QTLs.
    :return: the value returned by ``experiment_done`` when it is not
        ``False``; the code shown here returns nothing otherwise.
    :raises MQ2Exception: if the extraction fails; the experiment
        folder is removed before the error is propagated.
    """
    upload_folder = os.path.join(UPLOAD_FOLDER, session_id)
    already_done = experiment_done(session_id, lod_threshold, session)
    if already_done is not False:
        return already_done
    exp_id = '%s_s%s_t%s' % (generate_exp_id(), session, lod_threshold)
    exp_folder = os.path.join(upload_folder, exp_id)
    if not os.path.exists(exp_folder):
        os.mkdir(exp_folder)

    try:
        run_mq2(plugin,
                folder,
                lod_threshold=lod_threshold,
                session=session,
                outputfolder=exp_folder)

        # NOTE(review): the dimensions are not used in the code visible
        # here -- confirm whether later code relies on them.
        (nline, ncol) = get_matrix_dimensions(
            os.path.join(exp_folder, 'qtls_matrix.csv'))
    except MQ2Exception as err:
        # Do not leave a half-filled experiment folder behind.
        shutil.rmtree(exp_folder)
        raise MQ2Exception(err)
Example #6
0
def read_excel_file(inputfile, sheet_name):
    """ Read one sheet of an excel document into a list of rows.

    Every row is the list of the cell values of the sheet, in column
    order.

    :arg inputfile: excel document to read
    :arg sheet_name: the name of the excel sheet to return
    :return: the list of rows of the sheet named ``sheet_name``.
    :raises MQ2Exception: if the document has no sheet with that name.

    """
    rows = []
    sheet_seen = False
    for sheet in xlrd.open_workbook(inputfile).sheets():
        if sheet.name != sheet_name:
            continue
        sheet_seen = True
        for row_idx in range(sheet.nrows):
            rows.append([sheet.cell(row_idx, col_idx).value
                         for col_idx in range(sheet.ncols)])
    if not sheet_seen:  # pragma: no cover
        raise MQ2Exception('Invalid session identifier provided')
    return rows
Example #7
0
    def convert_inputfiles(cls,
                           folder=None,
                           inputfile=None,
                           session=None,
                           lod_threshold=None,
                           qtls_file='qtls.csv',
                           matrix_file='qtls_matrix.csv',
                           map_file='map.csv'):
        """ Convert the input files present in the given folder or
        inputfile.
        This method creates the matrix representation of the QTLs
        results providing for each marker position the LOD value found
        for each trait as well as a representation of the genetic map
        used in the experiment.
        The genetic map should be cleared of any markers added by the
        QTL mapping software.

        :kwarg folder: the path to the folder containing the files to
            check. This folder may contain sub-folders.
        :kwarg inputfile: the path to the input file to use. When both
            ``folder`` and ``inputfile`` are given, the input file is
            the one processed.
        :kwarg session: the session identifier used to identify which
            session to process
        :kwarg lod_threshold: the LOD threshold to apply to determine if
            a QTL is significant or not
        :kwarg qtls_file: a csv file containing the list of all the
            significant QTLs found in the analysis.
            The matrix is of type:
               trait, linkage group, position, Marker, LOD other columns
        :kwarg matrix_file: a csv file containing a matrix representation
            of the QTL data. This matrix is of type:
               marker, linkage group, position, trait1 lod, trait2, lod
        :kwarg map_file: a csv file containing the genetic map used
            in this experiment. The map is of structure:
               marker, linkage group, position
        :raises MQ2Exception: if neither a folder nor an input file is
            given, if the folder/file is of the wrong kind, if the LOD
            threshold is not a number or if there is no file to process.
        :raises MQ2NoSessionException: if no session is given.
        :raises MQ2NoSuchSessionException: if the session is not one of
            the sessions found by ``get_session_identifiers``.

        """
        if folder is None and inputfile is None:
            raise MQ2Exception('You must specify either a folder or an '
                               'input file')

        sessions = cls.get_session_identifiers(folder)
        if session is None:
            raise MQ2NoSessionException(
                'The MapQTL plugin requires a session identifier to '
                'identify the session to process.'
                'Sessions are: %s' % ','.join(sessions))
        elif str(session) not in sessions:
            raise MQ2NoSuchSessionException(
                'The MapQTL session provided (%s) could not be found in the '
                'dataset. '
                'Sessions are: %s' % (session, ','.join(sessions)))

        if folder is not None:
            if not os.path.isdir(folder):  # pragma: no cover
                raise MQ2Exception('The specified folder is actually '
                                   'not a folder')
            inputfiles = cls.get_files(folder, session_id=session)

        if inputfile is not None:  # pragma: no cover
            if os.path.isdir(inputfile):
                raise MQ2Exception('The specified input file is actually '
                                   'a folder')
            inputfiles = [inputfile]

        try:
            lod_threshold = float(lod_threshold)
        except (TypeError, ValueError):
            # float(None) -- the default value -- raises TypeError, not
            # ValueError; both mean the threshold is unusable.
            raise MQ2Exception('LOD threshold should be a number')

        # Without any file, there are no headers and no map to write.
        if not inputfiles:  # pragma: no cover
            raise MQ2Exception('No files correspond to this plugin')

        inputfiles.sort()

        # QTL matrix and QTL files
        qtl_matrix = []
        qtls = []
        for filename in inputfiles:
            matrix = read_input_file(filename)
            # The headers of the last file read are the ones written out.
            headers = matrix[0]
            qtl_matrix = get_qtls_matrix(qtl_matrix, matrix, filename)
            qtls.extend(
                get_qtls_from_mapqtl_data(matrix, lod_threshold, filename))
        # format QTLs and write down the selection
        headers[0] = 'Trait name'
        qtls.insert(0, headers)
        write_matrix(qtls_file, qtls)

        # Write down the QTL matrix: drop the first column...
        del (qtl_matrix[0])
        # ...then move what is now the third column to the front
        # (presumably the marker name, per the matrix_file layout
        # documented above -- confirm).
        qtl_matrix.insert(0, qtl_matrix[2])
        del (qtl_matrix[3])
        # write output, transposed back from columns to rows
        qtl_matrix = list(zip(*qtl_matrix))
        write_matrix(matrix_file, qtl_matrix)

        # Map matrix
        map_matrix = get_map_matrix(inputfiles[0])
        write_matrix(map_file, map_matrix)
Example #8
0
    def convert_inputfiles(cls,
                           folder=None,
                           inputfile=None,
                           session=None,
                           lod_threshold=None,
                           qtls_file='qtls.csv',
                           matrix_file='qtls_matrix.csv',
                           map_file='map.csv'):
        """ Convert the input files present in the given folder or
        inputfile.
        This method creates the matrix representation of the QTLs
        results providing for each marker position the LOD value found
        for each trait as well as a representation of the genetic map
        used in the experiment.
        The genetic map should be cleared of any markers added by the
        QTL mapping software.

        :kwarg folder: the path to the folder containing the files to
            check. This folder may contain sub-folders.
        :kwarg inputfile: the path to the input file to use. When both
            ``folder`` and ``inputfile`` are given, the input file is
            the one processed.
        :kwarg session: the session identifier used to identify which
            session to process. It is not used by this implementation.
        :kwarg lod_threshold: the LOD threshold to apply to determine if
            a QTL is significant or not
        :kwarg qtls_file: a csv file containing the list of all the
            significant QTLs found in the analysis.
            The matrix is of type:
               trait, linkage group, position, Marker, LOD other columns
        :kwarg matrix_file: a csv file containing a matrix representation
            of the QTL data. This matrix is of type:
               marker, linkage group, position, trait1 lod, trait2, lod
        :kwarg map_file: a csv file containing the genetic map used
            in this experiment. The map is of structure:
               marker, linkage group, position
        :raises MQ2Exception: if neither a folder nor an input file is
            given, if the folder/file is of the wrong kind, if there is
            not exactly one file to process or if the LOD threshold is
            not a number.

        """
        if folder is None and inputfile is None:
            raise MQ2Exception('You must specify either a folder or an '
                               'input file')

        if folder is not None:  # pragma: no cover
            if not os.path.isdir(folder):
                raise MQ2Exception('The specified folder is actually '
                                   'not a folder')
            inputfiles = cls.get_files(folder)

        if inputfile is not None:  # pragma: no cover
            if os.path.isdir(inputfile):
                raise MQ2Exception('The specified input file is actually '
                                   'a folder')
            inputfiles = [inputfile]

        if not inputfiles:  # pragma: no cover
            raise MQ2Exception('No files correspond to this plugin')

        if len(inputfiles) > 1:  # pragma: no cover
            raise MQ2Exception(
                'This plugin can only process one file at a time')

        try:
            lod_threshold = float(lod_threshold)
        except (TypeError, ValueError):
            # float(None) -- the default value -- raises TypeError, not
            # ValueError; both mean the threshold is unusable.
            raise MQ2Exception('LOD threshold should be a number')

        inputfile = inputfiles[0]

        # QTL matrix and QTL files
        matrix = read_input_file(inputfile, sep=',', noquote=True)
        qtls = list(get_qtls_from_rqtl_data(matrix, lod_threshold))
        # format QTLs and write down the selection
        write_matrix(qtls_file, qtls)

        # Write down the QTL matrix: the input is written out unchanged
        write_matrix(matrix_file, matrix)

        # Map matrix
        map_matrix = get_map_matrix(inputfile)
        write_matrix(map_file, map_matrix)